In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader import data
#matplotlib inline

from pymongo import MongoClient
import bson
from bson import json_util
import pandas as pd
import datetime
import time
from datetime import datetime as dt
import json
import re
import os

import plotly.io as pio
import plotly.express as px
import plotly.express as px

import plotly.figure_factory as ff


In [5]:
# Connect to the MongoDB instance that holds the API commit data.
# The connection string (which carries the username and password) is read from
# the MONGO_URI environment variable so credentials are never stored in the
# notebook or its version history.
# NOTE(review): a password was previously embedded in this cell; it should be
# treated as leaked and rotated on the server.
mongo_uri = os.environ.get('MONGO_URI')
if not mongo_uri:
    raise RuntimeError(
        "Set the MONGO_URI environment variable to the MongoDB connection "
        "string, e.g. mongodb://<user>:<password>@<host>:27017/"
    )
client = MongoClient(mongo_uri)
db = client['api_ace_db']
# Select the collection of commits.
collection = db['commits']
# Empty list intended to hold the data.
# NOTE(review): this rebinds `data`, hiding the `pandas_datareader.data` import
# made in the import cell; kept because other cells may rely on the name.
data = []
# Cursor over every document in the collection. Creating it does not fetch
# anything yet; it is not consumed by the cells visible here.
documents = collection.find()

In [6]:
# JavaScript executed by MongoDB's $function operator. It counts every key of
# the object it receives and, for each value that is a non-null object, adds
# the key count of that value recursively.
_count_keys_js = '''
                    function(obj) {
                        function getObjectSize(obj) {
                            let size = 0;
                            for (let key in obj) {
                                size++;
                                if (typeof obj[key] === 'object' && obj[key] !== null) {
                                    size += getObjectSize(obj[key]);
                                }
                            }
                            return size;
                        }
                        return getObjectSize(obj);
                    }
                    '''

# Field path of the security schemes, plus its "$"-prefixed form used when the
# field's value is referenced inside an expression.
_security_path = 'api.components.securitySchemes'
_security_ref = '$' + _security_path

# Stage 1: keep only documents that have a `schemaSize` field.
_match_schema_size = {'$match': {'schemaSize': {'$exists': True}}}

# Stage 2: keep only documents that declare security schemes.
_match_security = {'$match': {_security_path: {'$exists': True}}}

# Stage 3: add two derived fields:
#   security_size - recursive key count of the security schemes object
#   auth_values   - the `type` of each security scheme, as an array
_add_security_fields = {
    '$addFields': {
        'security_size': {
            '$function': {
                'body': _count_keys_js,
                'args': [_security_ref],
                'lang': 'js',
            },
        },
        'auth_values': {
            '$map': {
                'input': {'$objectToArray': _security_ref},
                'as': 'auth',
                'in': '$$auth.v.type',
            },
        },
    },
}

# Stage 4: rename the fields of interest and keep the derived ones. `auth` is
# the first key/value pair of the security schemes object.
_project_fields = {
    '$project': {
        'apiSpec_id': '$api_spec_id',
        'timeStamp': '$commit_date',
        'paths_size': '$structureSize.paths',
        'schemas_size': '$schemaSize.schemas',
        'auth': {'$arrayElemAt': [{'$objectToArray': _security_ref}, 0]},
        'security_size': 1,
        'auth_values': 1,
    },
}

pipeline = [
    _match_schema_size,
    _match_security,
    _add_security_fields,
    _project_fields,
]

# Run the aggregation and materialise every resulting document in memory.
results = list(collection.aggregate(pipeline))



In [20]:
# Load the aggregation output into a DataFrame, then print its column index
# followed by the frame itself (one per line).
df = pd.DataFrame(results)
print(df.columns, df, sep="\n")

Index(['_id', 'security_size', 'auth_values', 'apiSpec_id', 'timeStamp',
       'paths_size', 'schemas_size', 'auth'],
      dtype='object')
                            _id  security_size auth_values  apiSpec_id   
0      6454b8685b78352ab727e656           11.0    [oauth2]         697  \
1      6454b8685b78352ab727e658           11.0    [oauth2]         697   
2      6454b8685b78352ab727e65a           11.0    [oauth2]         697   
3      6454b8685b78352ab727e65c           11.0    [oauth2]         697   
4      6454b8685b78352ab727e65e           11.0    [oauth2]         697   
...                         ...            ...         ...         ...   
53790  646b8278e8e9df061ad7ef1c            5.0    [apiKey]      272543   
53791  646b8279e8e9df061ad7ef1e            5.0    [apiKey]      272543   
53792  646b8279e8e9df061ad7ef20            5.0    [apiKey]      272543   
53793  646b827ae8e9df061ad7ef23            5.0    [apiKey]      272543   
53794  646b827ae8e9df061ad7ef25            5.

In [25]:
import plotly.graph_objects as go
import plotly.colors as pc


def _format_hover(row):
    """Build the hover label for one row of the commits frame.

    Lines are joined with ``<br>`` because Plotly hover labels are rendered
    as HTML-like text and do not turn ``\\n`` into a line break. A missing
    ``schemas_size`` is shown as ``n/a`` instead of raising on ``int(NaN)``.
    """
    schema = row['schemas_size']
    schema_label = 'n/a' if pd.isna(schema) else int(schema)
    return (
        f"API: {row['apiSpec_id']}<br>"
        f"TimeStamp: {row['timeStamp']}<br>"
        f"Schema: {schema_label}<br>"
        f"Auth: {row['auth_values']}"
    )


fig = go.Figure()

# List the APIs (in order of first appearance) and build one colour per API.
# Rows without an API id are left out, matching the groupby below.
apis = df['apiSpec_id'].dropna().unique()
num_apis = len(apis)
# Evenly spaced sample points in [0, 1]. They are computed here rather than
# passing the integer count, so a single API does not lead to a division by
# (n - 1) == 0 when the colour scale is sampled.
sample_points = [i / max(num_apis - 1, 1) for i in range(num_apis)]
color_scale = pc.sample_colorscale('Rainbow', sample_points)

# Add one scatter trace per API, each with its own colour. A single groupby
# pass (sort=False keeps first-appearance order, i.e. the order of `apis`)
# replaces re-filtering the whole frame once per API.
for i, (api, api_rows) in enumerate(df.groupby('apiSpec_id', sort=False)):
    fig.add_trace(go.Scatter(
        x=api_rows['timeStamp'],
        y=api_rows['schemas_size'],
        mode='markers',
        name=f"API {api}",  # legend entry instead of the default "trace N"
        text=api_rows.apply(_format_hover, axis=1),
        hovertemplate='%{text}',
        marker=dict(color=color_scale[i]),
    ))

fig.update_layout(
    title='Gráfico de API',
    xaxis=dict(title='TimeStamp'),
    yaxis=dict(title='Schemas'),
    hovermode='closest',
    height=1000,
    hoverlabel=dict(
        bgcolor='black',
        font=dict(color='white'),
        align='left',
        bordercolor='black',
        namelength=-1
    )
)

# Save the figure as a standalone HTML file (it is not opened automatically).
fig.write_html('API_and_security.html', full_html=True)

In [22]:
# Render each entry of `auth_values` as text so it can be used as a
# grouping key.
df['auth_str'] = df['auth_values'].map(str)

# Count how many rows share each (apiSpec_id, auth_str) pair.
_pair_sizes = df.groupby(['apiSpec_id', 'auth_str']).size()
auth_counts = _pair_sizes.reset_index(name='count')

# Show the table.
print(auth_counts)

      apiSpec_id                      auth_str  count
0            521                      ['http']     40
1            697                    ['oauth2']     17
2            803                      ['http']     21
3            838  ['apiKey', 'apiKey', 'http']     10
4            838            ['apiKey', 'http']      4
...          ...                           ...    ...
1907      626516                    ['apiKey']     16
1908      628693          ['apiKey', 'apiKey']     14
1909      632089                      ['http']     30
1910      634334            ['apiKey', 'http']     20
1911      635583                      ['http']     10

[1912 rows x 3 columns]


In [23]:
import plotly.graph_objects as go

# `auth_counts` has one row per (API, auth combination), so the same
# `auth_str` value appears many times. Sum the counts per auth combination
# first so the chart has exactly one bar per category and its hover shows
# the total, instead of depending on how repeated x categories are drawn.
auth_totals = (
    auth_counts
    .groupby('auth_str', as_index=False)['count']
    .sum()
    .sort_values('count', ascending=False)  # most frequent combination first
)

# Build the bar chart.
fig = go.Figure(data=[go.Bar(x=auth_totals['auth_str'], y=auth_totals['count'])])

# Customise titles and axis labels.
fig.update_layout(
    title='Frequency type of security',
    xaxis=dict(title='auth_values'),
    yaxis=dict(title='Frecuencia'),
)

# Save the chart as an HTML file.
fig.write_html('Frequency_security.html')

