In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader import data

#matplotlib inline
from pymongo import MongoClient
import bson
from bson import json_util

import pandas as pd
import datetime
import time

from datetime import datetime as dt
import json
import re
import os
import plotly.io as pio

import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

import plotly.colors as pc
from plotly.subplots import make_subplots
import colorlover as cl


from collections import Counter



In [13]:
# Connection to the database.
# NOTE(review): the fallback connection string embeds a username and password
# in the notebook source. Set the MONGO_URI environment variable to override
# it, and rotate/remove the embedded credential before sharing this notebook.
MONGO_URI = os.environ.get(
    'MONGO_URI',
    'mongodb://soft-writer:ewl42je2@195.176.181.158:27017/',
)
client = MongoClient(MONGO_URI)
db = client['api_ace_db']
# Collection selection
collection = db['commits']
# Create an empty list to store the data.
# NOTE: this rebinds `data`, which shadows the `pandas_datareader.data` import above.
data = []
# Obtain documents (the cursor is not consumed by any of the cells shown here)
documents = collection.find()

Extraction of all security scheme types, split into two variables: `auth_values_1` for OpenAPI 3.0 (`components.securitySchemes`) and `auth_values_2` for Swagger 2.0 (`securityDefinitions`):

In [14]:
def _scheme_types(field_path):
    """Build a $map expression listing the `type` of every entry stored under `field_path`."""
    return {
        "$map": {
            "input": {"$objectToArray": field_path},
            "as": "auth",
            "in": "$$auth.v.type",
        }
    }


# Stage 1: keep only the commits that carry a `schemaSize` field.
match_stage = {'$match': {'schemaSize': {'$exists': True}}}

# Stage 2: derive one list of scheme types per specification flavour
# (`components.securitySchemes` and `securityDefinitions`).
add_fields_stage = {
    '$addFields': {
        "auth_values_1": _scheme_types("$api.components.securitySchemes"),
        "auth_values_2": _scheme_types("$api.securityDefinitions"),
    }
}

# Stage 3: rename/select the columns used by the charts below.
# NOTE(review): no earlier stage creates `security_size`; confirm that it exists
# on the stored documents, otherwise this inclusion yields no column.
project_stage = {
    '$project': {
        'apiSpec_id': '$api_spec_id',
        'timeStamp': '$commit_date',
        'paths_size': '$structureSize.paths',
        'schemas_size': '$schemaSize.schemas',
        'security_size': 1,
        'auth_values_1': 1,
        'auth_values_2': 1,
    }
}

pipeline = [match_stage, add_fields_stage, project_stage]

# Materialise the whole result set, then load it into a DataFrame.
results = list(collection.aggregate(pipeline))
df = pd.DataFrame(results)

In [15]:
# String form of every `auth_values_2` entry, so it can be used as a grouping key
df['auth_str'] = df['auth_values_2'].map(str)

# Number of rows for every (API, scheme combination) pair
auth_counts = (
    df.groupby(['apiSpec_id', 'auth_str'])
    .size()
    .reset_index(name='count')
)

# One bar per (API, combination) pair, coloured by the API identifier
bar_trace = go.Bar(
    x=auth_counts['auth_str'],
    y=auth_counts['count'],
    marker_color=auth_counts['apiSpec_id'],
)
fig = go.Figure(data=[bar_trace])

# Hover text for each bar, in the same order as the rows of `auth_counts`
hover_texts = [
    f"API: {api_id}<br>"
    f"Type: {scheme_combo}<br>"
    f"Times Combination found: {times_found}"
    for api_id, scheme_combo, times_found in zip(
        auth_counts['apiSpec_id'], auth_counts['auth_str'], auth_counts['count']
    )
]

# Attach the hover texts to the bars
fig.update_traces(hovertext=hover_texts, hoverinfo='text+y')

# Titles for the chart and its axes
fig.update_layout(
    title='Frequency type of security schema for Swagger 2.0',
    xaxis=dict(title='Combined security schemas'),
    yaxis=dict(title='Frecuencia'),
)

# Save the chart as a standalone HTML file.
# NOTE(review): the chart is built from `auth_values_2` and titled "Swagger 2.0",
# but the file name says OpenAPI30 - confirm which one is intended.
fig.write_html('Frequency_security_OpenAPI30.html', full_html=True)




In [16]:


def _has_schemes(value):
    """Return True when `value` is a non-empty list of security scheme types."""
    # Same list guard the later cells use; anything else (None, NaN, ...) counts as "none".
    return isinstance(value, list) and len(value) > 0


fig = go.Figure()

apis = df['apiSpec_id'].unique()
num_apis = len(apis)
# sample_colorscale spaces an integer number of samples evenly across the scale,
# which breaks down for a single sample (division by count - 1), so a lone API
# is sampled at the explicit point 0.0 instead.
# NOTE(review): confirm against the installed plotly version.
color_scale = pc.sample_colorscale('Rainbow', num_apis if num_apis != 1 else [0.0])


traces = []
# The loop index stays named `i`: a later cell currently reads this name.
for i, api in enumerate(apis):
    data = df[df['apiSpec_id'] == api]
    count = len(data)  # Count the points plotted for each 'apiSpec_id'.

    # Commits of this API that define at least one security scheme in either flavour
    count_with_security = sum(
        _has_schemes(openapi_types) or _has_schemes(swagger_types)
        for openapi_types, swagger_types in zip(data['auth_values_1'], data['auth_values_2'])
    )

    # List for storing customized texts for each point (timeStamp)
    hover_texts = []

    for api_id, time_stamp, schema_size, openapi_types, swagger_types in zip(
        data['apiSpec_id'],
        data['timeStamp'],
        data['schemas_size'],
        data['auth_values_1'],
        data['auth_values_2'],
    ):
        # Check whether this point (timeStamp) defines any security component
        has_openapi = _has_schemes(openapi_types)
        if has_openapi or _has_schemes(swagger_types):
            auth_values_1 = openapi_types
            auth_values_2 = swagger_types
            # At least one side is non-empty here, so it is one flavour or the other
            type_value = 'OpenAPI 3.0' if has_openapi else 'Swagger 2.0'
            count_with_security_solo = 1
        else:
            auth_values_1 = 'Not define'
            auth_values_2 = 'Not define'
            type_value = 'Not define'
            count_with_security_solo = 0

        # int() raises on a missing (NaN/None) size, so fall back to the same
        # placeholder used for the other undefined fields
        schema_label = int(schema_size) if pd.notna(schema_size) else 'Not define'

        # Create the text for the mouse text box of each point (timeStamp)
        hover_text = f"API: {api_id}<br>" \
                     f"TimeStamp: {time_stamp}<br>" \
                     f"Schema: {schema_label}<br>" \
                     f"Security: {auth_values_1} - {auth_values_2}<br>" \
                     f"Type: {type_value}<br>" \
                     f"Security Commits: {count_with_security_solo}<br>" \
                     f"Security Commits Total: {count_with_security}<br>" \
                     f"Commits: {count}"

        hover_texts.append(hover_text)

    trace = go.Scatter(
        x=data['timeStamp'],
        y=data['schemas_size'],
        mode='markers',
        text=hover_texts,
        hovertemplate='%{text}',
        marker=dict(color=color_scale[i], size=6, line=dict(width=1, color='black')),  # Add the black line to each point
        name=str(api)
    )
    traces.append(trace)
    fig.add_trace(trace)

fig.update_layout(
    title='APIs',
    xaxis=dict(title='TimeStamp'),
    yaxis=dict(title='Schemas'),
    hovermode='closest',
    height=800,
    hoverlabel=dict(
        bgcolor='black',
        font=dict(color='white'),
        align='left',
        bordercolor='black',
        namelength=-1
    )
)

fig.write_html('API_and_security_points.html', full_html=True)


In [123]:


fig = go.Figure()

# Extract individual security types from 'auth_values_1' (non-list entries are skipped)
security_types = [security for sublist in df['auth_values_1'].tolist() if isinstance(sublist, list) for security in sublist]
security_count = Counter(security_types)

# Obtain the distinct APIs that use each type of security
api_by_security = {}
for security_type in security_count:
    uses_type = df['auth_values_1'].apply(lambda x: isinstance(x, list) and security_type in x)
    api_by_security[security_type] = df.loc[uses_type, 'apiSpec_id'].unique()

# Share of each security type. The denominator is the sum of the per-type API
# counts, so an API that uses several types is counted once for every type.
total_apis = sum(len(api_list) for api_list in api_by_security.values())
percentages = [(len(api_list) / total_apis) * 100 for api_list in api_by_security.values()]

# sample_colorscale spaces an integer number of samples evenly across the scale,
# which breaks down for a single sample (division by count - 1), so a lone
# security type is sampled at the explicit point 0.0 instead.
# NOTE(review): confirm against the installed plotly version.
num_types = len(api_by_security)
slice_colors = pc.sample_colorscale('Viridis', num_types if num_types != 1 else [0.0])

# Create the pie chart to show the APIs by type of security
pie_chart = go.Pie(
    labels=list(api_by_security.keys()),
    values=percentages,
    hovertemplate='%{label}: %{value:.2f}% APIs',
    marker=dict(colors=slice_colors)
)

fig.add_trace(pie_chart)

fig.update_layout(
    title='OpenAPI 3.0: components/securitySchemes',
    height=800,
    hoverlabel=dict(
        bgcolor='black',
        font=dict(color='white'),
        align='left',
        bordercolor='black',
        namelength=-1
    )
)


fig.write_html('APIsSecurityCircle.html', full_html=True)


In [125]:


fig = go.Figure()

# Extract individual security types from 'auth_values_2' (non-list entries are skipped)
security_types = [security for sublist in df['auth_values_2'].tolist() if isinstance(sublist, list) for security in sublist]
security_count = Counter(security_types)

# For each security type: the distinct APIs that use it, plus its raw occurrence count
api_by_security = {}
for security_type, count in security_count.items():
    uses_type = df['auth_values_2'].apply(lambda x: isinstance(x, list) and security_type in x)
    apis = df.loc[uses_type, 'apiSpec_id'].unique()
    api_by_security[security_type] = (apis, count)

# Share of each security type. The denominator is the sum of the per-type API
# counts, so an API that uses several types is counted once for every type.
total_apis = sum(len(api_list) for api_list, _ in api_by_security.values())
percentages = [(len(api_list) / total_apis) * 100 for api_list, _ in api_by_security.values()]

# Per-slice text: the type name followed by its number of APIs
text_labels = [f"{security_type}<br>{len(api_list)} APIs" for security_type, (api_list, _) in api_by_security.items()]

# sample_colorscale spaces an integer number of samples evenly across the scale,
# which breaks down for a single sample (division by count - 1), so a lone
# security type is sampled at the explicit point 0.0 instead.
# NOTE(review): confirm against the installed plotly version.
num_types = len(api_by_security)
slice_colors = pc.sample_colorscale('Viridis', num_types if num_types != 1 else [0.0])

pie_chart = go.Pie(
    labels=list(api_by_security.keys()),
    values=percentages,
    text=text_labels,
    hovertemplate='%{label}: %{value:.2f}% APIs<br>Total APIs: %{text}',
    marker=dict(colors=slice_colors)
)

fig.add_trace(pie_chart)

fig.update_layout(
    title='Swagger 2.0: securityDefinitions',
    height=800,
    hoverlabel=dict(
        bgcolor='black',
        font=dict(color='white'),
        align='left',
        bordercolor='black',
        namelength=-1
    )
)


fig.write_html('APIsSecurityCircle_sw.html', full_html=True)


In [11]:

fig = go.Figure()

# Extract individual security types from 'auth_values_1' (non-list entries are skipped)
security_types = [security for sublist in df['auth_values_1'].tolist() if isinstance(sublist, list) for security in sublist]
security_count = Counter(security_types)

# Number of rows (commits) whose scheme list contains each security type
points_by_security = {}
for security_type in security_count:
    uses_type = df['auth_values_1'].apply(lambda x: isinstance(x, list) and security_type in x)
    points_by_security[security_type] = len(df[uses_type])

# sample_colorscale spaces an integer number of samples evenly across the scale,
# which breaks down for a single sample (division by count - 1), so a lone
# security type is sampled at the explicit point 0.0 instead.
# NOTE(review): confirm against the installed plotly version.
num_types = len(points_by_security)
bar_colors = pc.sample_colorscale('Viridis', num_types if num_types != 1 else [0.0])

# Create the bar chart to show the points plotted by type of security
bar_chart = go.Bar(
    x=list(points_by_security.keys()),
    y=list(points_by_security.values()),
    marker=dict(color=bar_colors),
    hovertemplate='%{y} Security Commits',
)

fig.add_trace(bar_chart)

fig.update_layout(
    title='Security by commits',
    xaxis=dict(title='Security Type'),
    yaxis=dict(title='Commits'),
    hovermode='closest',
    height=600,
    hoverlabel=dict(
        bgcolor='black',
        font=dict(color='white'),
        align='left',
        bordercolor='black',
        namelength=-1
    )
)

fig.write_html('Commits_by_Security_Type.html', full_html=True)



In [20]:


# Rebuild the frame from the raw aggregation results so this cell does not
# depend on columns or filters applied by other cells.
df = pd.DataFrame(results)

# Exclude null values in 'auth_values_1'.
df = df[~df['auth_values_1'].isnull()]

# Create a list of unique security types
security_types = df['auth_values_1'].explode().unique()

colors = cl.scales['8']['qual']['Set1']

# Create one marker trace for each type of security
data = []
# enumerate() supplies the colour index `i` used below, so the cell no longer
# relies on an `i` left behind by whichever cell happened to run before it.
for i, security_type in enumerate(security_types):
    # Filter records whose scheme list contains this security type
    filtered_df = df[df['auth_values_1'].apply(lambda x: security_type in x)]

    # Number of distinct APIs using this security type at each timestamp
    count_by_date = filtered_df.groupby('timeStamp')['apiSpec_id'].nunique().reset_index()

    # Obtain dates and frequencies
    dates = count_by_date['timeStamp']
    frequencies = count_by_date['apiSpec_id']

    scatter = go.Scatter(
        x=dates,
        y=frequencies,
        mode='markers',
        name=security_type,
        # Wrap around the palette when there are more types than colours
        marker=dict(size=12, color=colors[i % len(colors)]),
        line=dict(width=2, color='DarkSlateGrey'),
        selected=dict(marker=dict(size=12)),
    )

    # Add the graph to the data list
    data.append(scatter)

# Set up the figure design
layout = go.Layout(
    title='Frequency of use of security types of OpenAPI 3.0 by API over time',
    xaxis=dict(title='timeStamp'),
    yaxis=dict(title='Frequency of use in APIS'),
    hovermode='closest'
)

# figure
fig = go.Figure(data=data, layout=layout)


fig.write_html('grafica.html', full_html=True)

In [21]:


# Rebuild the frame from the raw aggregation results so this cell does not
# depend on columns or filters applied by other cells.
df = pd.DataFrame(results)

# Exclude null values in 'auth_values_2'.
df = df[~df['auth_values_2'].isnull()]

# Create a list of unique security types
security_types = df['auth_values_2'].explode().unique()

# Keep the whole palette and index it modulo its length below: slicing it to
# len(security_types) and indexing with `i` directly raises IndexError as soon
# as there are more distinct values than palette entries.
colors = cl.scales['8']['qual']['Set1']

data = []
for i, security_type in enumerate(security_types):
    # Filter records whose scheme list contains this security type
    filtered_df = df[df['auth_values_2'].apply(lambda x: security_type in x)]

    # Number of distinct APIs using this security type at each timestamp
    count_by_date = filtered_df.groupby('timeStamp')['apiSpec_id'].nunique().reset_index()

    # Obtain dates and frequencies
    dates = count_by_date['timeStamp']
    frequencies = count_by_date['apiSpec_id']

    # Create the distributed dot plot
    scatter = go.Scatter(
        x=dates,
        y=frequencies,
        mode='markers',
        name=security_type,
        marker=dict(
            size=12,
            color=colors[i % len(colors)],
            line=dict(color='black', width=1),
        ),
        line=dict(width=2, color='DarkSlateGrey'),
        selected=dict(marker=dict(size=12)),
    )

    # Add the chart to the data list
    data.append(scatter)

# Configure the figure design
layout = go.Layout(
    title='Frequency of use of security schemes of Swagger 2.0 by API over time',
    xaxis=dict(title='timeStamp'),
    yaxis=dict(title='Frequency of use in APIS'),
    hovermode='closest'
)

# Create the figure
fig = go.Figure(data=data, layout=layout)


fig.write_html('Frequency of use of security schemes.html', full_html=True)


In [26]:



# Start again from the raw aggregation results and keep the rows that have a
# non-null 'auth_values_2' entry.
df = pd.DataFrame(results)
df = df[df['auth_values_2'].notnull()]

# Distinct security types, sorted; one subplot row is created for each of them
security_counts = df['auth_values_2'].explode().value_counts()
security_types = security_counts.index.sort_values()
num_rows = len(security_types)

# Titles are passed in reverse because the loop below fills the rows bottom-up
fig = make_subplots(
    rows=num_rows,
    cols=1,
    shared_xaxes=True,
    subplot_titles=security_types[::-1],
)

X_TITLE = 'timeStamp'
Y_TITLE = 'Frequency of use in APIS'

for i, security_type in enumerate(security_types):
    # The i-th type (in sorted order) goes to the i-th row counted from the bottom
    target_row = num_rows - i

    # Rows whose scheme list contains this security type
    filtered_df = df[df['auth_values_2'].apply(lambda x: security_type in x)]

    # Number of distinct APIs using this security type at each timestamp
    count_by_date = filtered_df.groupby('timeStamp')['apiSpec_id'].nunique().reset_index()

    marker_style = dict(size=6, line=dict(color='black', width=1))
    type_trace = go.Scatter(
        x=count_by_date['timeStamp'],
        y=count_by_date['apiSpec_id'],
        mode='markers',
        marker=marker_style,
        name=security_type,
    )
    fig.add_trace(type_trace, row=target_row, col=1)

    fig.update_xaxes(title_text=X_TITLE, row=target_row, col=1)
    fig.update_yaxes(title_text=Y_TITLE, row=target_row, col=1)


def _axis_title(text):
    """Axis settings with a title using the standoff and font size applied to the first axes."""
    return dict(title=dict(text=text, standoff=10, font=dict(size=12)))


fig.update_layout(
    title='Frequency of use of security types of Swagger 2.0 by API over time',
    hovermode='closest',
    xaxis=_axis_title(X_TITLE),
    yaxis=_axis_title(Y_TITLE),
)


fig.write_html('grafica_subplot.html', full_html=True)
