<a href="https://colab.research.google.com/github/Brevex/Code-Metric-Data-Analisis/blob/main/Code_Metric_Analisis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [76]:
import pandas as pd
import numpy as np
import seaborn as sns

In [77]:
from matplotlib import pyplot as plt

In [None]:
# Fetch ckclass.csv from the Brevex/Complexity GitHub repository using the gdown CLI.
# NOTE(review): presumably saved into the current working directory under the URL's basename — confirm.
!gdown https://raw.githubusercontent.com/Brevex/Complexity/main/ckclass.csv

In [None]:
# Load the downloaded metrics file into the working DataFrame.
# A relative path is used so the read targets the current working directory
# (where the download lands) instead of assuming the Colab-only '/content' folder.
df = pd.read_csv('ckclass.csv')
df

In [None]:
# Print a summary of df: column names, dtypes and non-null counts.
df.info()

In [None]:
# For every column holding at least one null, print the percentage of rows that are null.
null_counts = df.isnull().sum()
row_total = df.shape[0]

for column_name, null_count in null_counts[null_counts > 0].items():
    null_share = (null_count * 100) / row_total
    print(f"{column_name}: {null_share:.3f}% of values are null.")

In [None]:
# For every column holding at least one null, print the absolute number of nulls.
null_totals = df.isnull().sum()
columns_with_nulls = null_totals[null_totals > 0]

for column_name, null_total in columns_with_nulls.items():
    print(column_name + ":", null_total)

In [83]:
# Drop every row that contains a null in any column.
# inplace=True mutates df itself (the call returns None), so cells that
# displayed df earlier now show a stale view.
df.dropna(axis = 0 , inplace = True)

In [None]:
# Show the current contents of df as the cell output.
df

In [87]:
# Derive three categorical quality indicators for every row of df and export them.
#
# Each indicator is a numeric score bucketed with pd.cut. pd.cut bins are
# right-inclusive, so e.g. a wmc of exactly 10 lands in the first ('Low') bucket.

# Ordered labels for each indicator, lowest bucket first.
complexity_categories = {

    'Risk by Cyclomatic Complexity': ['Low', 'Moderate', 'High', 'Very High'],
    'Fan-in and Fan-out Complexity': ['Good', 'Moderate', 'High'],
    'Maintainability Score': ['Good', 'Moderate', 'Bad']
}

# Bucket edges for each indicator (always one more edge than there are labels).
complexity_bins = {

    'Risk by Cyclomatic Complexity': [-float('inf'), 10, 20, 50, float('inf')],
    'Fan-in and Fan-out Complexity': [-float('inf'), 100, 1000, float('inf')],
    'Maintainability Score': [-float('inf'), 65, 85, float('inf')]
}

# Raw numeric scores, computed with column arithmetic rather than row-by-row apply.
# The insertion order below is the order in which the columns are added to df.
raw_scores = {

    # Plain sum of the four quantity metrics; the lowest totals map to 'Good'.
    'Maintainability Score': df[['loopQty', 'comparisonsQty', 'numbersQty', 'variablesQty']].sum(axis = 1),

    # wmc * (fanin * fanout)^2, computed in floating point so that large
    # products cannot wrap around as fixed-width integers.
    'Fan-in and Fan-out Complexity': df['wmc'] * (df['fanin'].astype(float) * df['fanout']) ** 2,

    # The risk indicator is bucketed directly from wmc.
    'Risk by Cyclomatic Complexity': df['wmc']
}

# Bucket each score straight into its ordered, labelled category.
# Passing the labels to pd.cut yields the same ordered categorical as
# cutting to integer codes and mapping them to names afterwards.
for category, score in raw_scores.items():
    df[category] = pd.cut(score, bins = complexity_bins[category], labels = complexity_categories[category])

# Selection of relevant columns for the final table
complexity_analysis_table = df[['file', 'class', 'Risk by Cyclomatic Complexity', 'Fan-in and Fan-out Complexity', 'Maintainability Score']]

# Export to CSV (written relative to the current working directory, without the index column)
complexity_analysis_table.to_csv('Complexity_Analysis_Table.csv', index = False)

In [None]:
# Reload the exported table from disk and display it.
# The file is read through the same relative path it was written to, rather
# than assuming the Colab-specific '/content' working directory.
complexidade_ciclomatica = pd.read_csv('Complexity_Analysis_Table.csv')
complexidade_ciclomatica

In [None]:
# Summary statistics of the 'wmc' column
total_wmc = df['wmc'].sum()
min_wmc = df['wmc'].min()
max_wmc = df['wmc'].max()
mean_wmc = df['wmc'].mean()


def add_values_bars(bars):
    """Annotate each bar with its height, rounded to two decimals.

    The text is centred horizontally on the bar and anchored at its top edge.
    Draws on the notebook-level ``ax``, which must exist when this is called.
    """
    for rect in bars:
        height = rect.get_height()
        x_centre = rect.get_x() + rect.get_width()/2
        ax.text(x_centre, height, round(height, 2), ha='center', va='bottom')


# One figure with a single axes for the bar chart
fig, ax = plt.subplots()

# (name, value, colour) of each bar, in plotting order; the name is used both
# as the x-axis category and as the legend entry
bar_specs = [
    ('Total CC', total_wmc, 'blue'),
    ('Min CC', min_wmc, 'green'),
    ('Max CC', max_wmc, 'red'),
    ('Average CC', mean_wmc, 'orange'),
]

# Draw every bar first, then write the exact value above each one
bar_groups = [
    ax.bar(name, value, label=name, color=colour)
    for name, value, colour in bar_specs
]

for group in bar_groups:
    add_values_bars(group)

# Axis label, title and legend
ax.set_ylabel('Cyclomatic Complexity Value')
ax.set_title('Cyclomatic Complexity Statistics')
ax.legend()

# Render the figure
plt.show()