<a href="https://colab.research.google.com/github/Brevex/Code-Complexity-Data-Analisis/blob/main/Code_Metric_Analisis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas matplotlib

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Fetch the ckclass.csv metrics file from GitHub so the following cell can load it.
!gdown https://raw.githubusercontent.com/Brevex/Complexity/main/ckclass.csv

In [None]:
# Load the downloaded metrics file. The path is relative to the working directory
# (where the download cell saves the file) instead of the Colab-only '/content/'
# prefix, so the notebook also runs outside Colab.
df = pd.read_csv('ckclass.csv')
df

In [None]:
# Print a summary of the DataFrame's columns, dtypes and non-null counts.
df.info()

In [None]:
# For every column that contains at least one null, print the percentage of
# its rows that are null (three decimal places).
total_rows = df.shape[0]
null_counts = df.isnull().sum()

for column_name, null_count in null_counts[null_counts > 0].items():
    missingValuesPercentual = (null_count * 100) / total_rows
    print(f"{column_name}: {missingValuesPercentual:.3f}% of values are null.")

In [None]:
# For every column that contains at least one null, print the absolute number
# of null entries.
null_totals = df.isnull().sum()

for column_name, null_total in null_totals[null_totals > 0].items():
    print(column_name + ":", null_total)

In [7]:
# Remove, in place, every row that has a null in any column.
df.dropna(axis='index', how='any', inplace=True)

In [None]:
# Display the DataFrame again now that rows with missing values have been dropped.
df

In [None]:
# Derive three per-row indicators from the metric columns of `df`, bucket each one
# into labelled categories, then export the labelled table to CSV and reload it.

# Labels for each indicator, listed from the lowest bin to the highest.
complexity_categories = {
    'Risk by Cyclomatic Complexity': ['Low', 'Moderate', 'High', 'Very High'],
    'Fan-in and Fan-out Complexity': ['Good', 'Moderate', 'High'],
    'Maintainability Score': ['Good', 'Moderate', 'Bad'],
}

# Bin edges for each indicator. pd.cut uses right-closed intervals by default, so a
# value equal to an edge lands in the lower bin (e.g. wmc == 10 -> 'Low').
category_bin_edges = {
    'Risk by Cyclomatic Complexity': [-float('inf'), 10, 20, 50, float('inf')],
    'Fan-in and Fan-out Complexity': [-float('inf'), 100, 1000, float('inf')],
    'Maintainability Score': [-float('inf'), 65, 85, float('inf')],
}

# Maintainability Score: row-wise sum of the four quantity metrics. The column-wise
# .sum replaces a Python-level apply(sum); skipna=False keeps the builtin sum's
# behaviour of propagating NaN instead of silently skipping it.
metric_sum = df[['loopQty', 'comparisonsQty', 'numbersQty', 'variablesQty']].sum(axis=1, skipna=False)

# Fan-in and Fan-out Complexity: wmc * (fanin * fanout) ** 2, computed on whole
# columns. The product is cast to float so squaring a large value cannot wrap
# around the int64 range.
fan_product = (df['fanin'] * df['fanout']).astype('float64')

# Raw numeric value behind each indicator. All of them are computed before any
# column of `df` is (re)assigned, so re-running the cell gives the same result.
raw_scores = {
    'Maintainability Score': metric_sum,
    'Fan-in and Fan-out Complexity': df['wmc'] * fan_product ** 2,
    'Risk by Cyclomatic Complexity': df['wmc'],
}

# Bucket every raw value straight into its ordered, labelled category.
for category, raw_values in raw_scores.items():
    df[category] = pd.cut(
        raw_values,
        bins=category_bin_edges[category],
        labels=complexity_categories[category],
    )

# Selection of relevant columns for the final table
complexity_analysis_table = df[['file', 'class', 'Risk by Cyclomatic Complexity', 'Fan-in and Fan-out Complexity', 'Maintainability Score']]

# Export to CSV and read the file back from the very same path, so the round trip
# does not depend on the working directory being '/content'.
output_csv_path = 'Complexity_Analysis_Table.csv'
complexity_analysis_table.to_csv(output_csv_path, index = False)

complexity_analysis_df = pd.read_csv(output_csv_path)
complexity_analysis_df

In [None]:
# Draw one pie chart per indicator showing the share of rows in each category.

# Category labels in their intended (low -> high) display order.
Risk_by_Cyclomatic_Complexity = ['Low', 'Moderate', 'High', 'Very High']
Fanin_and_Fanout_Complexity = ['Good', 'Moderate', 'High']
Maintainability_Score = ['Good', 'Moderate', 'Bad']

# (column in complexity_analysis_df, label order) for each chart, left to right.
pie_specs = [
    ('Risk by Cyclomatic Complexity', Risk_by_Cyclomatic_Complexity),
    ('Fan-in and Fan-out Complexity', Fanin_and_Fanout_Complexity),
    ('Maintainability Score', Maintainability_Score),
]

fig, axs = plt.subplots(1, len(pie_specs), figsize=(15, 5))

for ax, (column, ordered_labels) in zip(axs, pie_specs):
    # value_counts() orders its result by frequency, not by category, so the counts
    # are re-aligned to the label order; otherwise wedges get the wrong labels.
    counts = complexity_analysis_df[column].value_counts().reindex(ordered_labels, fill_value=0)
    # Drop categories that never occur: they would only add empty wedges, and the
    # labels are taken from the remaining index so both always have equal length.
    counts = counts[counts > 0]

    ax.pie(counts, labels = counts.index, autopct = "%0.01f%%")
    ax.set_title(f"{column} Percentage")

plt.tight_layout()
plt.show()