# Confounding Factors
---

In [2]:
import pandas as pd
from tqdm.notebook import tqdm
import os
import seaborn as sns
import matplotlib.pyplot as plt
from radon.raw import analyze
from radon.metrics import h_visit, mi_visit
from radon.complexity import cc_visit
import radon.visitors

# Use a white background for every figure written with savefig in this notebook.
plt.rcParams['savefig.facecolor'] = 'white'

In [3]:
# Make sure both output folders exist. exist_ok=True keeps the cell safe to
# re-run and also creates a missing sub-folder when the parent folder is
# already present (e.g. after a partial or interrupted earlier run), which the
# previous "parent exists" guard silently skipped.
os.makedirs('./results/ConfoundingFactors/data', exist_ok=True)
os.makedirs('./results/ConfoundingFactors/figures', exist_ok=True)

# Calculate confounding factors - Complexity Metrics

In [4]:
# Read the preprocessed experiment data and collect the distinct values of
# its Algorithm and ProgrammingStyle columns.
df = pd.read_csv('./data/preprocessed_experiment_data.csv')
algorithms, programming_styles = (
    df[column].unique() for column in ('Algorithm', 'ProgrammingStyle')
)

In [7]:
# Compute radon complexity metrics for every (algorithm, programming style)
# code snippet and export them as a single table.

# Target dtype of each metric column; the key order is also the column order.
confounding_dtypes = {
    "LinesOfCode": int,
    "LogicalLinesOfCode": int,
    "SourceLinesOfCode": int,
    "MaintainabilityIndex": float,
    "CyclomaticComplexity": int,
    "HalsteadDifficulty": float,
}

# Collect one record per snippet and build the DataFrame once at the end
# instead of growing it row by row with .loc.
confounding_rows = []
for programming_style in tqdm(programming_styles):
    # Hyphens in the style label are replaced by underscores; the normalised
    # label is used both in the file name and in the ProgrammingStyle column.
    programming_style = programming_style.replace('-', '_')
    for algorithm in algorithms:
        snippet_path = f"./data/CodeSnippets/Source/{algorithm}_{programming_style}.py"
        # Load code snippet. Python source files are UTF-8 by default, so do not
        # depend on the platform's locale encoding. The file is closed before
        # the analysis starts.
        with open(snippet_path, 'r', encoding='utf-8') as f:
            code = f.read()

        # Raw line counts: analyse the snippet once and reuse the result.
        raw_metrics = analyze(code)
        # Cyclomatic complexity summed over all blocks reported by cc_visit,
        # skipping Class entries (presumably because their methods are
        # reported as separate blocks and would be counted twice - confirm
        # against the radon documentation).
        cc = sum(
            block.complexity
            for block in cc_visit(code)
            if not isinstance(block, radon.visitors.Class)
        )
        confounding_rows.append({
            "Algorithm": algorithm,
            "ProgrammingStyle": programming_style,
            "LinesOfCode": raw_metrics.loc,
            "LogicalLinesOfCode": raw_metrics.lloc,
            "SourceLinesOfCode": raw_metrics.sloc,
            # Second positional argument of mi_visit (radon's `multi` flag) is False.
            "MaintainabilityIndex": mi_visit(code, False),
            "CyclomaticComplexity": cc,
            # First element of h_visit's result (presumably the file-level
            # totals - confirm for the installed radon version).
            "HalsteadDifficulty": h_visit(code)[0].difficulty,
        })

df_confounding = pd.DataFrame(
    confounding_rows,
    columns=["Algorithm", "ProgrammingStyle", *confounding_dtypes],
).astype(confounding_dtypes)
df_confounding.to_excel('./results/ConfoundingFactors/data/confounding_factors.xlsx')

  0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
# Draw one violin plot per metric column (everything after Algorithm and
# ProgrammingStyle), grouped by programming style, and save each figure as
# <metric>.png in the figures folder.
metric_columns = df_confounding.columns[2:]
for factor in metric_columns:
    # The legend is disabled; seaborn returns the Axes it drew on.
    ax = sns.violinplot(x="ProgrammingStyle", y=factor, data=df_confounding, legend=False)
    upper = max(df_confounding[factor])
    if factor == "MaintainabilityIndex":
        # The maintainability index is shown on an inverted y axis
        # (largest observed value at the bottom, 0 at the top).
        ax.invert_yaxis()
        ax.set_ylim(upper, 0)
    else:
        # All other metrics run from 0 up to the largest observed value.
        ax.set_ylim(0, upper)
    plt.savefig(f"./results/ConfoundingFactors/figures/{factor}.png")
    plt.close('all')

# Calculate confounding factors - Size Of AOIS