In [28]:
import json
import functools
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import pandas as pd

colors = sns.color_palette()

# Result files produced by the different analysis runs
stackoverflow = "stackoverflow_run.json"
ground_truth = "ground_truth_run.json"
binnacle = "binnacle_run.json"
merged = "merged_run.json"

# Top-level keys that are read from a result file
smellyFiles = "smellyFiles"
totalSmellsPresent = "totalSmellsPresent"
smellsPerFile = "smellsPerFile"

# Result file currently being analysed
currentFile = stackoverflow

# Load the result file and transform the JSON into a dict. The context manager
# closes the handle even when parsing fails, and the explicit encoding keeps the
# read independent of the platform's default text encoding.
with open(currentFile, encoding="utf-8") as f:
    data = json.load(f)

# Order both per-rule lists by their "times" field, highest first
data[totalSmellsPresent].sort(key=lambda x: x["times"], reverse=True)
data[smellyFiles].sort(key=lambda x: x["times"], reverse=True)


# Total number of smell detections, summed over the "times" field of every rule
total_amount_of_smells_detected = sum(entry["times"] for entry in data[totalSmellsPresent])
print("Total amount of smells: ", total_amount_of_smells_detected)

# Files with smells versus files without smells (and numbers):
# keep every per-file entry whose "smells" list is empty
files_without_smells = [entry for entry in data[smellsPerFile] if len(entry["smells"]) == 0]
print("Files without smells: ", len(files_without_smells))

# Plot graph of files infected with smell


# Proportional detection of smells (input for a pie chart)

# NOTE: at the moment the pie chart is too detailed. For StackOverflow it would be wise to group
# everything under 2% into one slice, explode that slice and show its contents in a second
# pie chart, maybe accompanied by a table that lists the smells from most to least detected.
smells = [entry["rule"] for entry in data[totalSmellsPresent]]
detections = [entry["times"] for entry in data[totalSmellsPresent]]
# Share of each rule in the total number of detections
proportions = [count / total_amount_of_smells_detected for count in detections]

# The threshold decides whether a smell gets grouped into "Others":
# every smell whose share is strictly below it is grouped.
other_threshold = 0.04
is_minor = [share < other_threshold for share in proportions]

# Smells that fall below the threshold
other_smells = [rule for rule, minor in zip(smells, is_minor) if minor]
other_detections = [count for count, minor in zip(detections, is_minor) if minor]

# Remaining smells, followed by one "Others" entry holding the summed minor detections
smells_copy = [rule for rule, minor in zip(smells, is_minor) if not minor] + ["Others"]
detections_copy = [count for count, minor in zip(detections, is_minor) if not minor] + [sum(other_detections)]


# Plot
#plt.pie(detections_copy, labels = smells_copy, colors = colors, autopct='%.1f%%', explode = (0,0,0,0,0,0,0,0.1))
#plt.show()
# Table of smells in the order of the source list, with detection count and share
total_smells_df = pd.DataFrame(
    {
        'Smell': smells,
        '# detections': detections,
        'proportion': proportions,
    }
)




# Per rule in data[smellyFiles]: its "times" value (shown as '# files') and that value
# divided by the number of entries in data[smellsPerFile]
smellyfiles_smells = [entry["rule"] for entry in data[smellyFiles]]
smellyfiles_detections = [entry["times"] for entry in data[smellyFiles]]
smellyfiles_proportions = [count / len(data[smellsPerFile]) for count in smellyfiles_detections]

smellyfiles_df = pd.DataFrame(
    {
        'Smell': smellyfiles_smells,
        '# files': smellyfiles_detections,
        'proportion': smellyfiles_proportions,
    }
)
print(smellyfiles_df)



# TODO: this could be a single table with the columns: smell | # detections | # files it is present in | proportion of smells


# Calculate smell density: total number of detected smells divided by the number of files
amount_of_files = len(data[smellsPerFile])
print("Amount of files: ", amount_of_files)
# A result file without any analysed files has no defined density; report NaN
# instead of failing with a ZeroDivisionError.
if amount_of_files:
    smell_density = total_amount_of_smells_detected / amount_of_files
else:
    smell_density = float("nan")
print("Smell density is: ", smell_density)


# Make sure the result file handle is released (a no-op if it is already closed)
f.close()

Total amount of smells:  57292
Files without smells:  17023
     Smell  # files  proportion
0   DL3008     3128    0.125718
1   DL9011     3003    0.120695
2   DL3015     2910    0.116957
3   DL3059     1835    0.073751
4   DL9005     1610    0.064708
5   DL9008     1588    0.063824
6   DL3042     1470    0.059081
7   DL3018     1128    0.045336
8   DL3013      744    0.029902
9   DL3019      621    0.024959
10  DL3016      460    0.018488
11  DL9004      437    0.017564
12  DL9009      398    0.015996
13  DL9020      362    0.014549
14  DL9000      300    0.012057
15  DL3032      297    0.011937
16  DL9012      288    0.011575
17  DL9001      284    0.011414
18  DL3014      284    0.011414
19  DL9006      282    0.011334
20  DL3033      271    0.010892
21  DL9002      253    0.010168
22  DL3060      174    0.006993
23  DL9018      133    0.005345
24  DL9014       48    0.001929
25  DL9003       41    0.001648
26  DL9010       35    0.001407
27  DL9015       29    0.001166
28  DL3030  