## Data processing and Analysis

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import math

In [None]:
# Load the buggy-code lifetime records (comma-separated, which is pandas' default).
buggy_elements_df = pd.read_csv('../data/generated/buggy-code-lifetime-data.csv')

In [None]:
# Preview the first rows of the loaded data.
buggy_elements_df.head()

In [None]:
#quantum = buggy_elements_df.loc[((buggy_elements_df['bug_type'] == 'Classical'))]

#test_list = quantum['bug_id'].unique().tolist()


#test_list = list(map(int, test_list))

#print(len(test_list))

## Collecting lifetime, grouped by bug_id, buggy_line and buggy_component

In [None]:
# Map every (bug, line, component, bug type, fix commit) group to the list of
# its 'author_commit_date' values.
commit_date_group_keys = ['bug_id', 'buggy_line_number', 'buggy_component', 'bug_type', 'fix_commit_hash']
groupby_author_commit_date_dict = (
    buggy_elements_df
    .groupby(commit_date_group_keys)['author_commit_date']
    .apply(list)
    .to_dict()
)

In [None]:
# Dump the grouped commit dates for inspection.
print(groupby_author_commit_date_dict)

In [None]:
# Replace each group's list of commit dates with its lifetime: the difference
# between the two largest (i.e. latest) dates in the group.
# NOTE(review): a group holding a single date raises IndexError here --
# confirm every group has at least two entries.
for group_key in groupby_author_commit_date_dict:
    commit_dates = groupby_author_commit_date_dict[group_key]
    commit_dates.sort()
    latest, previous = commit_dates[-1], commit_dates[-2]
    groupby_author_commit_date_dict[group_key] = latest - previous

In [None]:
# Dump the mapping again; each value is now a lifetime instead of a date list.
print(groupby_author_commit_date_dict)

## Parsing dict to obtain lifetime per component into a dataframe

In [None]:
# Turn the {group key: lifetime} mapping into a two-column frame:
# column 0 holds the key tuple, column 1 the lifetime.
data_items = groupby_author_commit_date_dict.items()
data_list = [*data_items]
lifetime_per_component_df = pd.DataFrame(data_list)

In [None]:
# Preview the raw (key tuple, lifetime) frame.
lifetime_per_component_df.head()

In [None]:
def obtain_fifth_element(x):
    """Return the fifth item (index 4) of the indexable *x*."""
    fifth = x[4]
    return fifth

In [None]:
def obtain_fourth_element(x):
    """Return the fourth item (index 3) of the indexable *x*."""
    fourth = x[3]
    return fourth

In [None]:
def obtain_third_element(x):
    """Return the third item (index 2) of the indexable *x*."""
    third = x[2]
    return third

In [None]:
def obtain_second_element(x):
    """Return the second item (index 1) of the indexable *x*."""
    second = x[1]
    return second

In [None]:
def obtain_first_element(x):
    """Return the first item (index 0) of the indexable *x*."""
    first = x[0]
    return first

In [None]:
# Fifth entry of the key tuple in column 0 is the fix commit hash.
key_tuples = lifetime_per_component_df[0]
lifetime_per_component_df['Fix_Commit_Hash'] = key_tuples.map(obtain_fifth_element)

In [None]:
# Fourth entry of the key tuple in column 0 is the bug type.
key_tuples = lifetime_per_component_df[0]
lifetime_per_component_df['Bug Type'] = key_tuples.map(obtain_fourth_element)

In [None]:
# Second entry of the key tuple in column 0 is the buggy line number.
key_tuples = lifetime_per_component_df[0]
lifetime_per_component_df['Buggy Line'] = key_tuples.map(obtain_second_element)

In [None]:
# First entry of the key tuple in column 0 is the bug id.
key_tuples = lifetime_per_component_df[0]
lifetime_per_component_df['Bug ID'] = key_tuples.map(obtain_first_element)

In [None]:
# Overwrite column 0 with the third tuple entry (the buggy component). This
# discards the key tuples, so it has to run after the other entries have been
# pulled out of column 0.
key_tuples = lifetime_per_component_df[0]
lifetime_per_component_df[0] = key_tuples.map(obtain_third_element)

In [None]:
# Give the two positional columns descriptive names.
lifetime_per_component_df = lifetime_per_component_df.rename(
    columns={0: 'Buggy Component', 1: 'Lifetime'}
)

In [None]:
# Preview the frame after the key tuple has been split into named columns.
lifetime_per_component_df.head()

## Collecting the number of authors and the number of times modified, grouped by bug_id, buggy_line and buggy_component

In [None]:
# Map every (bug, line, component, bug type, fix commit) group to the list of
# its 'author_name' values.
author_group_keys = ['bug_id', 'buggy_line_number', 'buggy_component', 'bug_type', 'fix_commit_hash']
groupby_author_dict = (
    buggy_elements_df
    .groupby(author_group_keys)['author_name']
    .apply(list)
    .to_dict()
)

In [None]:
# Dump the grouped author names for inspection.
print(groupby_author_dict)

In [None]:
# Per-group accumulators, filled in group order by the loop that follows.
Number_Authors, Number_Modified = [], []

In [None]:
# For every group record how many entries it has (number of modifications)
# and how many distinct author names appear among them.
for author_names in groupby_author_dict.values():
    names = np.array(author_names)
    Number_Modified.append(len(names))
    Number_Authors.append(len(np.unique(names)))
    

# Adding number of authors to dataframe

In [None]:
# Positional assignment: relies on Number_Authors having one entry per row,
# in the same group order as the rows of the frame.
lifetime_per_component_df['Number of Authors'] = Number_Authors

# Adding number of times modified to dataframe

In [None]:
# Positional assignment: relies on Number_Modified having one entry per row,
# in the same group order as the rows of the frame.
lifetime_per_component_df['Number of Times Modified'] = Number_Modified

In [None]:
# Preview the frame with the two count columns added.
lifetime_per_component_df.head()

In [None]:
# Show (rows, columns) of the assembled frame.
lifetime_per_component_df.shape

In [None]:
# Reorder the columns by sorting their labels (axis=1 sorts columns, not rows).
lifetime_per_component_df = lifetime_per_component_df.sort_index(axis=1)

# GroupBy buggy component and aggregation for Lifetime, Number of Authors and Number of times modified using the mean as metric

In [None]:
# Mean lifetime, author count and modification count per buggy component,
# over all bug types.
overall_mean_metrics = {
    'Lifetime': 'mean',
    'Number of Authors': 'mean',
    'Number of Times Modified': 'mean',
}
component_df = lifetime_per_component_df.groupby('Buggy Component').agg(overall_mean_metrics)


# Converting seconds to days 

In [None]:
def second_to_days(x):
    """Convert a duration *x* given in seconds to days."""
    minutes = x / 60
    hours = minutes / 60
    return hours / 24

In [None]:
## Manual sanity check of second_to_days
## input : 14322352 seconds
## expected output : ~165.76796296 days

print(second_to_days(14322352))


In [None]:
# Express the mean lifetime in days instead of seconds.
component_df['Lifetime'] = component_df['Lifetime'].map(second_to_days)

In [None]:
# Preview the per-component means (lifetime now in days).
component_df.head()

## Quantum bugs: number of authors, number of times modified and lifetime of buggy code elements

In [None]:
# Keep only the rows whose bug type is 'Quantum'.
is_quantum_bug = lifetime_per_component_df['Bug Type'] == 'Quantum'
quantum_bug_dataframe = lifetime_per_component_df[is_quantum_bug]

In [None]:
# Mean lifetime, author count and modification count per buggy component,
# quantum bugs only.
quantum_mean_metrics = {
    'Lifetime': 'mean',
    'Number of Authors': 'mean',
    'Number of Times Modified': 'mean',
}
quantum_component_df = quantum_bug_dataframe.groupby('Buggy Component').agg(quantum_mean_metrics)

In [None]:
# Express the mean quantum lifetime in days instead of seconds.
quantum_component_df['Lifetime'] = quantum_component_df['Lifetime'].map(second_to_days)

## Classical bugs: number of authors, number of times modified and lifetime of buggy code elements

In [None]:
# Keep only the rows whose bug type is 'Classical'.
is_classical_bug = lifetime_per_component_df['Bug Type'] == 'Classical'
classical_bug_dataframe = lifetime_per_component_df[is_classical_bug]

In [None]:
# Mean lifetime, author count and modification count per buggy component,
# classical bugs only.
classical_mean_metrics = {
    'Lifetime': 'mean',
    'Number of Authors': 'mean',
    'Number of Times Modified': 'mean',
}
classical_component_df = classical_bug_dataframe.groupby('Buggy Component').agg(classical_mean_metrics)

In [None]:
# Express the mean classical lifetime in days instead of seconds.
classical_component_df['Lifetime'] = classical_component_df['Lifetime'].map(second_to_days)

# Barplot generator for each feature 

## Lifetime

In [None]:
# Horizontal bar chart: mean lifetime (days) per buggy component,
# Quantum vs. Classical.
index = component_df.index.tolist()

# Align both groups on the complete component list. A component that has no
# bugs of a given type gets 0. This replaces zero-insertion at hard-coded
# positions, which only lined up for one specific data set.
quantum_lifetime = quantum_component_df['Lifetime'].reindex(index, fill_value=0).to_numpy()
classical_lifetime = classical_component_df['Lifetime'].reindex(index, fill_value=0).to_numpy()

quantum_round_to_tenths = [round(num, 1) for num in quantum_lifetime]
classical_round_to_tenths = [round(num, 1) for num in classical_lifetime]

df = pd.DataFrame({'Quantum': quantum_round_to_tenths,
                   'Classical': classical_round_to_tenths}, index=index)

ax = df.plot.barh(figsize=(25, 25), color={'Quantum': 'black', 'Classical': 'gray'})

# Label every bar with its value, just past the end of the bar.
for p in ax.patches:
    # Only `fontsize` is passed: `size` is an alias of the same text property,
    # and recent matplotlib versions raise a TypeError when both are given.
    ax.annotate(str(p.get_width()), (p.get_x() + p.get_width(), p.get_y()),
                xytext=(2, 2), textcoords='offset points', fontsize=9)

ax.figure.savefig("RQ2_Lifetime.pdf", bbox_inches='tight')


## Number of Authors

In [None]:
# Horizontal bar chart: mean number of authors per buggy component,
# Quantum vs. Classical.
index = component_df.index.tolist()

# Align both groups on the complete component list. A component that has no
# bugs of a given type gets 0. This replaces zero-insertion at hard-coded
# positions, which only lined up for one specific data set.
quantum_author = quantum_component_df['Number of Authors'].reindex(index, fill_value=0).to_numpy()
classical_author = classical_component_df['Number of Authors'].reindex(index, fill_value=0).to_numpy()

quantum_round_to_tenths = [round(num, 1) for num in quantum_author]
classical_round_to_tenths = [round(num, 1) for num in classical_author]

df = pd.DataFrame({'Quantum': quantum_round_to_tenths,
                   'Classical': classical_round_to_tenths}, index=index)

ax = df.plot.barh(figsize=(25, 25), color={'Quantum': 'black', 'Classical': 'gray'})

# Label every bar with its value, just past the end of the bar.
for p in ax.patches:
    # Only `fontsize` is passed: `size` is an alias of the same text property,
    # and recent matplotlib versions raise a TypeError when both are given.
    ax.annotate(str(p.get_width()), (p.get_x() + p.get_width(), p.get_y()),
                xytext=(2, 2), textcoords='offset points', fontsize=9)

ax.figure.savefig("RQ2_Author.pdf", bbox_inches='tight')

## Number of Times Modified

In [None]:
# Horizontal bar chart: mean number of modifications per buggy component,
# Quantum vs. Classical.
index = component_df.index.tolist()

# Align both groups on the complete component list. A component that has no
# bugs of a given type gets 0. This replaces zero-insertion at hard-coded
# positions, which only lined up for one specific data set.
quantum_modified = quantum_component_df['Number of Times Modified'].reindex(index, fill_value=0).to_numpy()
classical_modified = classical_component_df['Number of Times Modified'].reindex(index, fill_value=0).to_numpy()

quantum_round_to_tenths = [round(num, 1) for num in quantum_modified]
classical_round_to_tenths = [round(num, 1) for num in classical_modified]

df = pd.DataFrame({'Quantum': quantum_round_to_tenths,
                   'Classical': classical_round_to_tenths}, index=index)

ax = df.plot.barh(figsize=(25, 25), color={'Quantum': 'black', 'Classical': 'gray'})

# Label every bar with its value, just past the end of the bar.
for p in ax.patches:
    # Only `fontsize` is passed: `size` is an alias of the same text property,
    # and recent matplotlib versions raise a TypeError when both are given.
    ax.annotate(str(p.get_width()), (p.get_x() + p.get_width(), p.get_y()),
                xytext=(2, 2), textcoords='offset points', fontsize=9)

ax.figure.savefig("RQ2_Modifications.pdf", bbox_inches='tight')

## Boxplot

In [None]:
# Mean metrics per (buggy component, bug id) pair, classical bugs only.
classical_boxplot_metrics = {
    'Lifetime': 'mean',
    'Number of Authors': 'mean',
    'Number of Times Modified': 'mean',
}
classical_boxplot_component_bugId_df = (
    classical_bug_dataframe
    .groupby(['Buggy Component', 'Bug ID'])
    .agg(classical_boxplot_metrics)
)

In [None]:
# Express the classical per-bug mean lifetime in days instead of seconds.
classical_boxplot_component_bugId_df['Lifetime'] = (
    classical_boxplot_component_bugId_df['Lifetime'].map(second_to_days)
)

In [None]:
# Preview the classical per-(component, bug id) means.
classical_boxplot_component_bugId_df.head()

In [None]:
# Mean metrics per (buggy component, bug id) pair, quantum bugs only.
quantum_boxplot_metrics = {
    'Lifetime': 'mean',
    'Number of Authors': 'mean',
    'Number of Times Modified': 'mean',
}
quantum_boxplot_component_bugId_df = (
    quantum_bug_dataframe
    .groupby(['Buggy Component', 'Bug ID'])
    .agg(quantum_boxplot_metrics)
)

In [None]:
# Express the quantum per-bug mean lifetime in days instead of seconds.
quantum_boxplot_component_bugId_df['Lifetime'] = (
    quantum_boxplot_component_bugId_df['Lifetime'].map(second_to_days)
)

In [None]:
# Preview the quantum per-(component, bug id) means.
quantum_boxplot_component_bugId_df.head()

In [None]:
# Lifetime distribution per buggy component: Quantum in the top panel,
# Classical in the bottom one.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(25, 25))

lifetime_boxplot_options = dict(column=['Lifetime'], by='Buggy Component',
                                vert=False, figsize=(25, 25))
classical_boxplot_component_bugId_df.boxplot(ax=ax2, **lifetime_boxplot_options)
quantum_boxplot_component_bugId_df.boxplot(ax=ax1, **lifetime_boxplot_options)

# Blank the current axes title and the figure suptitle, then label each panel
# and clear its axis labels.
plt.title('')
plt.suptitle('')
for panel, panel_title in ((ax1, 'Quantum'), (ax2, 'Classical')):
    panel.set_title(panel_title)
    panel.set_xlabel("")
    panel.set_ylabel("")

fig.savefig("Average_Lifetime_Distribution_Per_BugID_BuggyComponent.pdf", bbox_inches='tight')




In [None]:
# Distribution of the number of modifications per buggy component: Quantum in
# the top panel, Classical in the bottom one.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(25, 25))

modified_boxplot_options = dict(column=['Number of Times Modified'], by='Buggy Component',
                                vert=False, figsize=(25, 25))
classical_boxplot_component_bugId_df.boxplot(ax=ax2, **modified_boxplot_options)
quantum_boxplot_component_bugId_df.boxplot(ax=ax1, **modified_boxplot_options)

# Blank the current axes title and the figure suptitle, then label each panel
# and clear its axis labels.
plt.title('')
plt.suptitle('')
for panel, panel_title in ((ax1, 'Quantum'), (ax2, 'Classical')):
    panel.set_title(panel_title)
    panel.set_xlabel("")
    panel.set_ylabel("")

fig.savefig("Average_NumberOfTimesModified_Distribution_Per_BugID_BuggyComponent.pdf", bbox_inches='tight')

In [None]:
# Distribution of the number of authors per buggy component: Quantum in the
# top panel, Classical in the bottom one.
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(25, 25))

authors_boxplot_options = dict(column=['Number of Authors'], by='Buggy Component',
                               vert=False, figsize=(25, 25))
classical_boxplot_component_bugId_df.boxplot(ax=ax2, **authors_boxplot_options)
quantum_boxplot_component_bugId_df.boxplot(ax=ax1, **authors_boxplot_options)

# Blank the current axes title and the figure suptitle, then label each panel
# and clear its axis labels.
plt.title('')
plt.suptitle('')
for panel, panel_title in ((ax1, 'Quantum'), (ax2, 'Classical')):
    panel.set_title(panel_title)
    panel.set_xlabel("")
    panel.set_ylabel("")

fig.savefig("Average_NumberOfAuthors_Distribution_Per_BugID_BuggyComponent.pdf", bbox_inches='tight')