# Introduction

Notebook to analyse the PyBossa taskruns from the Expert App Version 2.

# Load Libraries and Data

In [72]:
# Project helper that must run before the `modules.*` imports further down.
# NOTE(review): presumably extends the import search path so that the project's
# `modules` package can be resolved - confirm against mod_finder_util.
from mod_finder_util import mod_finder_util
mod_finder_util.add_modules_origin_search_path()

In [64]:
import pandas as pd
import seaborn as sns

import modules.utils.firefox_dataset_p2 as fd

from sklearn.metrics import cohen_kappa_score

# Location of the exported taskruns, relative to the notebook's directory.
TASKRUNS_CSV_PATH = '../data/mozilla_firefox_v2/firefoxDataset/br_feat_recovery_empirical_study/pybossa-apps/recover_taskruns/taskruns_expert_2.csv'

# One row per taskrun; the shape is printed as a quick sanity check of the load.
taskruns = pd.read_csv(TASKRUNS_CSV_PATH)
print('TaskRuns shape: {}'.format(taskruns.shape))

TaskRuns shape: (93, 11)


# Grouping Contributions By Time

In [65]:
# Parse the creation timestamp, then derive its calendar parts with the
# vectorised `.dt` accessor instead of a row-wise `apply`, which is slower
# and does not yield a usable column when the frame has no rows.
# Note: depending on the pandas version the derived columns may be int32
# rather than int64; the grouping below is unaffected.
taskruns['created'] = pd.to_datetime(taskruns['created'], yearfirst=True)
taskruns['created_month'] = taskruns['created'].dt.month
taskruns['created_day'] = taskruns['created'].dt.day
taskruns['created_hour'] = taskruns['created'].dt.hour

# Non-null count of every remaining column per (month, day, hour) bucket.
grouped_trs = taskruns.groupby(by=['created_month','created_day','created_hour']).count()
grouped_trs

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,bug_id,user_id,task_id,created,finish_time,user_ip,link,timeout,project_id,id,answers
created_month,created_day,created_hour,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3,7,0,38,38,38,38,38,38,38,38,38,38,38
3,7,11,6,6,6,6,6,6,6,6,6,6,6
3,7,12,49,49,49,49,49,49,49,49,49,49,49


# Count TaskRuns by Task

In [66]:
cols = ['task_id', 'id']

# Number of taskruns (non-null `id` values) recorded for each task_id.
df = (
    taskruns.loc[:, cols]
    .groupby('task_id')
    .count()
    .rename(columns={'id': 'count_trs'})
)
display(df.tail(10))
print(df.shape)

Unnamed: 0_level_0,count_trs
task_id,Unnamed: 1_level_1
2011,1
2012,1
2013,1
2014,1
2015,1
2016,1
2017,1
2018,1
2019,1
2020,1


(93, 1)


# Check All Tasks Have At Least One Answer

In [67]:
# `df` is indexed by the task ids that received at least one taskrun, so the
# tasks WITHOUT an answer are the expected ids that are absent from that index.
# (Subtracting in the other direction only reveals ids outside the range and
# can never report a missing task.)
# NOTE(review): 1928..2020 is assumed to be the task id range of this project - confirm.
expected_task_ids = set(range(1928, 2020 + 1))
print(expected_task_ids - set(df.index))

set()


# Analysis of Taskrun Information

In [68]:
# Peek at the first ten bug reports together with their raw answer strings.
preview_cols = ['bug_id', 'answers']
taskruns[preview_cols].head(10)

Unnamed: 0,bug_id,answers
0,1181835,0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1,1248267,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2,1248268,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3,1257087,1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4,1264988,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5,1267480,0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6,1267501,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7,1269348,0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8,1269485,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9,1270274,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


In [71]:
# Show the dtype of every column currently held in the taskruns frame.
taskruns.dtypes

bug_id                    int64
user_id                   int64
task_id                   int64
created          datetime64[ns]
finish_time              object
user_ip                  object
link                     object
timeout                  object
project_id                int64
id                        int64
answers                  object
created_month             int64
created_day               int64
created_hour              int64
dtype: object

# Comparing Taskrun Answers with the BR_2_Feat Expert Matrix

In [70]:
features = fd.Datasets.read_features_df()
bugreports = fd.Datasets.read_selected_bugreports_df()

# Empty (bug report x feature) frame that is filled from the taskrun answers.
df_2 = pd.DataFrame(columns=features.feat_name.values,
                    index=bugreports.Bug_Number)

# In-place sort is kept deliberately: it reorders the shared `taskruns` frame
# for whatever runs after this cell.
taskruns.sort_values(by='bug_id', inplace=True)

# Each answer string is a space-separated list of integers; the i-th value
# belongs to the i-th feature column of df_2.
for bug_id, answers in zip(taskruns['bug_id'], taskruns['answers']):
    for i, value in enumerate(answers.split(" ")):
        df_2.at[bug_id, df_2.columns[i]] = int(value)

br_2_feat_expert_matrix = fd.Feat_BR_Oracles.read_feat_br_expert_df()

# Compare the index values themselves. Comparing str() of the underlying arrays
# is unreliable because numpy abbreviates long arrays with "..." when printing.
print(br_2_feat_expert_matrix.index.equals(df_2.index))

# Flatten both matrices row by row, in the expert matrix's row/column order,
# so that position k in a1 and a2 refers to the same (bug report, feature) cell.
aligned_answers = df_2.loc[br_2_feat_expert_matrix.index, br_2_feat_expert_matrix.columns]
a1 = aligned_answers.to_numpy().ravel().tolist()
a2 = br_2_feat_expert_matrix.to_numpy().ravel().tolist()

# Agreement between the taskrun answers and the expert matrix.
print(cohen_kappa_score(a1, a2))

display(br_2_feat_expert_matrix.head(10))
display(df_2.head(10))

Features.shape: (21, 8)
SelectedBugReports.shape: (93, 18)
Expert Matrix shape: (93, 21)
True
0.97906759890104


Unnamed: 0_level_0,new_awesome_bar,windows_child_mode,apz_async_scrolling,browser_customization,pdf_viewer,context_menu,w10_comp,tts_in_desktop,tts_in_rm,webgl_comp,...,pointer_lock_api,webm_eme,zoom_indicator,downloads_dropmaker,webgl2,flac_support,indicator_device_perm,flash_support,notificationbox,update_directory
bug_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1181835,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248267,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248268,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1257087,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1264988,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267480,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267501,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269348,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269485,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1270274,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0_level_0,new_awesome_bar,windows_child_mode,apz_async_scrolling,browser_customization,pdf_viewer,context_menu,w10_comp,tts_in_desktop,tts_in_rm,webgl_comp,...,pointer_lock_api,webm_eme,zoom_indicator,downloads_dropmaker,webgl2,flac_support,indicator_device_perm,flash_support,notificationbox,update_directory
Bug_Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1181835,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248267,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248268,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1257087,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1264988,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267480,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267501,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269348,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269485,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1270274,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Correcting the Taskrun Answers - Fixing a Bug Identified in the Applications

In [77]:
# Show both ends of the (bug_id, answers) pairs.
answers_view = taskruns[['bug_id','answers']]
display(answers_view.head())
display(answers_view.tail())

Unnamed: 0,bug_id,answers
0,1181835,0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1,1248267,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2,1248268,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3,1257087,1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4,1264988,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


Unnamed: 0,bug_id,answers
88,1408361,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
89,1430603,0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
90,1432915,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
91,1449700,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
92,1451475,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


In [82]:
# Rotate the answers by one row position: the last answer wraps around to the
# first row and every other row receives the answer of the row before it.
# The result depends on the current row order of `taskruns`.
# NOTE(review): the recorded kappa outputs are 0.979 for the unshifted answers
# and 0.463 for the shifted ones - confirm this shift is really intended.
answers_in_row_order = list(taskruns.answers.values)
new_answers = [answers_in_row_order[-1]] + answers_in_row_order[:-1]
taskruns['new_answers'] = new_answers
taskruns[['bug_id','answers','new_answers']].head()

Unnamed: 0,bug_id,answers,new_answers
0,1181835,0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1,1248267,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2,1248268,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3,1257087,1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4,1264988,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0,1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


In [86]:
features = fd.Datasets.read_features_df()
bugreports = fd.Datasets.read_selected_bugreports_df()

# Empty (bug report x feature) frame that is filled from the shifted answers.
df_3 = pd.DataFrame(columns=features.feat_name.values,
                    index=bugreports.Bug_Number)

# In-place sort is kept deliberately: it reorders the shared `taskruns` frame
# for whatever runs after this cell.
taskruns.sort_values(by='bug_id', inplace=True)

# Each answer string is a space-separated list of integers; the i-th value
# belongs to the i-th feature column of df_3.
for bug_id, answers in zip(taskruns['bug_id'], taskruns['new_answers']):
    for i, value in enumerate(answers.split(" ")):
        df_3.at[bug_id, df_3.columns[i]] = int(value)

br_2_feat_expert_matrix = fd.Feat_BR_Oracles.read_feat_br_expert_df()

# Compare the index values themselves. Comparing str() of the underlying arrays
# is unreliable because numpy abbreviates long arrays with "..." when printing.
print(br_2_feat_expert_matrix.index.equals(df_3.index))

# Flatten both matrices row by row, in the expert matrix's row/column order,
# so that position k in a1 and a2 refers to the same (bug report, feature) cell.
aligned_answers = df_3.loc[br_2_feat_expert_matrix.index, br_2_feat_expert_matrix.columns]
a1 = aligned_answers.to_numpy().ravel().tolist()
a2 = br_2_feat_expert_matrix.to_numpy().ravel().tolist()

# Agreement between the shifted taskrun answers and the expert matrix.
print(cohen_kappa_score(a1, a2))

# Name the index like the expert matrix's index before displaying and exporting.
df_3.index.names = ['bug_number']

display(br_2_feat_expert_matrix.head(10))
display(df_3.head(10))

# Persist the matrix built from the shifted answers.
df_3.to_csv('../data/mozilla_firefox_v2/firefoxDataset/oracle/output/firefox_v2/feat_br/br_2_feature_matrix_expert_2_2.csv')

Features.shape: (21, 8)
SelectedBugReports.shape: (93, 18)
Expert Matrix shape: (93, 21)
True
0.46265615803349713


Unnamed: 0_level_0,new_awesome_bar,windows_child_mode,apz_async_scrolling,browser_customization,pdf_viewer,context_menu,w10_comp,tts_in_desktop,tts_in_rm,webgl_comp,...,pointer_lock_api,webm_eme,zoom_indicator,downloads_dropmaker,webgl2,flac_support,indicator_device_perm,flash_support,notificationbox,update_directory
bug_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1181835,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248267,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248268,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1257087,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1264988,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267480,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267501,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269348,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269485,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1270274,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0_level_0,new_awesome_bar,windows_child_mode,apz_async_scrolling,browser_customization,pdf_viewer,context_menu,w10_comp,tts_in_desktop,tts_in_rm,webgl_comp,...,pointer_lock_api,webm_eme,zoom_indicator,downloads_dropmaker,webgl2,flac_support,indicator_device_perm,flash_support,notificationbox,update_directory
bug_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1181835,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248267,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248268,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1257087,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1264988,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267480,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1267501,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269348,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1269485,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1270274,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
