In [1]:
import pandas as pd
import os

# Files to use for the analysis
detected_bugs_file = os.path.join('detected_bugs_3.csv')
projects_ranking_file = os.path.join('jmh-projects-bigquery-fh-201702 - jmh-projects-gh.csv')
top = 25

detected_bugs = pd.read_csv(detected_bugs_file, sep=';')
projects_ranking = pd.read_csv(projects_ranking_file, sep=',')

print('Detected Bugs...')
detected_bugs.head(3)

print('\n\nProjects...')
projects_ranking.head(3)

Detected Bugs...


Projects...


839

In [140]:
# Normalize the projects name
projects_ranking.index = projects_ranking['project'].apply(lambda x: x.split('/')[1])
print('Normalizing the project names - Using only the first part...')
projects_ranking.head(3)

Normalizing the project names - Using only the first part...


Unnamed: 0_level_0,project,forked,watchers,stars,forks,subscribers
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RxJava,ReactiveX/RxJava,False,23558,23558,4143,1754
feign,Netflix/feign,False,1716,1716,335,276
netty,netty/netty,False,9746,9746,4775,1243


In [141]:
# I had some problems with duplicated entries in the data 
duplicated = detected_bugs.duplicated(subset=['project', 'bugtype'], keep='first')

duplicated_bugs = detected_bugs[duplicated]
print('Duplicated entries (last)')
duplicated_bugs

# Remove duplicates
detected_bugs = detected_bugs[~duplicated]

Duplicated entries (last)


In [142]:
# Use a Pivot Table for the bugs
detected_bugs_pivot = detected_bugs.pivot(index='project', columns='bugtype', values='count')
detected_bugs_pivot = detected_bugs_pivot.fillna(0)
print('Using a Pivot table for the bugs...')
detected_bugs_pivot.columns
detected_bugs_pivot.head(3)

Using a Pivot table for the bugs...


bugtype,JMH_BENCHMARKMODE_SINGLESHOT,JMH_BENCHMARK_METHOD_FOUND,JMH_FIXTURE_USING_INVOCATION_SCOPE,JMH_IGNORED_METHOD_RETURN,JMH_LOOP_INSIDE_BENCHMARK,JMH_NOTFORKED_BENCHMARK,JMH_STATE_FINAL_FIELD
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
FieldMagic,0.0,3.0,0.0,0.0,1.0,0.0,7.0
JCTools,0.0,90.0,2.0,8.0,41.0,0.0,355.0
NeoEMF,2.0,15.0,0.0,1.0,0.0,0.0,21.0


In [143]:
relative = pd.DataFrame()
for column in detected_bugs_pivot.columns:
    relative['%s_rel' % column] = (detected_bugs_pivot[column] / detected_bugs_pivot['JMH_BENCHMARK_METHOD_FOUND']) * 100.00

# Rounding the float 
relative = relative.round(2)
    
detected_bugs_pivot = detected_bugs_pivot.join(relative)
detected_bugs_pivot.head(3)
#bugs_relative = detected_bugs_pivot / (detected_bugs_pivot['JMH_BENCHMARK_METHOD_FOUND'], axis='columns')
#print('Amount of bugs found relative to the amount of bench analyzed')
#print(bugs_relative)

Unnamed: 0_level_0,JMH_BENCHMARKMODE_SINGLESHOT,JMH_BENCHMARK_METHOD_FOUND,JMH_FIXTURE_USING_INVOCATION_SCOPE,JMH_IGNORED_METHOD_RETURN,JMH_LOOP_INSIDE_BENCHMARK,JMH_NOTFORKED_BENCHMARK,JMH_STATE_FINAL_FIELD,JMH_BENCHMARKMODE_SINGLESHOT_rel,JMH_BENCHMARK_METHOD_FOUND_rel,JMH_FIXTURE_USING_INVOCATION_SCOPE_rel,JMH_IGNORED_METHOD_RETURN_rel,JMH_LOOP_INSIDE_BENCHMARK_rel,JMH_NOTFORKED_BENCHMARK_rel,JMH_STATE_FINAL_FIELD_rel
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
FieldMagic,0.0,3.0,0.0,0.0,1.0,0.0,7.0,0.0,100.0,0.0,0.0,33.33,0.0,233.33
JCTools,0.0,90.0,2.0,8.0,41.0,0.0,355.0,0.0,100.0,2.22,8.89,45.56,0.0,394.44
NeoEMF,2.0,15.0,0.0,1.0,0.0,0.0,21.0,13.33,100.0,0.0,6.67,0.0,0.0,140.0


In [144]:
# Merge both bugs x projects ranking
bugs_per_project = detected_bugs_pivot.join(projects_ranking)
print('\n\nMerged Dataset...')
print('Amount of Projects: %d' % len(bugs_per_project))
bugs_per_project.head(3)




Merged Dataset...
Amount of Projects: 268


Unnamed: 0_level_0,JMH_BENCHMARKMODE_SINGLESHOT,JMH_BENCHMARK_METHOD_FOUND,JMH_FIXTURE_USING_INVOCATION_SCOPE,JMH_IGNORED_METHOD_RETURN,JMH_LOOP_INSIDE_BENCHMARK,JMH_NOTFORKED_BENCHMARK,JMH_STATE_FINAL_FIELD,JMH_BENCHMARKMODE_SINGLESHOT_rel,JMH_BENCHMARK_METHOD_FOUND_rel,JMH_FIXTURE_USING_INVOCATION_SCOPE_rel,JMH_IGNORED_METHOD_RETURN_rel,JMH_LOOP_INSIDE_BENCHMARK_rel,JMH_NOTFORKED_BENCHMARK_rel,JMH_STATE_FINAL_FIELD_rel,project,forked,watchers,stars,forks,subscribers
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
FieldMagic,0.0,3.0,0.0,0.0,1.0,0.0,7.0,0.0,100.0,0.0,0.0,33.33,0.0,233.33,jerrinot/FieldMagic,False,16,16,2,5
JCTools,0.0,90.0,2.0,8.0,41.0,0.0,355.0,0.0,100.0,2.22,8.89,45.56,0.0,394.44,JCTools/JCTools,False,1053,1053,149,134
JCTools,0.0,90.0,2.0,8.0,41.0,0.0,355.0,0.0,100.0,2.22,8.89,45.56,0.0,394.44,akarnokd/JCTools,True,0,0,0,1


In [145]:
# Pivot Table

print('---------------------------------------')
print('\n\nAnalyzing the top %d projects' % top)
analysis = bugs_per_project.sort_values(by='stars', ascending=False)[:top]



---------------------------------------


Analyzing the top 25 projects


In [146]:
columns_to_export = [
 #   'stars',
  #  'forks',
    #'subscribers',
    'JMH_BENCHMARK_METHOD_FOUND',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel',
    'JMH_IGNORED_METHOD_RETURN',
    'JMH_IGNORED_METHOD_RETURN_rel',
    'JMH_LOOP_INSIDE_BENCHMARK',
    'JMH_LOOP_INSIDE_BENCHMARK_rel',
    'JMH_STATE_FINAL_FIELD',
    'JMH_STATE_FINAL_FIELD_rel',
    'JMH_NOTFORKED_BENCHMARK',
    'JMH_NOTFORKED_BENCHMARK_rel',
    'JMH_BENCHMARKMODE_SINGLESHOT',
    'JMH_BENCHMARKMODE_SINGLESHOT_rel'
]


bug_type_dict = {
    'JMH_BENCHMARK_METHOD_FOUND': 'Benchs',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE': 'INVO',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel': 'INVO %',
    'JMH_IGNORED_METHOD_RETURN': 'RETU',
    'JMH_IGNORED_METHOD_RETURN_rel': 'RETU %',
    'JMH_LOOP_INSIDE_BENCHMARK': 'LOOP',
    'JMH_LOOP_INSIDE_BENCHMARK_rel': 'LOOP %',
    'JMH_STATE_FINAL_FIELD': 'FINA',
    'JMH_STATE_FINAL_FIELD_rel': 'FINA %',
    'JMH_NOTFORKED_BENCHMARK': 'FORK',
    'JMH_NOTFORKED_BENCHMARK_rel': 'FORK %',
    'JMH_BENCHMARKMODE_SINGLESHOT': 'SING',
    'JMH_BENCHMARKMODE_SINGLESHOT_rel': 'SING %'
}

latex = analysis[columns_to_export]
latex = latex.rename(columns=bug_type_dict)
latex = latex.fillna('')
latex = latex.replace({0: ''})

print(latex.to_latex(bold_rows=True))

\begin{tabular}{llllllllllllll}
\toprule
{} & Benchs & INVO & INVO \% &  RETU & RETU \% &   LOOP & LOOP \% &   FINA &   FINA \% & FORK & FORK \% & SING & SING \% \\
project             &        &      &        &       &        &        &        &        &          &      &        &      &        \\
\midrule
RxJava              & 122.00 &      &        &       &        &  13.00 &  10.66 & 132.00 &   108.20 &      &        &      &        \\
netty               & 143.00 & 3.00 &   2.10 & 88.00 &  61.54 &  30.00 &  20.98 & 327.00 &   228.67 &      &        &      &        \\
zipkin              &  52.00 &      &        &       &        &        &        &  69.00 &   132.69 &      &        &      &        \\
druid               & 127.00 & 1.00 &   0.79 & 20.00 &  15.75 & 148.00 & 116.54 & 328.00 &   258.27 &      &        &      &        \\
okio                &  11.00 & 1.00 &   9.09 &  5.00 &  45.45 &        &        &  34.00 &   309.09 &      &        &      &        \\
grpc-java       

In [147]:
latex

Unnamed: 0_level_0,Benchs,INVO,INVO %,RETU,RETU %,LOOP,LOOP %,FINA,FINA %,FORK,FORK %,SING,SING %
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
RxJava,122.0,,,,,13.0,10.66,132.0,108.2,,,,
netty,143.0,3.0,2.1,88.0,61.54,30.0,20.98,327.0,228.67,,,,
zipkin,52.0,,,,,,,69.0,132.69,,,,
druid,127.0,1.0,0.79,20.0,15.75,148.0,116.54,328.0,258.27,,,,
okio,11.0,1.0,9.09,5.0,45.45,,,34.0,309.09,,,,
grpc-java,18.0,,,2.0,11.11,6.0,33.33,26.0,144.44,,,,
caffeine,37.0,,,1.0,2.7,9.0,24.32,141.0,381.08,,,,
h2o-3,14.0,8.0,57.14,,,12.0,85.71,48.0,342.86,,,,
gs-collections,451.0,,,41.0,9.09,114.0,25.28,520.0,115.3,,,,
byte-buddy,39.0,,,,,,,42.0,107.69,,,,
