## Read Files

In [63]:
import pandas as pd
import os

# Locations of the inputs used by the analysis.

# detected_bugs_file = os.path.join('detected_bugs_3.csv')
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because the bug-type analysis cell reads this variable.
dir = "results"

projects_ranking_file = 'jmh-projects-bigquery-fh-201702 - jmh-projects-gh.csv'

# Number of rows kept after the projects have been ranked.
top = 25

projects = pd.read_csv(projects_ranking_file, sep=',')

# Derive two helper columns from the '/'-separated `project` value:
# `xml` is the part after the first slash plus '.xml', `name` the part before it.
projects['xml'] = projects['project'].map(lambda slug: slug.split('/')[1] + '.xml')
projects['name'] = projects['project'].map(lambda slug: slug.split('/')[0])

## Remove Projects that could not be analyzed

In [64]:
# Projects excluded up front because they could not be analyzed.
projects_to_remove = [
    'jgrapht/jgrapht',
    'lemire/RoaringBitmap',
    'RoaringBitmap/RoaringBitmap',
    'requery/requery',
    'Netflix/feign',
    'crate/crate',
    'real-logic/simple-binary-encoding',
    'jbachorik/btrace',
    'promeG/TinyPinyin',
    'pcollections/pcollections',
    'ktoso/sbt-jmh',
    'junkdog/artemis-odb',
    'fakemongo/fongo',
    'graalvm/truffle',
    'graalvm/graal-core',
]

# Keep only the rows whose `project` is not on the exclusion list.
projects = projects.loc[~projects['project'].isin(projects_to_remove)]

# Rank by watchers, then stars, then forks (all descending) and keep the
# first `top` rows.
top_projects = (
    projects
    .sort_values(by=['watchers', 'stars', 'forks'], ascending=False)
    .head(top)
)

## Analyze the bugs by reported type

In [65]:
from collections import Counter
import xml.etree.ElementTree as ET

def analyzeBugTypes(df, results_dir=None):
    """Count the bug instances in one project's XML report, grouped by type.

    Intended to be applied row-wise, e.g.
    ``projects.apply(analyzeBugTypes, axis=1)``.

    Parameters
    ----------
    df : pandas.Series
        One project row (the parameter name is historical). Only
        ``df['xml']`` is read: the file name of the project's XML report.
    results_dir : str, optional
        Directory that contains the XML reports. When omitted, the
        notebook-level ``dir`` variable is used.

    Returns
    -------
    pandas.Series
        A copy of the row with one additional entry per bug type found,
        holding the number of counted ``BugInstance`` elements of that type.
        A ``BugInstance`` is skipped when it has no ``Class`` child or when
        the ``classname`` attribute of that child contains ``'generated'``.

    Raises
    ------
    OSError
        If the report file cannot be opened.
    xml.etree.ElementTree.ParseError
        If the report is not well-formed XML.
    """
    if results_dir is None:
        results_dir = dir

    root = ET.parse(os.path.join(results_dir, df['xml'])).getroot()

    bugs = Counter()
    for bug in root.findall('BugInstance'):
        clazz = bug.find('Class')
        # Compare against None explicitly: an Element without child nodes is
        # falsy, so a plain truth test would drop bugs whose <Class> element
        # has no children.
        if clazz is None:
            continue
        # A missing classname attribute is treated as "not generated".
        if 'generated' not in clazz.get('classname', ''):
            bugs[bug.get('type')] += 1

    # Work on a copy so the row handed in by DataFrame.apply is not mutated.
    result = df.copy()
    for key, value in bugs.items():
        result[key] = value

    return result

# Run the per-project analysis row by row, replace missing values (bug types
# a project did not report) with 0, and index the result by the `name` column.
analyzed = (
    top_projects
    .apply(analyzeBugTypes, axis=1)
    .fillna(0)
    .set_index('name')
)

## Generate LaTeX

In [66]:
# Columns of `analyzed` that go into the exported table, in display order.
# Commented-out entries are currently excluded from the export.
columns_to_export = [
    # 'stars',
    # 'forks',
    # 'subscribers',
    'JMH_BENCHMARK_METHOD_FOUND',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE',
    # 'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel',
    'JMH_IGNORED_METHOD_RETURN',
    # 'JMH_IGNORED_METHOD_RETURN_rel',
    'JMH_LOOP_INSIDE_BENCHMARK',
    # 'JMH_LOOP_INSIDE_BENCHMARK_rel',
    'JMH_STATE_FINAL_FIELD',
    # 'JMH_STATE_FINAL_FIELD_rel',
    'JMH_NOTFORKED_BENCHMARK',
    # 'JMH_NOTFORKED_BENCHMARK_rel',
    'JMH_BENCHMARKMODE_SINGLESHOT',
    # 'JMH_BENCHMARKMODE_SINGLESHOT_rel'
]

# Short column headers used in the table, keyed by bug type.
bug_type_dict = {
    'JMH_BENCHMARK_METHOD_FOUND': 'Benchs',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE': 'INVO',
    # 'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel': 'INVO %',
    'JMH_IGNORED_METHOD_RETURN': 'RETU',
    # 'JMH_IGNORED_METHOD_RETURN_rel': 'RETU %',
    'JMH_LOOP_INSIDE_BENCHMARK': 'LOOP',
    # 'JMH_LOOP_INSIDE_BENCHMARK_rel': 'LOOP %',
    'JMH_STATE_FINAL_FIELD': 'FINA',
    # 'JMH_STATE_FINAL_FIELD_rel': 'FINA %',
    'JMH_NOTFORKED_BENCHMARK': 'FORK',
    # 'JMH_NOTFORKED_BENCHMARK_rel': 'FORK %',
    'JMH_BENCHMARKMODE_SINGLESHOT': 'SING',
    # 'JMH_BENCHMARKMODE_SINGLESHOT_rel': 'SING %'
}

# `reindex` is used instead of `analyzed[columns_to_export]` so that a bug
# type that was not reported for any analyzed project (and therefore has no
# column in `analyzed`) yields an empty column rather than a KeyError.
# Missing values and zero counts are both rendered as blank cells.
latex = (
    analyzed
    .reindex(columns=columns_to_export)
    .rename(columns=bug_type_dict)
    .fillna('')
    .replace({0: ''})
)

# Print the LaTeX source itself so it can be copied out of the cell output.
print(latex.to_latex(bold_rows=True))

\begin{tabular}{lrllllll}
\toprule
{} &  Benchs & INVO & RETU & LOOP & FINA & FORK & SING \\
\textbf{name         } &         &      &      &      &      &      &      \\
\midrule
\textbf{ReactiveX    } &     122 &      &      &   13 &      &      &      \\
\textbf{netty        } &     143 &    3 &   88 &   30 &   57 &      &      \\
\textbf{openzipkin   } &      52 &      &      &      &   39 &      &      \\
\textbf{druid-io     } &     127 &    1 &   20 &  148 &  100 &      &      \\
\textbf{square       } &      11 &    1 &    5 &      &    4 &      &      \\
\textbf{grpc         } &      18 &      &    2 &    6 &    8 &      &      \\
\textbf{ben-manes    } &      37 &      &    1 &    9 &   27 &      &      \\
\textbf{h2oai        } &      14 &    8 &      &   12 &      &      &      \\
\textbf{goldmansachs } &     451 &      &   41 &  114 &  178 &      &      \\
\textbf{raphw        } &      39 &      &      &      &   12 &      &      \\
\textbf{linkedin     } &      25 &      

In [67]:
# Show the exported table as this cell's output.
latex

Unnamed: 0_level_0,Benchs,INVO,RETU,LOOP,FINA,FORK,SING
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ReactiveX,122,,,13.0,,,
netty,143,3.0,88.0,30.0,57.0,,
openzipkin,52,,,,39.0,,
druid-io,127,1.0,20.0,148.0,100.0,,
square,11,1.0,5.0,,4.0,,
grpc,18,,2.0,6.0,8.0,,
ben-manes,37,,1.0,9.0,27.0,,
h2oai,14,8.0,,12.0,,,
goldmansachs,451,,41.0,114.0,178.0,,
raphw,39,,,,12.0,,
