## Read Files

In [18]:
import pandas as pd
import os

# Inputs for the analysis.

# detected_bugs_file = os.path.join('detected_bugs_3.csv')
# Directory that holds the per-project XML files (joined with each row's 'xml' value later on).
dir = "results"

# CSV file with the project ranking data.
projects_ranking_file = 'jmh-projects-bigquery-fh-201702 - jmh-projects-gh.csv'

# Number of highest-ranked projects to keep.
top = 25

projects = pd.read_csv(projects_ranking_file, sep=',')

# Derive two columns from the part of 'project' that follows the first '/':
#   'xml'  -> that part with '.xml' appended
#   'name' -> that part as-is
projects = projects.assign(
    xml=projects['project'].map(lambda slug: slug.split('/')[1] + '.xml'),
    name=projects['project'].map(lambda slug: slug.split('/')[1]),
)


## Remove Projects that could not be analyzed

In [19]:
# Projects that could not be analyzed; they are excluded before ranking.
projects_to_remove = [
    'jgrapht/jgrapht',
    'lemire/RoaringBitmap',
    'RoaringBitmap/RoaringBitmap',
    'requery/requery',
    'Netflix/feign',
    'crate/crate',
    'real-logic/simple-binary-encoding',
    'jbachorik/btrace',
    'promeG/TinyPinyin',
    'pcollections/pcollections',
    'ktoso/sbt-jmh',
    'junkdog/artemis-odb',
    'fakemongo/fongo',
    'graalvm/truffle',
    'graalvm/graal-core',
]

# Keep only rows whose 'project' value is not in the exclusion list.
projects = projects.loc[~projects['project'].isin(projects_to_remove)]

# Rank by watchers, then stars, then forks (all descending) and keep the first `top` rows.
top_projects = projects.sort_values(
    by=['watchers', 'stars', 'forks'],
    ascending=False,
).head(top)

In [20]:
# Show the selected projects as this cell's output.
top_projects

Unnamed: 0,project,forked,watchers,stars,forks,subscribers,xml,name
0,ReactiveX/RxJava,False,23558,23558,4143,1754,RxJava.xml,RxJava
2,netty/netty,False,9746,9746,4775,1243,netty.xml,netty
3,openzipkin/zipkin,False,5627,5627,851,507,zipkin.xml,zipkin
4,druid-io/druid,False,4743,4743,1132,457,druid.xml,druid
5,square/okio,False,3703,3703,601,220,okio.xml,okio
6,grpc/grpc-java,False,2631,2631,821,380,grpc-java.xml,grpc-java
7,ben-manes/caffeine,False,2414,2414,192,176,caffeine.xml,caffeine
8,h2oai/h2o-3,False,1943,1943,836,283,h2o-3.xml,h2o-3
10,goldmansachs/gs-collections,False,1652,1652,247,204,gs-collections.xml,gs-collections
12,raphw/byte-buddy,False,1495,1495,156,97,byte-buddy.xml,byte-buddy


## Analyze the bugs by reported type

In [21]:
from collections import Counter
import xml.etree.ElementTree as ET

def analyzeBugTypes(df):
    """Count the bugs in one project's XML report, grouped by bug type.

    The report is read from ``os.path.join(dir, df['xml'])``, where ``dir`` is
    the module-level results directory. Every ``BugInstance`` element directly
    under the document root is considered; an instance is counted under its
    ``type`` attribute unless

    * it has no ``Class`` child element, or
    * the ``classname`` attribute of that ``Class`` contains ``'generated'``.

    Parameters
    ----------
    df : mapping-like row (e.g. the ``pandas.Series`` handed over by
        ``DataFrame.apply(..., axis=1)``)
        Must provide an ``'xml'`` entry naming the report file. It is modified
        in place: one entry per bug type found is added, holding its count.

    Returns
    -------
    The same ``df`` object, extended with the per-type counts.

    Raises
    ------
    Whatever ``xml.etree.ElementTree.parse`` raises when the report file is
    missing or is not well-formed XML.
    """
    report_path = os.path.join(dir, df['xml'])
    root = ET.parse(report_path).getroot()

    bugs_per_type = Counter()
    for bug in root.findall('BugInstance'):
        clazz = bug.find('Class')
        # Compare with None explicitly: an Element that has no child elements
        # is falsy, so a plain truthiness test would silently drop bugs whose
        # <Class> element carries only attributes.
        if clazz is None:
            continue
        # Default to '' so a <Class> without a classname attribute cannot make
        # the substring test raise a TypeError.
        if 'generated' in clazz.get('classname', ''):
            continue
        bugs_per_type[bug.get('type')] += 1

    # Store each count on the row under the bug type's name.
    for bug_type, count in bugs_per_type.items():
        df[bug_type] = count

    return df

# Run the per-project bug count on every row, put 0 wherever a project has no
# value for a column, and index the result by project name.
analyzed = (
    top_projects
    .apply(analyzeBugTypes, axis=1)
    .fillna(0)
    .set_index('name')
)

## Generate LaTeX

In [24]:
# Short header used in the exported table for each column.
# The '_rel' entries are commented out here and in the column list below.
bug_type_dict = {
    'stars': 'Stars',
    'forks': 'Forks',
    'subscribers': 'Subs',
    'JMH_BENCHMARK_METHOD_FOUND': 'Benchs',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE': 'INVO',
    # 'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel': 'INVO %',
    'JMH_IGNORED_METHOD_RETURN': 'RETU',
    # 'JMH_IGNORED_METHOD_RETURN_rel': 'RETU %',
    'JMH_LOOP_INSIDE_BENCHMARK': 'LOOP',
    # 'JMH_LOOP_INSIDE_BENCHMARK_rel': 'LOOP %',
    'JMH_STATE_FINAL_FIELD': 'FINA',
    # 'JMH_STATE_FINAL_FIELD_rel': 'FINA %',
    'JMH_NOTFORKED_BENCHMARK': 'FORK',
    # 'JMH_NOTFORKED_BENCHMARK_rel': 'FORK %',
    'JMH_BENCHMARKMODE_SINGLESHOT': 'SING',
    # 'JMH_BENCHMARKMODE_SINGLESHOT_rel': 'SING %',
}

# Columns of `analyzed` to export, in left-to-right table order.
columns_to_export = [
    'stars',
    'forks',
    'subscribers',
    'JMH_BENCHMARK_METHOD_FOUND',
    'JMH_FIXTURE_USING_INVOCATION_SCOPE',
    # 'JMH_FIXTURE_USING_INVOCATION_SCOPE_rel',
    'JMH_IGNORED_METHOD_RETURN',
    # 'JMH_IGNORED_METHOD_RETURN_rel',
    'JMH_LOOP_INSIDE_BENCHMARK',
    # 'JMH_LOOP_INSIDE_BENCHMARK_rel',
    'JMH_STATE_FINAL_FIELD',
    # 'JMH_STATE_FINAL_FIELD_rel',
    'JMH_NOTFORKED_BENCHMARK',
    # 'JMH_NOTFORKED_BENCHMARK_rel',
    'JMH_BENCHMARKMODE_SINGLESHOT',
    # 'JMH_BENCHMARKMODE_SINGLESHOT_rel',
]

# Select the export columns, apply the short headers, and blank out missing
# values and zeros so they render as empty cells.
latex = (
    analyzed[columns_to_export]
    .rename(columns=bug_type_dict)
    .fillna('')
    .replace({0: ''})
)

# print() is used so the LaTeX source is emitted as plain text.
print(latex.to_latex())

\begin{tabular}{lrrrrllllll}
\toprule
{} &  Stars &  Forks &  Subs &  Benchs & INVO & RETU & LOOP & FINA & FORK & SING \\
name                &        &        &       &         &      &      &      &      &      &      \\
\midrule
RxJava              &  23558 &   4143 &  1754 &     122 &      &      &   13 &      &      &      \\
netty               &   9746 &   4775 &  1243 &     143 &    3 &   88 &   30 &   57 &      &      \\
zipkin              &   5627 &    851 &   507 &      52 &      &      &      &   39 &      &      \\
druid               &   4743 &   1132 &   457 &     127 &    1 &   20 &  148 &  100 &      &      \\
okio                &   3703 &    601 &   220 &      11 &    1 &    5 &      &    4 &      &      \\
grpc-java           &   2631 &    821 &   380 &      18 &      &    2 &    6 &    8 &      &      \\
caffeine            &   2414 &    192 &   176 &      37 &      &    1 &    9 &   27 &      &      \\
h2o-3               &   1943 &    836 &   283 &      14 &    

In [23]:
# Show the table that was exported as this cell's output.
latex

Unnamed: 0_level_0,Stars,Forks,Subs,Benchs,INVO,RETU,LOOP,FINA,FORK,SING
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
RxJava,23558,4143,1754,122,,,13.0,,,
netty,9746,4775,1243,143,3.0,88.0,30.0,57.0,,
zipkin,5627,851,507,52,,,,39.0,,
druid,4743,1132,457,127,1.0,20.0,148.0,100.0,,
okio,3703,601,220,11,1.0,5.0,,4.0,,
grpc-java,2631,821,380,18,,2.0,6.0,8.0,,
caffeine,2414,192,176,37,,1.0,9.0,27.0,,
h2o-3,1943,836,283,14,8.0,,12.0,,,
gs-collections,1652,247,204,451,,41.0,114.0,178.0,,
byte-buddy,1495,156,97,39,,,,12.0,,
