In [1]:
import pandas as pd
from rq6.utils import RESOURCES

In [2]:
compcheck_data_path = RESOURCES / "evaluated" / "compcheck_corrected.csv"
df = pd.read_csv(compcheck_data_path)

In [3]:
df

Unnamed: 0,Client,Library,Old Version,New Version,Error Type,MaRCo evaluation
0,HikariCP,org.slf4j:slf4j-api,1.7.25,2.0.0-alpha1,Error Only,statically_incompatible
1,HikariCP,org.apache.logging.log4j:log4j-core,2.11.1,2.12.1,Fail + Error,statically_incompatible
2,quickfixj,org.slf4j:slf4j-jdk14,1.7.25,2.0.0-alpha1,Fail Only,statically_incompatible
3,openscoring,org.glassfish.jersey.media:jersey-media-multipart,2.27,2.29.1,Error Only,no_github_link
4,openscoring,org.glassfish.jersey.core:jersey-client,2.27,2.29.1,Error Only,statically_incompatible
...,...,...,...,...,...,...
658,wasabi,com.datastax.cassandra:cassandra-driver-mapping,3.0.3,3.8.0,Error Only,statically_incompatible
659,wasabi,com.fasterxml.jackson.core:jackson-databind,2.7.2,2.10.1,Error Only,statically_incompatible
660,exhibitor,org.apache.curator:curator-test,2.7.1,4.2.0,Fail Only,statically_incompatible
661,distributed-redis-tool,org.springframework:spring-beans,4.2.1.RELEASE,5.2.2.RELEASE,Error Only,statically_incompatible


In [5]:
unique_gas = df.groupby(['Library']).size()
unique_gas

Library
cglib:cglib                           1
cglib:cglib-nodep                     1
ch.qos.logback:logback-access         1
ch.qos.logback:logback-classic        5
ch.qos.logback:logback-core           3
                                     ..
tech.units:indriya                    1
ua.net.nlp:morfologik-ukrainian-lt    1
us.codecraft:webmagic-core            2
xerces:xercesImpl                     1
xml-apis:xml-apis                     1
Length: 430, dtype: int64

In [6]:
unique_gavs = df.groupby(['Library', 'Old Version']).size()
unique_gavs

Library                             Old Version
cglib:cglib                         3.2.8          1
cglib:cglib-nodep                   2.2.2          1
ch.qos.logback:logback-access       1.2.3          1
ch.qos.logback:logback-classic      1.0.13         1
                                    1.1.11         1
                                                  ..
ua.net.nlp:morfologik-ukrainian-lt  4.6.2          1
us.codecraft:webmagic-core          0.6.1          1
                                    0.7.2          1
xerces:xercesImpl                   2.11.0         1
xml-apis:xml-apis                   1.4.01         1
Length: 656, dtype: int64

In [22]:
print(f"Number of unique GAs: {len(unique_gas)}")
print(f"Number of unique GAVs: {len(df)}")

Number of unique GAs: 157
Number of unique GAVs: 480


In [7]:
evaluation_counts = df['MaRCo evaluation'].value_counts()
print(f"distribution of evaluations: ")
print(evaluation_counts)

distribution of evaluations: 
MaRCo evaluation
statically_incompatible    511
no_github_link             101
no_jar                      29
no_maven                    11
no_compile                   7
no_github_tag                3
no_test                      1
Name: count, dtype: int64


In [27]:
not_empty_count = df['compatible_versions (ours)'].notna().sum()
print("Number of times 'compatible_versions (ours)' column is not empty:", not_empty_count)

Number of times 'compatible_versions (ours)' column is not empty: 5


In [37]:
filtered_df = df[df['compatible_versions (ours)'].notna()]
filtered_df

Unnamed: 0,GA,version,compatible_versions,compatible_versions (ours),err
49,com.indoqa:indoqa-boot,0.12.0,"['0.16.0', '0.15.0', '0.14.0', '0.13.0', '0.12...","['0.11.0', '0.10.0', '0.13.0', '0.14.0', '0.15...",
65,eu.unicore.security:securityLibrary,5.3.1,"['5.3.2', '5.3.1']","['5.3.0', '5.3.2', '5.3.3', '5.3.4', '5.3.6', ...",
156,org.dhatim:dropwizard-sentry,2.0.25-2,"['2.0.26-1', '2.0.25-2']","['2.0.25-1', '2.0.25', '2.0.26-1', '2.0.26-2',...",
166,org.robotframework:jrobotremoteserver,4.0.1,"['4.1.0', '4.0.1']","['4.0.0', '4.1.0', '4.0.1']",
178,org.spdx:spdx-tools,2.2.5,"['2.2.6', '2.2.5']",['2.2.5'],


In [68]:
# Find the intersection of sets
intersection = []
only_in_versions = []
only_in_ours = []

for index, row in filtered_df.iterrows():
    ranger_result = pd.eval(row['compatible_versions'])
    our_result = pd.eval(row['compatible_versions (ours)'])
    intersection = [x for x in ranger_result if x in our_result]
    only_ranger = [x for x in ranger_result if x not in our_result]
    only_our = [x for x in our_result if x not in ranger_result]
    print(f"GAV: {row['GA']}:{row['version']}")
    print(f"  ranger: {ranger_result}")
    print(f"  our: {our_result}")
    print(f"  intersection: {intersection}")
    print(f"  only_ranger: {only_ranger}")
    print(f"  only_our: {only_our}")
    print(f"    Our result covered {(len(intersection) / len(ranger_result) * 100)}% of ranger's result")
    print(f"    Ranger included {len(only_ranger)} versions not included in our range.")
    print(f"    Our tool included {len(only_our)} versions not included in Ranger's range.")
    if len(ranger_result) > len(our_result):
        print(f"    Ranger has the largest range, with {len(ranger_result) - len(our_result)} more versions")
    elif len(ranger_result) < len(our_result):
        print(f"    Our tool has the largest range, with {len(our_result) - len(ranger_result)} more versions")
        
    print()
    

GAV: com.indoqa:indoqa-boot:0.12.0
  ranger: ['0.16.0', '0.15.0', '0.14.0', '0.13.0', '0.12.0']
  our: ['0.11.0', '0.10.0', '0.13.0', '0.14.0', '0.15.0', '0.16.0', '0.12.0']
  intersection: ['0.16.0', '0.15.0', '0.14.0', '0.13.0', '0.12.0']
  only_ranger: []
  only_our: ['0.11.0', '0.10.0']
    Our result covered 100.0% of ranger's result
    Ranger included 0 versions not included in our range.
    Our tool included 2 versions not included in Ranger's range.
    Our tool has the largest range, with 2 more versions

GAV: eu.unicore.security:securityLibrary:5.3.1
  ranger: ['5.3.2', '5.3.1']
  our: ['5.3.0', '5.3.2', '5.3.3', '5.3.4', '5.3.6', '5.3.1']
  intersection: ['5.3.2', '5.3.1']
  only_ranger: []
  only_our: ['5.3.0', '5.3.3', '5.3.4', '5.3.6']
    Our result covered 100.0% of ranger's result
    Ranger included 0 versions not included in our range.
    Our tool included 4 versions not included in Ranger's range.
    Our tool has the largest range, with 4 more versions

GAV: org