In [15]:
import pandas as pd
import os
import numpy as np


# Projects Selected

In [27]:
# Git address of every project; project names are lower-cased before joining
project_git = pd.read_csv('../test-projects/historical_projects.csv')
project_git['project'] = [name.lower() for name in project_git['project']]

# Historical flaky-test records (each row carries a `project` and a `sha`)
historical_rerun_flaky_tests = pd.read_csv('../test-projects/historical_rerun_flaky_tests.csv')
print('Flaky test shape:', historical_rerun_flaky_tests.shape)


def most_common_sha(project_rows):
    """Return the sha that occurs most often among one project's rows."""
    return project_rows['sha'].value_counts().index[0]


# One sha per project: the one with the most rows in the flaky-test table
projects_sha = historical_rerun_flaky_tests.groupby(by='project').apply(most_common_sha)

# Inner-join the chosen sha with the git address on the project name;
# the unnamed sha series arrives as column 0, hence the rename.
git_by_project = project_git.set_index('project')
projects = (
    pd.concat([projects_sha, git_by_project], axis=1, join='inner')
    .reset_index()
    .rename(columns={0: 'sha'})
)
print('Selected Project shape:', projects.shape)
projects.head()
# A later cell reads '../test-projects/projects.csv'; uncomment to (re)generate it.
# projects.to_csv('../test-projects/projects.csv', index=None)

Flaky test shape: (5075, 4)
Selected Project shape: (22, 3)


Unnamed: 0,project,sha,Project Address
0,achilles,3ef293744f913c1aa8cf59d2fdd78f46a5e2f79f,git@github.com:doanduyhai/Achilles.git
1,ambari,400a37ab0b6636a5476461e731f43c085a5b8cfc,git@github.com:apache/ambari.git
2,assertj-core,aa90c2d3a757b67f6bd64ca82f9b9f15b1e1161c,git@github.com:joel-costigliola/assertj-core.git
3,cloudera.oryx,54cc78b4ef8381fd13aaaebec71d10b81a465ecb,git@github.com:cloudera/oryx.git
4,commons-exec,59a147cb8d6ae25c19bd3ade20f8920e92c264ce,git@github.com:apache/commons-exec.git


In [108]:
# Keep only the flaky tests recorded at one of the per-project shas in `projects_sha`.
# NOTE(review): `projects_sha` covers every project in the flaky-test table, not only
# those that survived the inner join into `projects` - confirm this is intended.
sha_is_selected = historical_rerun_flaky_tests['sha'].isin(projects_sha)
flaky_tests = historical_rerun_flaky_tests[sha_is_selected]
print('Flaky tests shape:', flaky_tests.shape)
flaky_tests.head()
# flaky_tests.to_csv('../test-projects/flaky_tests.csv', index=None)

Flaky tests shape: (903, 4)


# Flakify Results

In [13]:
# Per-project Flakify results on the IDoFT dataset, with lower-cased project names
flakify_results_IDoFT = pd.read_csv(
    '../Flakify/results/Flakify_per_project_results_on_IDoFT_dataset.csv',
    header=0,
)
flakify_results_IDoFT['project_name'] = [
    name.lower() for name in flakify_results_IDoFT['project_name']
]

# Same for the FlakeFlagger dataset, whose name column is called 'Project Name':
# add a lower-cased 'project_name' column, then drop the original one.
flakify_results_flakeflagger = pd.read_csv(
    '../Flakify/results/Flakify_per_project_results_on_FlakeFlagger_dataset.csv',
    header=0,
)
flakify_results_flakeflagger['project_name'] = [
    name.lower() for name in flakify_results_flakeflagger['Project Name']
]
flakify_results_flakeflagger = flakify_results_flakeflagger.drop(['Project Name'], axis=1)

# Stack both result sets; when a project appears in both, the first row
# (the IDoFT one, since it is concatenated first) is kept.
flakify_results = pd.concat(
    [flakify_results_IDoFT, flakify_results_flakeflagger]
).drop_duplicates(subset=['project_name'])
print(flakify_results.shape)

(310, 5)


In [14]:
# Projects that flakify has already run
results_for_test_projects = pd.concat([test_projects.set_index('project_name'), flakify_results.set_index('project_name')], axis=1, join='inner').reset_index()
print(results_for_test_projects.shape)
results_for_test_projects

(17, 7)


Unnamed: 0,project_name,Project Address,SHA,Accuracy,F1,Precision,Recall
0,achilles,git@github.com:doanduyhai/Achilles.git,e3099bdce342910951c4862c78751fd81ed4552e,100.0,100.0,100.0,100.0
1,ambari,git@github.com:apache/ambari.git,,94.0,84.0,75.0,95.0
2,assertj-core,git@github.com:joel-costigliola/assertj-core.git,,99.0,40.0,25.0,100.0
3,commons-exec,git@github.com:apache/commons-exec.git,,90.0,40.0,25.0,100.0
4,dropwizard,git@github.com:dropwizard/dropwizard.git,07dfaed697427e208d65049f80a5d1949833b7cd,100.0,100.0,100.0,100.0
5,hadoop,git@github.com:apache/hadoop.git,cc2babc1f75c93bf89a8f10da525f944c15d02ea,97.315436,98.630137,98.630137,98.630137
6,hbase,git@github.com:apache/hbase.git,801fc05e9c082ab12c7c1207087d3667ab162d1a,82.692308,90.322581,97.674419,84.0
7,httpcore,git@github.com:apache/httpcore.git,,98.0,81.0,74.0,90.0
8,jackrabbit-oak,git@github.com:apache/jackrabbit-oak.git,11985b3fbbd4f7f39dfaf368b01b5e0c67a32f0d,96.0,97.959184,100.0,96.0
9,jimfs,git@github.com:google/jimfs.git,ced6093fe69a31e37ba8bbf63858b50c7164f4a4,100.0,100.0,100.0,100.0


In [15]:
# Projects that flakify needs to run
additional_projects = test_projects.loc[~test_projects['project_name'].isin(results_for_test_projects['project_name'])]
print(additional_projects)

                     Project Address  SHA project_name
15  git@github.com:togglz/togglz.git  NaN       togglz


# Flakify Test Case Extraction

In [2]:
# Load the FlakeFlagger dataset shipped with Flakify and preview it
flakeflagger_csv = "../Flakify/dataset/FlakeFlagger/Flakify_FlakeFlagger_dataset.csv"
flakeflagger_dataset = pd.read_csv(flakeflagger_csv)
print(flakeflagger_dataset.shape)
flakeflagger_dataset.head()

(21890, 9)


Unnamed: 0,project,class_name,test_name,flaky,full_code,preprocessed_code,NoOfTokens for orignal code,tokens of reduced code,final_code
0,achilles,ArgumentExtractorTest,should_init_entity_packages,0.0,@Test public void should_init_entity_packages(...,@Test public void should_init_entity_packages(...,116.0,43.0,@Test public void should_init_entity_packages(...
1,achilles,ArgumentExtractorTest,should_init_empty_entity_packages,0.0,@Test public void should_init_empty_entity_pac...,@Test public void should_init_empty_entity_pac...,58.0,29.0,@Test public void should_init_empty_entity_pac...
2,achilles,ArgumentExtractorTest,should_init_entities_list,0.0,@Test public void should_init_entities_list(){...,@Test public void should_init_entities_list(){...,82.0,30.0,@Test public void should_init_entities_list(){...
3,achilles,ArgumentExtractorTest,should_init_empty_entities_list,0.0,@Test public void should_init_empty_entities_l...,@Test public void should_init_empty_entities_l...,56.0,27.0,@Test public void should_init_empty_entities_l...
4,achilles,ArgumentExtractorTest,should_init_from_packages_and_entities_list,0.0,@Test public void should_init_from_packages_an...,@Test public void should_init_from_packages_an...,123.0,42.0,@Test public void should_init_from_packages_an...


In [7]:
# Load the IDoFT dataset shipped with Flakify and preview it
idoft_csv = "../Flakify/dataset/IDoFT/Flakify_IDoFT_dataset.csv"
IDoFT_dataset = pd.read_csv(idoft_csv)
print(IDoFT_dataset.shape)
IDoFT_dataset.head()

(3862, 11)


Unnamed: 0,project,SHA,class_name,test_name,flaky,full_code,smells_found,preprocessed_code,NoOfTokens for orignal code,NoOfTokens for reduced code,final_code
0,dubbo,737f7a7ea67832d7f17517326fb2491d0a086dd7,DubboMonitorTest,testMonitorFactory,1,@Test public void testMonitorFactory() throws ...,"[Conditional Test, Fire and Forget, Assertion ...",@Test public void testMonitorFactory() throws ...,690,88,@Test public void testMonitorFactory() throws ...
1,Mapper,1764748eedb2f320a0d1c43cb4f928c4ccb1f2f5,FieldHelperTest,testComplex,1,@Test public void testComplex(){\n List<Entit...,"[Assertion Roulette, Assertion Roulette, Asser...",@Test public void testComplex(){\nAssert.asser...,90,65,@Test public void testComplex(){\n List<Entit...
2,Mapper,3c0b3307011fad53f811e08d05147d94fc6c0d67,TestBasicAble,testInsert,1,/** \n * \n */\n@Test public void testInsert()...,"[Assertion Roulette, Assertion Roulette, Asser...",/** \n * \n */\n@Test public void testInsert()...,269,81,/** \n * \n */\n@Test public void testInsert()...
3,Mapper,3c0b3307011fad53f811e08d05147d94fc6c0d67,TestDeleteByPrimaryKey,testDynamicDelete,1,/** \n * \n */\n@Test public void testDynamicD...,"[Assertion Roulette, Assertion Roulette, Asser...",/** \n * \n */\n@Test public void testDynamicD...,189,95,/** \n * \n */\n@Test public void testDynamicD...
4,Mapper,1764748eedb2f320a0d1c43cb4f928c4ccb1f2f5,SqlHelperTest,testLogicDeleteSql,1,@Test public void testLogicDeleteSql(){\n Str...,"[Assertion Roulette, Assertion Roulette, Asser...",@Test public void testLogicDeleteSql(){\nAsser...,312,183,@Test public void testLogicDeleteSql(){\n Str...


In [11]:
# Number of distinct values in every column, per project
idoft_by_project = IDoFT_dataset.groupby(by='project')
idoft_by_project.nunique()

Unnamed: 0_level_0,SHA,class_name,test_name,flaky,full_code,smells_found,preprocessed_code,NoOfTokens for orignal code,NoOfTokens for reduced code,final_code
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Achilles,1,9,28,1,29,7,29,25,18,29
Activiti,1,12,24,1,25,14,24,24,23,25
Algorithms,1,1,1,1,1,1,1,1,1,1
AuthMeReloaded,1,1,1,1,1,1,1,1,1,1
Chronicle-Wire,2,28,49,1,57,17,55,53,49,57
...,...,...,...,...,...,...,...,...,...,...
wildfly-maven-plugin,2,3,4,1,5,3,5,5,4,5
workflow-cps-global-lib-http-plugin,1,1,1,1,1,1,1,1,1,1
wro4j,1,18,23,1,25,6,24,24,19,25
yawp,1,1,1,1,1,1,1,1,1,1


True

In [46]:
# Names of the selected projects, lower-cased so that file-name segments can be
# matched case-insensitively.
projects = pd.read_csv('../test-projects/projects.csv')
project_names = set(projects['project'].str.lower())
# Additional short names that are also accepted as project identifiers when
# scanning file names (mapProjectNames later expands them to full project names).
project_names |= {'wro', 'springframework', 'assertj', 'exec', 'hc', 'jackrabbit'}

basedir = '../flaky_datesets/deflaker/'
flaky_dir = '/samples_flaky/test_cases/'
nonflaky_dir = '/samples_nonflaky/test_cases/'


def _read_test_cases(directory, flaky_label, known_names):
    """Read every file in `directory` into one row per test case.

    File names are split on '.'; the last two segments are taken as the class
    name and the test name, and the project is the right-most segment whose
    lower-cased form is in `known_names`.

    Parameters:
        directory: directory path ending in a separator; file names are
            appended to it directly.
        flaky_label: value stored in the 'flaky' position of every row
            (1 for flaky samples, 0 for non-flaky samples).
        known_names: set of lower-cased names recognised as projects.

    Returns:
        A list of [project_name, class_name, test_name, test_case, flaky_label]
        rows. `project_name` is None when no segment matches, so an unmatched
        file is never silently attributed to the previously processed project.

    Raises:
        IndexError: if a file name has fewer than two '.'-separated segments.
    """
    rows = []
    for file_name in os.listdir(directory):
        segments = file_name.split('.')
        # Search from the right so the segment closest to the class name wins.
        project_name = next(
            (segment for segment in reversed(segments) if segment.lower() in known_names),
            None,
        )
        class_name, test_name = segments[-2], segments[-1]
        # `with` guarantees the handle is closed even if read() raises.
        with open(directory + file_name, mode='r') as handle:
            test_case = handle.read()
        rows.append([project_name, class_name, test_name, test_case, flaky_label])
    return rows


# Flaky samples first, then non-flaky ones.
data = (
    _read_test_cases(basedir + flaky_dir, 1, project_names)
    + _read_test_cases(basedir + nonflaky_dir, 0, project_names)
)

# Build the frame straight from the row list: going through np.array() would
# coerce every cell (including the 0/1 labels) to one fixed-width unicode dtype
# sized by the longest test body.
dataset = pd.DataFrame(data, columns=['project', 'class', 'test_name', 'final_code', 'flaky'])
dataset.head()


Unnamed: 0,project,class,test_name,final_code,flaky
0,oozie,TestCoordSubmitXCommand,testSubmitReservedVars,/**\n * Don't include controls in XML.\n *\n *...,1
1,oozie,TestAuthFilterAuthOozieClient,testClientWithAnonymous,public void testClientWithAnonymous() throws E...,1
2,oozie,TestBulkMonitorJPAExecutor,testJavaNoRecords,public void testJavaNoRecords() throws Excepti...,1
3,oozie,TestLiteWorkflowStoreService,testRetry,public void testRetry() throws Exception {\n ...,1
4,oozie,TestPurgeXCommand,testPurgeWFWithSubWF3,/**\n * Test : The subworkflow should get purg...,1


In [57]:
def mapProjectNames(name):
    """Translate a short project alias to its full project name.

    Names that are not one of the known aliases are returned unchanged.
    The lookup is case-sensitive.
    """
    full_names = {
        'wro': 'wro4j',
        'springframework': 'spring-boot',
        'assertj': 'assertj-core',
        'jackrabbit': 'jackrabbit-oak',
        'exec': 'commons-exec',
        'hc': 'httpcore',
    }
    # Fall back to the input itself when there is no alias entry.
    return full_names.get(name, name)

# Replace short aliases in the project column by full project names, then save.
# The DataFrame index is written to the CSV as well (to_csv default).
full_project_names = dataset['project'].map(mapProjectNames)
dataset['project'] = full_project_names
dataset.to_csv('../test-projects/flakify_data.csv')

In [56]:
# Number of distinct values in every column, per project
dataset_by_project = dataset.groupby(by='project')
dataset_by_project.nunique()

Unnamed: 0_level_0,class,test_name,final_code,flaky
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
achilles,15,67,75,2
ambari,11,12,13,2
assertj-core,2670,5934,10868,1
commons-exec,41,124,124,2
dropwizard,482,1505,1670,2
hadoop,1023,3886,4160,2
hbase,195,480,528,2
hector,9,20,20,1
httpcore,327,1171,1382,1
jackrabbit-oak,1965,9761,11370,2


In [19]:
# Read one flaky sample into `a` for inspection. The context manager closes
# the handle even if read() raises, which the manual open()/close() did not.
with open('../flaky_datesets/deflaker/samples_flaky/test_cases/ch.qos.logback.core.net.AbstractSocketAppenderTest.addsErrorMessageWhenAppendingIsInterruptedWhileWaitingForTheQueueToAcceptTheEvent', mode='r') as fire:
    a = fire.read()

In [20]:
# Display the raw text of the sample test case held in `a`
a

'@Test\npublic void addsErrorMessageWhenAppendingIsInterruptedWhileWaitingForTheQueueToAcceptTheEvent() throws Exception {\n    // given\n    final InterruptedException interruptedException = new InterruptedException();\n    doThrow(interruptedException).when(deque).offer(eq("some event"), anyLong(), any(TimeUnit.class));\n    appender.start();\n    // when\n    appender.append("some event");\n    // then\n    verify(appender).addError("Interrupted while appending event to SocketAppender", interruptedException);\n}\n'

In [40]:


# Iterate over the rows of `project_flaky_tests`, reading the 'Test_class' and
# 'Test_method' fields of each row.
# NOTE(review): `project_flaky_tests` is not defined in any cell visible here -
# confirm where it comes from before running on a fresh kernel.
for i, r in project_flaky_tests.iterrows():
  test_class = r['Test_class']
  test_method = r['Test_method']
  # NOTE(review): as written this prints only an empty line per row and never
  # uses test_class/test_method; the saved output lists class names, so the
  # output looks stale relative to this code - confirm the intended print.
  print()


.ZookeeperStateRepositoryTest
.SystemPropertyActivationStrategyTest
.SystemPropertyActivationStrategyTest
.ManagedFeatureManagerTest
.CDIBasicOperationTest
.GuiceIntegrationTest
.JSFMapTest
.ServletBasicOperationTest
.CacheRepositoryTest
.JDBCRepositoryTest
.ThreadBasedUsersTest
.HttpServletRequestHolderTest
.ShiroUsersTest
.SpringBasicOperationTest
.SpringEarlyFeatureUsageTest
.ManagedFeatureManagerTest
.FeatureProxyTest
.SystemPropertyActivationStrategyTest
.CassandraStateRepositoryTest
.CassandraStateRepositoryTest
.CassandraStateRepositoryTest
.CassandraStateRepositoryTest
.CassandraStateRepositoryTest
.SystemPropertyActivationStrategyTest
.SystemPropertyActivationStrategyTest
.SystemPropertyActivationStrategyTest
.SystemPropertyActivationStrategyTest
.ZookeeperStateRepositoryTest
