In [51]:
import pandas as pd

# Test cases live in a single comma-separated file (pandas' default separator).
testcases = pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/TC/testcases.csv')
print('Test Cases Shape: {}'.format(testcases.shape))

# Bug reports are split across three pipe-delimited files; read each part, keep the
# individual frames available under their own names, then stack them into one frame.
bugreports_p1, bugreports_p2, bugreports_p3 = [
    pd.read_csv(br_part_path, sep='|')
    for br_part_path in (
        '../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p1.csv',
        '../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p2.csv',
        '../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p3.csv',
    )
]

# Note: the concatenation keeps each part's original row labels, so index values repeat.
bugreports = pd.concat([bugreports_p1, bugreports_p2, bugreports_p3])
print('Bug Reports shape: {}'.format(bugreports.shape))

Test Cases Shape: (207, 8)
Bug Reports shape: (35955, 10)


#### Removing Duplicate Bugs

In [52]:
# Remember the shape before filtering so both figures can be reported together.
shape_before_dedup = bugreports.shape

# keep=False discards *every* row whose Bug_Number occurs more than once; no
# representative copy of a duplicated bug survives.
# NOTE(review): if the intent is to keep one row per bug, keep='first' would be needed -
# confirm before changing, since all downstream counts depend on this.
bugreports.drop_duplicates(subset='Bug_Number', keep=False, inplace=True)

print('BR previous shape: {}'.format(shape_before_dedup))
print('BR shape: {}'.format(bugreports.shape))

BR previous shape: (35955, 10)
BR shape: (35314, 10)


#### Bug Reports Names and Descriptions

In [53]:
# Artifact name of each bug report: 'BR_<Bug_Number>_SRC'.
# Bug_Number is the first column of the frame; it is addressed by label because
# positional integer access on a label-indexed row (row[0]) is deprecated in pandas.
bugreports['br_name'] = 'BR_' + bugreports['Bug_Number'].astype(str) + '_SRC'

# Free-text description: every field of the row (including br_name, and 'nan' for missing
# values) rendered as a string and joined with single spaces.
# An existing 'br_desc' column is excluded so that re-running this cell does not fold the
# previous description into the new one.
br_desc_columns = [col for col in bugreports.columns if col != 'br_desc']
bugreports['br_desc'] = bugreports[br_desc_columns].apply(
    lambda row: ' '.join(str(el) for el in row), axis=1)

bugreports.head()

Unnamed: 0,Bug_Number,Summary,Platform,Component,Version,Creation_Time,Whiteboard,QA_Whiteboard,First_Comment_Text,First_Comment_Creation_Time,br_name,br_desc
0,506297,Livemarks with null site/feed uris cause sync ...,All,Sync,unspecified,2009-07-24T17:08:43Z,,,2009-07-24 09:54:28 FaultTolerance D...,2009-07-24T17:08:43Z,BR_506297_SRC,506297 Livemarks with null site/feed uris caus...
1,506338,Enhance Crash Recovery to better help the user,All,Session Restore,Trunk,2009-07-24T19:17:21Z,[crashkill][crashkill-metrics],,When our users crash they are pretty much in t...,2009-07-24T19:17:21Z,BR_506338_SRC,506338 Enhance Crash Recovery to better help t...
2,506507,Dragging multiple bookmarks in the bookmarks s...,x86,Bookmarks & History,Trunk,2009-07-26T06:16:02Z,,,User-Agent: Mozilla/5.0 (Windows; U; Win...,2009-07-26T06:16:02Z,BR_506507_SRC,506507 Dragging multiple bookmarks in the book...
3,506550,Unreliable Back Button navigating nytimes.com,x86,Extension Compatibility,3.5 Branch,2009-07-26T16:12:10Z,[caused by adblock plus][platform-rel-NYTimes],,User-Agent: Mozilla/5.0 (Windows; U; Win...,2009-07-26T16:12:10Z,BR_506550_SRC,506550 Unreliable Back Button navigating nytim...
4,506575,ALT + F4 when dropdown of autocomplete is open...,x86,Address Bar,3.5 Branch,2009-07-26T20:14:54Z,,,Pressing ALT + F4 when the autocomplete dropdo...,2009-07-26T20:14:54Z,BR_506575_SRC,506575 ALT + F4 when dropdown of autocomplete ...


#### Test Cases Names and Descriptions

In [54]:
# Artifact name of each test case: 'TC_<TC_Number>_TRG'.
# TC_Number is the first column of the frame; it is addressed by label because
# positional integer access on a label-indexed row (row[0]) is deprecated in pandas.
testcases['tc_name'] = 'TC_' + testcases['TC_Number'].astype(str) + '_TRG'

# Free-text description: every field of the row (including tc_name, and 'nan' for missing
# values) rendered as a string and joined with single spaces.
# An existing 'tc_desc' column is excluded so that re-running this cell does not fold the
# previous description into the new one.
tc_desc_columns = [col for col in testcases.columns if col != 'tc_desc']
testcases['tc_desc'] = testcases[tc_desc_columns].apply(
    lambda row: ' '.join(str(el) for el in row), axis=1)

testcases.head()

Unnamed: 0,TC_Number,TestDay,Gen_Title,Crt_Nr,Title,Preconditions,Steps,Expected_Result,tc_name,tc_desc
0,1,20181221,<notificationbox> \nand\n <notification>\n cha...,1,Notification - Popup Block,,1. Launch Firefox\n2. Navigate to http://www.p...,1. Firefox is successfully launched\n9. The al...,TC_1_TRG,1 20181221 <notificationbox> \nand\n <notifica...
1,2,20181221,<notificationbox> \nand\n <notification>\n cha...,2,Notification - Process Hang,,"1. Launch Firefox\n2. In the URL bar, navigate...",1. Firefox is successfully launched\n2. Firefo...,TC_2_TRG,2 20181221 <notificationbox> \nand\n <notifica...
2,3,20181221,<notificationbox> \nand\n <notification>\n cha...,3,Verify Notifications appear in RTL Mode,,"1. Launch Firefox\n2. In about:config, change ...",1. Firefox is successfully launched\n2.The for...,TC_3_TRG,3 20181221 <notificationbox> \nand\n <notifica...
3,4,20181221,<notificationbox> \nand\n <notification>\n cha...,4,Verify Notifications appear in High Contrast M...,,"1. While the browser is in High Contrast Mode,...",1. Firefox has been launched.\n2. Firefox begi...,TC_4_TRG,4 20181221 <notificationbox> \nand\n <notifica...
4,5,20181221,<notificationbox> \nand\n <notification>\n cha...,5,Verify notifications react to differing Zoom l...,,"1. While the browser is in High Contrast Mode,...",1. Firefox has been launched.\n2. Firefox begi...,TC_5_TRG,5 20181221 <notificationbox> \nand\n <notifica...


In [55]:
# One candidate link per (bug report, test case) pair.
n_expected_pairs = len(bugreports) * len(testcases)
print('Expected instances amount: {}'.format(n_expected_pairs))

# Number of bug reports filed against each Firefox version covered by the oracle.
for fversion in ('48 Branch', '49 Branch', '50 Branch', '51 Branch'):
    n_brs = len(bugreports[bugreports.Version == fversion])
    print('Num BRs {}: {}'.format(fversion, n_brs))

print('Num TCs: {}'.format(len(testcases)))

Expected instances amount: 7309998
Num BRs 48 Branch: 412
Num BRs 49 Branch: 353
Num BRs 50 Branch: 518
Num BRs 51 Branch: 461
Num TCs: 207


#### Create Oracle

In [110]:
#%%timeit

# Test days associated with each Firefox version, in chronological order.
testdays_by_fversion = {
    '48 Branch': ('20160603', '20160624', '20160708'),
    '49 Branch': ('20160722', '20160812', '20160826'),
    '50 Branch': ('20160909', '20160930', '20161014'),
    '51 Branch': ('20161028', '20161125', '20170106'),
}

# Flattened (version, testday) pairs; a pair present in this list denotes a valid link.
list_fversion_to_testday = [
    (fversion, testday)
    for fversion, testdays in testdays_by_fversion.items()
    for testday in testdays
]

# Empty oracle table: one row per (bug report, test case, test day) with a 0/1 link label.
links_df = pd.DataFrame(columns=['src_artf','version','trg_artf','testday','link'])

#br_aux = bugreports[(bugreports.Version == '50 Branch') |  (bugreports.Version == '48 Branch') | (bugreports.Version == '60 Branch')].sample(10, random_state=42)
#tc_aux = testcases.sample(50, random_state=42)

def check_link_condition(list_of_tuples):
    """Tell whether any candidate pair is a known (version, testday) mapping.

    Parameters
    ----------
    list_of_tuples : iterable of tuple
        Candidate (version, testday) pairs to look up.

    Returns
    -------
    bool
        True if at least one pair is contained in the module-level
        ``list_fversion_to_testday``; False otherwise (including for empty input).
    """
    return any(pair in list_fversion_to_testday for pair in list_of_tuples)

# Build the oracle: one row per (bug report, test case, test day).
#
# Labelling rule: a test case may belong to several test days ('day1 + day2 + ...').
# If ANY of those days is mapped to the bug report's version, every row of that
# (bug report, test case) pair gets link=1; otherwise every row gets link=0.
#
# Rows are accumulated in a plain list and turned into a DataFrame once at the end;
# assigning links_df.loc[counter] row by row re-allocates the frame on each insert and
# does not scale to the full bug-report x test-case product.

# The test days of a test case do not depend on the bug report, so split them only once.
# str() guards against TestDay having been parsed as an integer column.
tc_testdays = [
    (tc_name, str(testday).split(' + '))
    for tc_name, testday in zip(testcases['tc_name'], testcases['TestDay'])
]

link_rows = []
for br_name, br_version in zip(bugreports['br_name'], bugreports['Version']):
    for tc_name, tdays in tc_testdays:
        is_linked = check_link_condition([(br_version, tday) for tday in tdays])
        link = 1 if is_linked else 0
        for tday in tdays:
            link_rows.append((br_name, br_version, tc_name, tday, link))

links_df = pd.DataFrame(link_rows, columns=['src_artf','version','trg_artf','testday','link'])

# Kept for compatibility with code that reads the running row count.
counter = len(links_df)

Unnamed: 0,src_artf,version,trg_artf,testday,link
0,BR_1301918_SRC,48 Branch,TC_163_TRG,20161028,0
1,BR_1301918_SRC,48 Branch,TC_16_TRG,20160603,1
2,BR_1301918_SRC,48 Branch,TC_16_TRG,20160624,1
3,BR_1301918_SRC,48 Branch,TC_16_TRG,20161014,1
4,BR_1301918_SRC,48 Branch,TC_75_TRG,20160722,0
5,BR_1301918_SRC,48 Branch,TC_98_TRG,20160722,0
6,BR_1301918_SRC,48 Branch,TC_168_TRG,20161028,0
7,BR_1301918_SRC,48 Branch,TC_10_TRG,20181221,0
8,BR_1301918_SRC,48 Branch,TC_102_TRG,20160722,0
9,BR_1301918_SRC,48 Branch,TC_137_TRG,20160930,0


In [None]:
# (rows, columns) of the generated oracle table.
links_df.shape

In [111]:
# Name and version of each sampled bug report.
# NOTE(review): br_aux is not assigned by any active code in the visible cells; its only
# definition is the commented-out sampling line in the "Create Oracle" cell, so this cell
# presumably raises NameError on a fresh kernel unless that line is restored.
br_aux[['br_name','Version']]

Unnamed: 0,br_name,Version
13796,BR_1301918_SRC,48 Branch
13348,BR_1296919_SRC,48 Branch
8404,BR_1468467_SRC,60 Branch
13485,BR_1298185_SRC,48 Branch
12225,BR_1280820_SRC,50 Branch
532,BR_1491664_SRC,60 Branch
5983,BR_1444687_SRC,60 Branch
5505,BR_1440823_SRC,60 Branch
4583,BR_1433172_SRC,60 Branch
13244,BR_1295558_SRC,48 Branch


In [112]:
# Name and test day(s) of each sampled test case.
# NOTE(review): tc_aux is not assigned by any active code in the visible cells; its only
# definition is the commented-out sampling line in the "Create Oracle" cell, so this cell
# presumably raises NameError on a fresh kernel unless that line is restored.
tc_aux[['tc_name','TestDay']]

Unnamed: 0,tc_name,TestDay
161,TC_163_TRG,20161028
15,TC_16_TRG,20160603 + 20160624 + 20161014
73,TC_75_TRG,20160722
96,TC_98_TRG,20160722
166,TC_168_TRG,20161028
9,TC_10_TRG,20181221
100,TC_102_TRG,20160722
135,TC_137_TRG,20160930
18,TC_19_TRG,20160603 + 20160624 + 20161014
148,TC_150_TRG,20161028 + 20161125 + 20170106


Test case that belongs to three different test days, paired with a bug report whose version is in the version-to-test-day mapping:

In [115]:
# Rows linking test case TC_16 to bug report BR_1280820 (one row per test day).
is_tc_16 = links_df['trg_artf'] == 'TC_16_TRG'
is_br_1280820 = links_df['src_artf'] == 'BR_1280820_SRC'
links_df[is_tc_16 & is_br_1280820]

Unnamed: 0,src_artf,version,trg_artf,testday,link
281,BR_1280820_SRC,50 Branch,TC_16_TRG,20160603,1
282,BR_1280820_SRC,50 Branch,TC_16_TRG,20160624,1
283,BR_1280820_SRC,50 Branch,TC_16_TRG,20161014,1


Test case that belongs to a single test day, checked against bug reports of both mapped and unmapped versions:

In [118]:
# All oracle rows whose target artifact is test case TC_77.
links_df.loc[links_df['trg_artf'] == 'TC_77_TRG']

Unnamed: 0,src_artf,version,trg_artf,testday,link
27,BR_1301918_SRC,48 Branch,TC_77_TRG,20160722,0
97,BR_1296919_SRC,48 Branch,TC_77_TRG,20160722,0
167,BR_1468467_SRC,60 Branch,TC_77_TRG,20160722,0
237,BR_1298185_SRC,48 Branch,TC_77_TRG,20160722,0
307,BR_1280820_SRC,50 Branch,TC_77_TRG,20160722,0
377,BR_1491664_SRC,60 Branch,TC_77_TRG,20160722,0
447,BR_1444687_SRC,60 Branch,TC_77_TRG,20160722,0
517,BR_1440823_SRC,60 Branch,TC_77_TRG,20160722,0
587,BR_1433172_SRC,60 Branch,TC_77_TRG,20160722,0
657,BR_1295558_SRC,48 Branch,TC_77_TRG,20160722,0


In [117]:
# All oracle rows whose target artifact is test case TC_16.
links_df.loc[links_df['trg_artf'] == 'TC_16_TRG']

Unnamed: 0,src_artf,version,trg_artf,testday,link
1,BR_1301918_SRC,48 Branch,TC_16_TRG,20160603,1
2,BR_1301918_SRC,48 Branch,TC_16_TRG,20160624,1
3,BR_1301918_SRC,48 Branch,TC_16_TRG,20161014,1
71,BR_1296919_SRC,48 Branch,TC_16_TRG,20160603,1
72,BR_1296919_SRC,48 Branch,TC_16_TRG,20160624,1
73,BR_1296919_SRC,48 Branch,TC_16_TRG,20161014,1
141,BR_1468467_SRC,60 Branch,TC_16_TRG,20160603,0
142,BR_1468467_SRC,60 Branch,TC_16_TRG,20160624,0
143,BR_1468467_SRC,60 Branch,TC_16_TRG,20161014,0
211,BR_1298185_SRC,48 Branch,TC_16_TRG,20160603,1
