In [30]:
import pandas as pd
import numpy as np
from sklearn.externals.joblib import Parallel, delayed
from tqdm import tqdm

testcases = pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/TC/testcases.csv')
print('Test Cases Shape: {}'.format(testcases.shape))

bugreports_p1 = pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p1.csv', sep="|")
bugreports_p2 = pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p2.csv', sep='|')
bugreports_p3 = pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/all_bugs_p3.csv', sep='|')

bugreports = pd.concat([bugreports_p1, bugreports_p2, bugreports_p3])
print('Bug Reports shape: {}'.format(bugreports.shape))

Test Cases Shape: (207, 8)
Bug Reports shape: (35955, 10)


#### Removing Duplicate Bugs

In [31]:
print('BR previous shape: {}'.format(bugreports.shape))
bugreports.drop_duplicates('Bug_Number', keep=False, inplace=True)
print('BR shape: {}'.format(bugreports.shape))

BR previous shape: (35955, 10)
BR shape: (35314, 10)


#### Bug Reports Names and Descriptions

In [32]:
bugreports['br_name'] = bugreports.apply(lambda row : 'BR_' + str(row[0]) + '_SRC', axis=1)
bugreports['br_desc'] = bugreports.apply(lambda row : ' '.join([str(el) for el in row]), axis=1) 
bugreports.head()

Unnamed: 0,Bug_Number,Summary,Platform,Component,Version,Creation_Time,Whiteboard,QA_Whiteboard,First_Comment_Text,First_Comment_Creation_Time,br_name,br_desc
0,506297,Livemarks with null site/feed uris cause sync ...,All,Sync,unspecified,2009-07-24T17:08:43Z,,,2009-07-24 09:54:28 FaultTolerance D...,2009-07-24T17:08:43Z,BR_506297_SRC,506297 Livemarks with null site/feed uris caus...
1,506338,Enhance Crash Recovery to better help the user,All,Session Restore,Trunk,2009-07-24T19:17:21Z,[crashkill][crashkill-metrics],,When our users crash they are pretty much in t...,2009-07-24T19:17:21Z,BR_506338_SRC,506338 Enhance Crash Recovery to better help t...
2,506507,Dragging multiple bookmarks in the bookmarks s...,x86,Bookmarks & History,Trunk,2009-07-26T06:16:02Z,,,User-Agent: Mozilla/5.0 (Windows; U; Win...,2009-07-26T06:16:02Z,BR_506507_SRC,506507 Dragging multiple bookmarks in the book...
3,506550,Unreliable Back Button navigating nytimes.com,x86,Extension Compatibility,3.5 Branch,2009-07-26T16:12:10Z,[caused by adblock plus][platform-rel-NYTimes],,User-Agent: Mozilla/5.0 (Windows; U; Win...,2009-07-26T16:12:10Z,BR_506550_SRC,506550 Unreliable Back Button navigating nytim...
4,506575,ALT + F4 when dropdown of autocomplete is open...,x86,Address Bar,3.5 Branch,2009-07-26T20:14:54Z,,,Pressing ALT + F4 when the autocomplete dropdo...,2009-07-26T20:14:54Z,BR_506575_SRC,506575 ALT + F4 when dropdown of autocomplete ...


#### Test Cases Names and Descriptions

In [33]:
testcases['tc_name'] = testcases.apply(lambda row : 'TC_' + str(row[0]) + '_TRG', axis=1)
testcases['tc_desc'] = testcases.apply(lambda row : ' '.join([str(el) for el in row]), axis=1)
testcases.head()

Unnamed: 0,TC_Number,TestDay,Gen_Title,Crt_Nr,Title,Preconditions,Steps,Expected_Result,tc_name,tc_desc
0,1,20181221,<notificationbox> \nand\n <notification>\n cha...,1,Notification - Popup Block,,1. Launch Firefox\n2. Navigate to http://www.p...,1. Firefox is successfully launched\n9. The al...,TC_1_TRG,1 20181221 <notificationbox> \nand\n <notifica...
1,2,20181221,<notificationbox> \nand\n <notification>\n cha...,2,Notification - Process Hang,,"1. Launch Firefox\n2. In the URL bar, navigate...",1. Firefox is successfully launched\n2. Firefo...,TC_2_TRG,2 20181221 <notificationbox> \nand\n <notifica...
2,3,20181221,<notificationbox> \nand\n <notification>\n cha...,3,Verify Notifications appear in RTL Mode,,"1. Launch Firefox\n2. In about:config, change ...",1. Firefox is successfully launched\n2.The for...,TC_3_TRG,3 20181221 <notificationbox> \nand\n <notifica...
3,4,20181221,<notificationbox> \nand\n <notification>\n cha...,4,Verify Notifications appear in High Contrast M...,,"1. While the browser is in High Contrast Mode,...",1. Firefox has been launched.\n2. Firefox begi...,TC_4_TRG,4 20181221 <notificationbox> \nand\n <notifica...
4,5,20181221,<notificationbox> \nand\n <notification>\n cha...,5,Verify notifications react to differing Zoom l...,,"1. While the browser is in High Contrast Mode,...",1. Firefox has been launched.\n2. Firefox begi...,TC_5_TRG,5 20181221 <notificationbox> \nand\n <notifica...


In [34]:
print('Expected instances amount: {}'.format(len(bugreports) * len(testcases)))

print('Num BRs 48 Branch: {}'.format(len(bugreports[bugreports.Version == '48 Branch'])))
print('Num BRs 49 Branch: {}'.format(len(bugreports[bugreports.Version == '49 Branch'])))
print('Num BRs 50 Branch: {}'.format(len(bugreports[bugreports.Version == '50 Branch'])))
print('Num BRs 51 Branch: {}'.format(len(bugreports[bugreports.Version == '51 Branch'])))

print('Num TCs: {}'.format(len(testcases)))

Expected instances amount: 7309998
Num BRs 48 Branch: 412
Num BRs 49 Branch: 353
Num BRs 50 Branch: 518
Num BRs 51 Branch: 461
Num TCs: 207


#### Create Oracle

In [35]:
ex_df = pd.DataFrame(index=testcases.tc_name, columns=bugreports.br_name, data=0, dtype='int8')
print(ex_df.shape)
print(ex_df.info())
ex_df.head()

(207, 35314)
<class 'pandas.core.frame.DataFrame'>
Index: 207 entries, TC_1_TRG to TC_208_TRG
Columns: 35314 entries, BR_506297_SRC to BR_1516895_SRC
dtypes: int8(35314)
memory usage: 7.0+ MB
None


br_name,BR_506297_SRC,BR_506338_SRC,BR_506507_SRC,BR_506550_SRC,BR_506575_SRC,BR_506729_SRC,BR_506768_SRC,BR_506795_SRC,BR_506820_SRC,BR_506831_SRC,...,BR_1516270_SRC,BR_1516329_SRC,BR_1516358_SRC,BR_1516416_SRC,BR_1516505_SRC,BR_1516547_SRC,BR_1516582_SRC,BR_1516749_SRC,BR_1516792_SRC,BR_1516895_SRC
tc_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TC_1_TRG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TC_2_TRG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TC_3_TRG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TC_4_TRG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TC_5_TRG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
list_fversion_to_testday = [('48 Branch','20160603'),('48 Branch','20160624'),('48 Branch','20160708'),
                            ('49 Branch','20160722'),('49 Branch','20160812'),('49 Branch','20160826'),
                            ('50 Branch','20160909'),('50 Branch','20160930'),('50 Branch','20161014'),
                            ('51 Branch','20161028'),('51 Branch','20161125'),('51 Branch','20170106')]

#br_aux = bugreports[(bugreports.Version == '50 Branch') |  (bugreports.Version == '48 Branch') | (bugreports.Version == '60 Branch')].sample(10, random_state=42)
#tc_aux = testcases.sample(50, random_state=42)

def check_link_condition(br, tc):
    for tup in [(br['Version'],tday) for tday in tc['TestDay'].split(' + ')]:
        if tup in list_fversion_to_testday:
            return True
    return False


def create_links(idx, tc_df, br_df):
    oracle_df = pd.DataFrame(columns=br_df.br_name, index=tc_df.tc_name, data=np.zeros(shape=(len(tc_df),len(br_df))), dtype='int8')
    for idx_1,br in tqdm(br_df.iterrows()):
        for idx_2,tc in tc_df.iterrows():
            if check_link_condition(br, tc):
                oracle_df.at[tc.tc_name, br.br_name] = 1
            else:
                oracle_df.at[tc.tc_name, br.br_name] = 0
    
    oracle_df.to_csv('../data/mozilla_firefox_v2/firefoxDataset/oracle/output/part/trace_matrix_{}.csv'.format(idx))

def create_br_dfs_list():
    list_br_dfs = []
    for i in range(0, 36000, 5045):
        list_br_dfs.append(bugreports.iloc[i:i+5045,:])
    return list_br_dfs

def create_tc_dfs_list():
    return [testcases.copy() for i in range(6)]

#create_links(testcases, bugreports)

tasks = [(idx,tc_df, br_df) for idx,(tc_df,br_df) in enumerate(zip(create_tc_dfs_list(),create_br_dfs_list()))]
#tasks = [(tc_df, br_df) for tc_df,br_df in zip([tc_aux],[br_aux])]
results = Parallel(n_jobs=7, verbose=1)(delayed(create_links)(idx,tc_df,br_df) for idx,tc_df,br_df in tasks)


[Parallel(n_jobs=7)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=7)]: Done   6 out of   6 | elapsed:  3.0min finished


In [37]:
oo_df = pd.DataFrame(index=testcases.tc_name, columns=bugreports.br_name, data=np.zeros(shape=(len(testcases),len(bugreports)), dtype='int8'))
for i in range(6):
    oo_df.append(pd.read_csv('../data/mozilla_firefox_v2/firefoxDataset/oracle/output/part/trace_matrix_{}.csv'.format(i)))

print(oo_df.shape)
print(oo_df.info())

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


(207, 35314)
<class 'pandas.core.frame.DataFrame'>
Index: 207 entries, TC_1_TRG to TC_208_TRG
Columns: 35314 entries, BR_506297_SRC to BR_1516895_SRC
dtypes: int8(35314)
memory usage: 7.0+ MB
None


#### Save DataFrames

In [38]:
oo_df.to_csv('../data/mozilla_firefox_v2/firefoxDataset/oracle/output/trace_matrix_final.csv')
bugreports.to_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/BR/bugreports_final.csv', index=False)
testcases.to_csv('../data/mozilla_firefox_v2/firefoxDataset/docs_english/TC/testcases_final.csv', index=False)

### -----

### Checking Values

In [41]:
ck_df = pd.DataFrame(columns=['testday','f_version','features_released','testcases_list'])
ck_df.testday = ['20160603', '20160624', '20160708', 
                 '20160722', '20160812', '20160826', 
                 '20160909', '20160930', '20161014', 
                 '20161028', '20161125', '20170106']
ck_df.f_version = ['Branch 48', 'Branch 48', 'Branch 48', 
                  'Branch 49', 'Branch 49', 'Branch 49', 
                  'Branch 50', 'Branch 50', 'Branch 50', 
                  'Branch 51', 'Branch 51', 'Branch 51' ]
ck_df.features_released = [
    "Awesome Bar Search, Awesome Bar Icons - Left, Awesome Bar Icons - Right",
    "Awesome Bar Search, Awesome Bar Icons - Left, Awesome Bar Icons - Right",
    "apz, Scrolling using different devices (wired mouse, wireless mouse, trackpad/touchpad) - where available devices",
    'context menu - exploratory testing, context menu - full functional testing, pdf viewer, browser customization',
    'windows 10 compatibility, text to speech in reader mode, text to speech on desktop',
    'webgl compatibility, exploratory testing',
    '',
    'Pointer Lock API, WebM EME support for Widevine',
    'New Awesome Bar',
    'Zoom indicator, Downloads dropmaker',
    'WebGL2,  FLAC support,  Indicator for device permissions,  Zoom Indicator',
    'WebGL2, Zoom Indicator, Flash support']

ck_df.head(20)


Unnamed: 0,testday,f_version,features_released,testcases_list
0,20160603,Branch 48,"Awesome Bar Search, Awesome Bar Icons - Left, ...",
1,20160624,Branch 48,"Awesome Bar Search, Awesome Bar Icons - Left, ...",
2,20160708,Branch 48,"apz, Scrolling using different devices (wired ...",
3,20160722,Branch 49,"context menu - exploratory testing, context me...",
4,20160812,Branch 49,"windows 10 compatibility, text to speech in re...",
5,20160826,Branch 49,"webgl compatibility, exploratory testing",
6,20160909,Branch 50,,
7,20160930,Branch 50,"Pointer Lock API, WebM EME support for Widevine",
8,20161014,Branch 50,New Awesome Bar,
9,20161028,Branch 51,"Zoom indicator, Downloads dropmaker",
