## import modules

In [65]:
import pandas as pd
import datetime
import numpy as np
from dateutil import parser
import re
from git import *

### Most detailed commit history for a given repository (ActiveMQ)

### load commits data

In [66]:
path="/home/herimanitra/Téléchargements/data-master/"
commits = pd.read_csv(path+"activemq/git2.log", sep="\u0012", header=None, names=['raw'])                    
commits.head()

  


Unnamed: 0,raw
0,--2518bde--Fri May 5 15:43:09 2017 +0100--gtul...
1,10\t2\tactivemq-kahadb-store/src/main/java/org...
2,1\t1\tactivemq-kahadb-store/src/main/java/org/...
3,201\t0\tactivemq-kahadb-store/src/test/java/or...
4,--ae97fbd--Fri May 5 11:01:33 2017 +0100--gtul...


### Format data into a useful structure

In [67]:
commit_marker = commits[commits['raw'].str.startswith("--")]
commit_info = commit_marker['raw'].str.extract(r"^--(?P<sha>.*?)--(?P<date>.*?)--(?P<author>.*?)--(?P<message>.*?)$", 
                                               expand=True)   
commit_info['date'] = pd.to_datetime(commit_info['date'])
file_stats_marker = commits[~commits.index.isin(commit_info.index)]
#
file_stats = file_stats_marker['raw'].str.split("\t", expand=True)
file_stats = file_stats.rename(columns={ 0: "insertions", 1: "deletions", 2: "filename"})
file_stats['insertions'] = pd.to_numeric(file_stats['insertions'], errors='coerce')
file_stats['deletions'] = pd.to_numeric(file_stats['deletions'], errors='coerce')
#
commit_info.reindex(commits.index)
#addition des statistiques
commit_data = commit_info.reindex(commits.index).fillna(method="ffill")
commit_data = commit_data[~commit_data.index.isin(commit_info.index)]
commit_data=commit_data.drop_duplicates()
commit_data = commit_data.join(file_stats)
commit_data.tail()

Unnamed: 0,sha,date,author,message,insertions,deletions,filename
144827,ab8f458,2005-12-13 16:17:50,James Strachan,removed old configuration documentation,0.0,1.0,activemq-core/src/main/resources/META-INF/serv...
144829,8f40a7e,2005-12-13 16:17:31,James Strachan,removed old configuration documentation,0.0,7.0,activemq-core/src/main/java/org/activemq/broke...
144831,fba1488,2005-12-12 20:28:34,James Strachan,updated the build so that the recursive build ...,30.0,13.0,activecluster/.classpath
144836,8f1763f,2005-12-12 17:57:25,James Strachan,Moved the trunk code into the trunk sub directory,8.0,0.0,activemq-gbean/.cvsignore
144850,40a7d3b,2005-12-12 17:53:59,James Strachan,Moved the trunk code into the trunk sub directory,20.0,0.0,LICENSE.txt


### finding test files

In [68]:
def isTest(x):
    import re
    if len(re.findall(r'/src/test/', x))>0:
        presence=1
    else:
        presence=0
    return presence
commit_data['isTest']= commit_data['filename'].apply(lambda x:isTest(x) )


### Only keep java file/class:

In [69]:
commit_data=commit_data[commit_data['filename'].str.endswith('.java')]
commit_data = commit_data.reset_index(drop=True)
#onversion en date time
commit_data['dateT']=commit_data['date'].apply(lambda x: datetime.datetime.strftime(x, "%Y-%m-%d"))
commit_data.head()

Unnamed: 0,sha,date,author,message,insertions,deletions,filename,isTest,dateT
0,2518bde,2017-05-05 14:43:09,gtully,[AMQ-6670] - trap classcast and marshall error...,10.0,2.0,activemq-kahadb-store/src/main/java/org/apache...,0,2017-05-05
1,b30ae4e,2017-05-05 09:47:34,gtully,[AMQ-6665] - remove extranious import from test,0.0,1.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-05
2,bc879d7,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04
3,2e2d5dd,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04
4,dcd9b9e,2017-05-04 11:58:25,gtully,use jdk8 allowed cypher in the restricted set,2.0,2.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-04


## Extract class name that are tested

In [70]:
commit_data_test= commit_data[commit_data["isTest"]==1]
commit_data_test=commit_data_test.reset_index()
def extract_tested_class(x):
    x1=re.compile("Test.java$").split(x)[0]
    x2= re.compile("/").split(x1)
    return x2[len(x2)-1]
commit_data_test["class"]=commit_data_test["filename"].apply(lambda x: extract_tested_class(x))
commit_data_test.head()


Unnamed: 0,index,sha,date,author,message,insertions,deletions,filename,isTest,dateT,class
0,1,b30ae4e,2017-05-05 09:47:34,gtully,[AMQ-6665] - remove extranious import from test,0.0,1.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-05,JaasDualAuthenticationNetworkBridge
1,4,dcd9b9e,2017-05-04 11:58:25,gtully,use jdk8 allowed cypher in the restricted set,2.0,2.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-04,SslBrokerService
2,6,2b3f0e5,2017-05-04 09:23:13,gtully,add some logging to diagnose ci failure,19.0,14.0,activemq-mqtt/src/test/java/org/apache/activem...,1,2017-05-04,MQTT
3,7,18d05ba,2017-05-03 15:17:29,gtully,modify test to validate setting isDLQ flag via...,192.0,199.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-03,AMQ6059
4,8,35f3010,2017-05-03 11:31:09,gtully,[AMQ-2100] fix for intermittent ci failure,4.0,1.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2017-05-03,JMSConsumer


## Extract name class from src

In [71]:
commit_data= commit_data[commit_data["isTest"]==0]
commit_data=commit_data.reset_index()
def extract_class(x):
    x1=re.compile(".java$").split(x)[0]
    x2= re.compile("/").split(x1)
    return x2[len(x2)-1]
commit_data["class"]=commit_data["filename"].apply(lambda x: extract_class(x))
commit_data.head()

Unnamed: 0,index,sha,date,author,message,insertions,deletions,filename,isTest,dateT,class
0,0,2518bde,2017-05-05 14:43:09,gtully,[AMQ-6670] - trap classcast and marshall error...,10.0,2.0,activemq-kahadb-store/src/main/java/org/apache...,0,2017-05-05,KahaDBStore
1,2,bc879d7,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04,AmqpWSTransport
2,3,2e2d5dd,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04,AmqpWSTransport
3,5,2bd3379,2017-05-04 10:08:32,gtully,[AMQ-6068] make full cleanup of the rar manage...,4.0,0.0,activemq-client/src/main/java/org/apache/activ...,0,2017-05-04,ActiveMQConnection
4,10,a0ba0bf,2017-05-03 10:36:06,gtully,[AMQ-6667] gate cursor cache enablement on a s...,11.0,2.0,activemq-broker/src/main/java/org/apache/activ...,0,2017-05-03,Queue


## merge src java with their latest respective test file

In [72]:
commit_data_test.columns= "test_"+commit_data_test.columns

In [73]:
latest_test=commit_data_test.sort_values('test_dateT', ascending=False).groupby('test_class', as_index=False).first()
latest_test=latest_test.reset_index('test_dateT',drop=True)
latest_test =latest_test.reset_index()
latest_test.head()

Unnamed: 0,index,test_class,test_index,test_sha,test_date,test_author,test_message,test_insertions,test_deletions,test_filename,test_isTest,test_dateT
0,0,AMQ1282.java,2815,5219fa1,2013-02-05 20:43:15,Timothy A. Bish,Fix warnings from use of deprecated asserts etc.,13.0,9.0,activemq-unit-tests/src/test/java/org/apache/a...,1,2013-02-05
1,1,AMQ1687,3844,0e7d5a9,2011-08-11 21:17:24,Timothy A. Bish,https://issues.apache.org/jira/browse/AMQ-2411,3.0,2.0,activemq-core/src/test/java/org/apache/activem...,1,2011-08-11
2,2,AMQ1730,2806,600f209,2013-02-07 18:20:32,Timothy A. Bish,Don't use hardcoded ports.,12.0,14.0,activemq-spring/src/test/java/org/apache/bugs/...,1,2013-02-07
3,3,AMQ1853,3675,ddb15af,2011-11-18 14:23:33,Timothy A. Bish,Fix for NPE in test.,10.0,2.0,activemq-core/src/test/java/org/apache/activem...,1,2011-11-18
4,4,AMQ1866.java,5589,08e01e7,2008-07-25 16:13:20,Hiram R. Chirino,Updating testcase.. this new version fails ver...,59.0,51.0,activemq-core/src/test/java/org/apache/activem...,1,2008-07-25


In [74]:
commit_data= pd.merge(commit_data,latest_test,how="left",left_on="class",right_on="test_class")

### Days elapsed since last change per java class (file) : One component of the ranking formula I want to build

In [75]:
commit_data['days_sinceLastModif']= 0
for index,obs in enumerate(range(commit_data.shape[0])):
    javaclass= commit_data['filename'][index]
    lastModif=commit_data.loc[index,'date'] 
    beforelastModif= commit_data.loc[(commit_data['filename']==
                                     javaclass) & (commit_data['date']<lastModif),['date']].max()
    #daysdiff=  datetime.datetime.now()-lastModif
    daysdiff=lastModif -beforelastModif
    commit_data.loc[index,'days_sinceLastModif'] =np.int64(daysdiff.dt.days)

### Extract issue keys from commit message in commit_data

In [76]:
def extractKey(text):
    import re
    m = re.search('(?<=-)\w+', text)
    if m:
        found = m.group(0)
        return 'AMQ-'+found.strip()
    else:
        return ''
commit_data['Key']= commit_data['message'].apply(lambda x: extractKey(x))
commit_data.head()

Unnamed: 0,index_x,sha,date,author,message,insertions,deletions,filename,isTest,dateT,...,test_date,test_author,test_message,test_insertions,test_deletions,test_filename,test_isTest,test_dateT,days_sinceLastModif,Key
0,0,2518bde,2017-05-05 14:43:09,gtully,[AMQ-6670] - trap classcast and marshall error...,10.0,2.0,activemq-kahadb-store/src/main/java/org/apache...,0,2017-05-05,...,NaT,,,,,,,,153,AMQ-6670
1,2,bc879d7,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04,...,NaT,,,,,,,,-9223372036854775808,AMQ-6669
2,3,2e2d5dd,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,activemq-amqp/src/main/java/org/apache/activem...,0,2017-05-04,...,NaT,,,,,,,,-9223372036854775808,AMQ-6669
3,5,2bd3379,2017-05-04 10:08:32,gtully,[AMQ-6068] make full cleanup of the rar manage...,4.0,0.0,activemq-client/src/main/java/org/apache/activ...,0,2017-05-04,...,NaT,,,,,,,,28,AMQ-6068
4,10,a0ba0bf,2017-05-03 10:36:06,gtully,[AMQ-6667] gate cursor cache enablement on a s...,11.0,2.0,activemq-broker/src/main/java/org/apache/activ...,0,2017-05-03,...,NaT,,,,,,,,98,AMQ-6667


## Now,Load metrics of activemq

### il faudrait joindre Release-Date dans activemq-metric-history

In [77]:
activemq_summary= pd.read_csv(path+"activemq-summary.txt")
activemq_summary['release'] = activemq_summary[' Release-Date'].apply(lambda x: parser.parse(x))
activemq_summary.rename(columns={' Version-ID':'version'},inplace=True)
activemq_summary['version'] = activemq_summary['version'].apply(lambda x: x.strip())
activemq_summary.head()

Unnamed: 0,RSN,version,Release-Date,#Classes,#Interfaces,#Methods,#Fields,#Size-In-Bytes,release
0,1,1.0M1,26-April-2004,205,1934,144,689,624569,2004-04-26
1,2,1.0.0,19-August-2004,368,3831,250,1385,1228417,2004-08-19
2,3,1.1.0,07-October-2004,453,4941,286,1792,1579034,2004-10-07
3,4,1.2.0,23-November-2004,467,5095,282,1806,1631249,2004-11-23
4,5,1.3.0,07-December-2004,470,5124,282,1818,1640507,2004-12-07


### OO metrics et traditionnal activeMQ per release version:

In [78]:
activemq= pd.read_csv(path+"activemq-metric-history.txt")
activemq['Class Name']= activemq['Class Name'].apply(lambda x: x + '.java' )
activemq.tail()

Unnamed: 0,Class Name,Metric Name,1.0M1,1.0.0,1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,2.0.0,...,4.0.0,4.1.d,4.1.0,4.1.1,5.0.0,5.1.0,5.2.0,5.3.0,5.3.1,5.3.2
330115,org/codehaus/activemq/web/ConnectionManager.java,CCE,,0.0,0.0,,,,,,...,,,,,,,,,,
330116,org/codehaus/activemq/web/MessageServlet.java,CCE,,3.0,3.0,,,,,,...,,,,,,,,,,
330117,org/codehaus/activemq/web/MessageServletSuppor...,CCE,,0.0,0.0,,,,,,...,,,,,,,,,,
330118,org/codehaus/activemq/web/NoDestinationSupplie...,CCE,,0.0,0.0,,,,,,...,,,,,,,,,,
330119,org/codehaus/activemq/web/WebClient.java,CCE,,3.0,3.0,,,,,,...,,,,,,,,,,


### reshape variables (release version) to values

In [79]:
activemq= pd.melt(activemq, id_vars=['Class Name','Metric Name'], value_vars=list(activemq.columns[2:]))
activemq.rename(columns={'variable':'version','value': 'metric_value'},inplace=True)
activemq.tail()

Unnamed: 0,Class Name,Metric Name,version,metric_value
11224075,org/codehaus/activemq/web/ConnectionManager.java,CCE,5.3.2,
11224076,org/codehaus/activemq/web/MessageServlet.java,CCE,5.3.2,
11224077,org/codehaus/activemq/web/MessageServletSuppor...,CCE,5.3.2,
11224078,org/codehaus/activemq/web/NoDestinationSupplie...,CCE,5.3.2,
11224079,org/codehaus/activemq/web/WebClient.java,CCE,5.3.2,


### display some data without NaN

In [80]:
activemq[(activemq['version']=='1.0M1') & (activemq['metric_value'].isnull()==False) & (activemq['Metric Name']=='RFC') ].tail()

Unnamed: 0,Class Name,Metric Name,version,metric_value
161352,org/codehaus/activemq/transport/udp/UdpTranspo...,RFC,1.0M1,0.0
161353,org/codehaus/activemq/transport/vm/VmTransport...,RFC,1.0M1,0.0
161354,org/codehaus/activemq/transport/vm/VmTransport...,RFC,1.0M1,0.0
161355,org/codehaus/activemq/transport/vm/VmTransport...,RFC,1.0M1,0.0
161356,org/codehaus/activemq/transport/vm/VmTransport...,RFC,1.0M1,0.0



## merge release date : activemq_summary['release'] with activemq (metrics)



In [81]:
activemq=pd.merge(activemq,activemq_summary,how='left',on='version')
activemq=activemq[activemq['metric_value'].isnull()==False]
activemq.tail()

Unnamed: 0,Class Name,Metric Name,version,metric_value,RSN,Release-Date,#Classes,#Interfaces,#Methods,#Fields,#Size-In-Bytes,release
11223398,org/apache/activemq/wireformat/WireFormatFacto...,CCE,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223400,org/apache/activemq/xbean/BrokerFactoryBean.java,CCE,5.3.2,5.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223409,org/apache/activemq/xbean/PooledBrokerFactoryB...,CCE,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223410,org/apache/activemq/xbean/XBeanBrokerFactory.java,CCE,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223411,org/apache/activemq/xbean/XBeanBrokerService.java,CCE,5.3.2,4.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26


## Change metric modality values to column of metric

In [82]:
X=activemq.pivot( columns='Metric Name' ,values='metric_value')
X.columns

Index([u'AGE', u'AMC', u'BRS', u'CAC', u'CBC', u'CC', u'CCC', u'CCE', u'CLC',
       u'DIT', u'DMB', u'DMV', u'EC', u'EODC', u'EVD', u'EVS', u'FFC', u'FMC',
       u'GUD', u'IAC', u'IAS', u'ICC', u'ID', u'IDC', u'IE', u'IFC', u'IGC',
       u'II', u'IIC', u'IIN', u'IK', u'ILC', u'IM', u'IMC', u'IMN', u'INC',
       u'INF', u'INS', u'IOC', u'IODC', u'IP', u'IR', u'ISC', u'ITC', u'JUO',
       u'LAY', u'LFI', u'LIC', u'LMCE', u'LMCI', u'LRT', u'LVC', u'MCC',
       u'MCE', u'MCI', u'MFR', u'MMB', u'MMC', u'MSB', u'NAC', u'NBC', u'NCN',
       u'NOA', u'NOC', u'NOD', u'NOF', u'NOM', u'NOP', u'NVS', u'ODC', u'PFC',
       u'PMC', u'RFC', u'RLC', u'RMC', u'RSC', u'RSZ', u'SCC', u'SFC', u'SFI',
       u'SIC', u'SMC', u'TCC', u'THC', u'TIC', u'UFC', u'USC', u'YMC', u'ZFC',
       u'ZOC'],
      dtype='object', name=u'Metric Name')

In [83]:
activemq.columns

Index([u'Class Name', u'Metric Name', u'version', u'metric_value', u'RSN',
       u' Release-Date', u' #Classes', u' #Interfaces', u' #Methods',
       u' #Fields', u' #Size-In-Bytes', u'release'],
      dtype='object')

In [84]:
for var in activemq.columns:
    if var not in X.columns:
        X[var]= activemq[var]
X.tail()

Metric Name,AGE,AMC,BRS,CAC,CBC,CC,CCC,CCE,CLC,DIT,...,version,metric_value,RSN,Release-Date,#Classes,#Interfaces,#Methods,#Fields,#Size-In-Bytes,release
11223398,,,,,,,,0.0,,,...,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223400,,,,,,,,5.0,,,...,5.3.2,5.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223409,,,,,,,,0.0,,,...,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223410,,,,,,,,0.0,,,...,5.3.2,0.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26
11223411,,,,,,,,4.0,,,...,5.3.2,4.0,34,26-April-2010,1382,17354,545,5225,5901986,2010-04-26


## Load issues data of activeMQ

In [85]:
severity1=pd.read_excel(path+"activeMQ_part1.xls")
severity1['Key']=severity1['Key'].apply(lambda x: str(x).strip())
severity2=pd.read_excel(path+"activeMQ_part2.xls")
severity2['Key']=severity2['Key'].apply(lambda x: str(x).strip())
severity3=pd.read_excel(path+"activeMQ_part3.xls")
severity3['Key']=severity3['Key'].apply(lambda x: str(x).strip())
severity4=pd.read_excel(path+"activeMQ_part4.xls")
severity4['Key']=severity4['Key'].apply(lambda x: str(x).strip())
severity5=pd.read_excel(path+"activeMQ_part5.xls")
severity5['Key']=severity5['Key'].apply(lambda x: str(x).strip())
severity=pd.concat([severity1,severity2,severity3,severity4,severity5])

In [86]:
cols=['Project','Key','Summary','Description','Issue Type','Status',
      'Resolution','Resolved','Creator','Created','Affects Version/s','Fix Version/s']
severity=severity.loc[severity['Project']=='ActiveMQ',cols]
severity.reset_index()

Unnamed: 0,index,Project,Key,Summary,Description,Issue Type,Status,Resolution,Resolved,Creator,Created,Affects Version/s,Fix Version/s
0,0,ActiveMQ,AMQ-6670,KahaDB - Inconsistent error handling on corrup...,"When the journal is corrupt, for example if it...",Bug,Resolved,Fixed,05/May/17 14:46,Gary Tully,05/May/17 14:28,5.14.0,5.15.0
1,12,ActiveMQ,AMQ-6669,AMQP: WS connections don't respect maxFrameSiz...,For AMQP WebSocket connections the maxFrameSiz...,Bug,Resolved,Fixed,04/May/17 20:55,Timothy Bish,04/May/17 20:36,5.14.5,"5.15.0, 5.14.6"
2,13,ActiveMQ,AMQ-6668,Broker not created by activemq.xml file on off...,"If the PC is not connected to internet, during...",Bug,Resolved,Duplicate,03/May/17 17:15,Giuseppe Gerla,03/May/17 16:23,5.14.1,
3,14,ActiveMQ,AMQ-6667,"Many instances of ""duplicate message ... from ...",In a high throughput scenario on a single dest...,Bug,Resolved,Fixed,03/May/17 10:43,Gary Tully,03/May/17 10:28,5.14.0,5.15.0
4,29,ActiveMQ,AMQ-6666,Failover Transport - send timeout not working,Running into a situation with the Failover Tra...,Bug,Open,Unresolved,,Martin Lichtin,30/Apr/17 20:01,5.14.0,
5,67,ActiveMQ,AMQ-6665,certificate-based authentication on network br...,client certificate authentication works in the...,Bug,Resolved,Fixed,27/Apr/17 11:10,Gary Tully,27/Apr/17 10:57,5.14.0,5.15.0
6,88,ActiveMQ,AMQ-6664,XMLs in ActiveMQ admin panel are not generated...,Start ActiveMQ 5.14.5 and go to http://127.0.0...,Bug,Open,Unresolved,,Jacek K.,26/Apr/17 13:47,5.14.5,
7,113,ActiveMQ,AMQ-6663,IO exceptions causing Broker Service shutdown;...,ActiveMQ version: 5.11.0.redhat-620143,Bug,Open,Unresolved,,Vidyashree,26/Apr/17 09:00,5.11.0,
8,218,ActiveMQ,AMQ-6662,NullPointerException in org.apache.activemq.br...,My company's application uses JMX with ActiveM...,Bug,Open,Unresolved,,Dan Groves,25/Apr/17 19:57,5.14.5,
9,270,ActiveMQ,AMQ-6661,Auto transport with MQTT will fail for a long ...,The issue is that there is a variable length h...,Bug,Resolved,Fixed,25/Apr/17 12:47,Christopher L. Shannon,25/Apr/17 12:41,,"5.15.0, 5.14.6"


## merge with bug severity database found on issue.apache.org using issue keys

In [87]:
mask1=(commit_data['Key'].isnull()==False)
mask2 =(commit_data['Key']!="")
df=pd.merge (commit_data[mask1 & mask2], 
            severity[severity['Key'].isnull()==False], 
            how="left",on="Key")
df.tail()

Unnamed: 0,index_x,sha,date,author,message,insertions,deletions,filename,isTest,dateT,...,Summary,Description,Issue Type,Status,Resolution,Resolved,Creator,Created,Affects Version/s,Fix Version/s
4113,7903,22474cd,2005-12-21 20:43:50,Hiram R. Chirino,Fixed: https://jira.logicblaze.com/jira/browse...,4.0,0.0,activemq-core/src/main/java/org/activemq/Activ...,0,2005-12-21,...,,,,,,,,,,
4114,7905,75a6b83,2005-12-21 09:31:18,Adrian T. Co,Added a basic mean to query the jmx context. P...,386.0,183.0,activemq-core/src/main/java/org/activemq/broke...,0,2005-12-21,...,,,,,,,,,,
4115,7912,dad9a3b,2005-12-20 10:20:22,Adrian T. Co,Re-removed old config documentation.,0.0,7.0,activemq-core/src/main/java/org/activemq/broke...,0,2005-12-20,...,,,,,,,,,,
4116,7919,8b1f5a7,2005-12-19 17:43:22,James Strachan,* added test case to demonstrate query-based s...,5.0,0.0,activemq-core/src/main/java/org/activemq/Activ...,0,2005-12-19,...,,,,,,,,,,
4117,7936,639265d,2005-12-15 08:06:44,Hiram R. Chirino,Re-enable the use of continuations.,2.0,2.0,activemq-web/src/java/org/activemq/web/Message...,0,2005-12-15,...,,,,,,,,,,


In [88]:
len(np.unique(severity['Key']))

4978

In [89]:
df.shape

(4118, 36)

## Finally, merge with OO and traditional metrics based on version and class name

### split filename to begin with org/... and match with Class Name of X

In [90]:
def split_org(x):
    if "org" not in x:
        return x
    else:
        return "org" + re.compile("/org").split(x)[1]
        
df['filename']=df['filename'].apply(lambda x: split_org(x))

In [91]:
mydata=pd.merge(df, X, how="left", left_on=["filename","Affects Version/s"],right_on=["Class Name","version"])

In [92]:
mydata.head()

Unnamed: 0,index_x,sha,date,author,message,insertions,deletions,filename,isTest,dateT,...,version,metric_value,RSN,Release-Date,#Classes,#Interfaces,#Methods,#Fields,#Size-In-Bytes,release
0,0,2518bde,2017-05-05 14:43:09,gtully,[AMQ-6670] - trap classcast and marshall error...,10.0,2.0,org/apache/activemq/store/kahadb/KahaDBStore.java,0,2017-05-05,...,,,,,,,,,,NaT
1,2,bc879d7,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,org/apache/activemq/transport/amqp/AmqpWSTrans...,0,2017-05-04,...,,,,,,,,,,NaT
2,3,2e2d5dd,2017-05-04 20:37:53,Timothy Bish,https://issues.apache.org/jira/browse/AMQ-6669,5.0,0.0,org/apache/activemq/transport/amqp/AmqpWSTrans...,0,2017-05-04,...,,,,,,,,,,NaT
3,5,2bd3379,2017-05-04 10:08:32,gtully,[AMQ-6068] make full cleanup of the rar manage...,4.0,0.0,org/apache/activemq/ActiveMQConnection.java,0,2017-05-04,...,,,,,,,,,,NaT
4,10,a0ba0bf,2017-05-03 10:36:06,gtully,[AMQ-6667] gate cursor cache enablement on a s...,11.0,2.0,org/apache/activemq/broker/region/Queue.java,0,2017-05-03,...,,,,,,,,,,NaT
