# Load Libraries

In [1]:
import numpy
import pandas

import os
# Tell GitPython which git binary to use; this is set before `import git` below.
# Raw string: the Windows path contains backslashes (\P, \G, \c, \g) that are not
# valid escape sequences, which newer Python versions warn about in a normal string.
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = r"C:\Program Files\Git\cmd\git.exe"

import git
from git import RemoteProgress
from git import Repo

from datetime import datetime

import statistics
from statistics import mode

# Initialise & Clone Repos

### Jenkins

In [2]:
# Initialise and clone Jenkins repo
jenkins_remote = "https://github.com/jenkinsci/jenkins"
jenkins_local = "repo/jenkins"
# Repo.clone_from(jenkins_remote, jenkins_local) # Uncomment to clone repo
jenkins_repo = Repo(jenkins_local)
# Commit passed to find_vcc as the fixing commit for Jenkins.
jenkins_commit = "8830d68f5fe21f344be3496984bc4470bfcd0564"
# Files of the fixing commit to inspect. Forward slashes are used because git
# accepts them on every platform, whereas backslashes are only treated as
# separators on Windows; this also matches the paths used in the later cells.
jenkins_file_1 = "core/src/main/java/hudson/Plugin.java"
jenkins_file_2 = "test/src/test/java/hudson/PluginTest.java"

### Jython

In [3]:
# Initialise and clone Jython repo
jython_remote = "https://github.com/jythontools/jython"
jython_local = "repo/jython"
# Repo.clone_from(jython_remote, jython_local) # Uncomment to clone repo
jython_repo = Repo(jython_local)
# Commit passed to find_vcc as the fixing commit for Jython.
jython_commit = "4c337213bd2964bb36cef2d31509b49647ca6f2a"
# Files of the fixing commit to inspect. Forward slashes are used because git
# accepts them on every platform, whereas backslashes are only treated as
# separators on Windows; this also matches the paths used in the later cells.
jython_file_1 = "Lib/test/test_java_integration.py"
jython_file_2 = "Lib/test/test_new.py"
jython_file_3 = "src/org/python/core/PyBytecode.java"
jython_file_4 = "src/org/python/core/PyFunction.java"

### Camel

In [4]:
# Initialise and clone Camel repo
camel_remote = "https://github.com/apache/camel"
camel_local = "repo/camel"
# Repo.clone_from(camel_remote, camel_local) # Uncomment to clone repo
camel_repo = Repo(camel_local)
# Commit passed to find_vcc as the fixing commit for Camel.
camel_commit = "2e8f21dec883b083ddcdddd802847b4c378a61a2"
# File of the fixing commit to inspect. Forward slashes are used because git
# accepts them on every platform, whereas backslashes are only treated as
# separators on Windows; this also matches the paths used in the later cells.
camel_file_1 = "camel-core/src/main/java/org/apache/camel/processor/validation/SchemaReader.java"

## Finding VCC

### Function To Find VCC

In [5]:
def _parse_hunk_range(token):
    """Split a hunk-header range token such as ``-12,5`` or ``+7`` into (start, count) strings."""
    start, _, count = token[1:].partition(",")
    # Unified diffs omit the count when the range covers exactly one line.
    return start, (count or "1")


def _blame_hashes(repo, revision, file, line_range):
    """Return the first space-separated token (the commit id) of each `git blame -L<line_range>` line."""
    blame = repo.git.blame("-L" + line_range, revision, "--", file).splitlines()
    return [entry.split(" ")[0] for entry in blame]


def _blame_hunk(repo, revision, file, removed, added_range):
    """Collect blamed commit ids for one hunk: each removed line first, then the added-lines range (if any)."""
    hashes = []
    for line_number in removed:
        hashes.extend(_blame_hashes(repo, revision, file, str(line_number) + ",+1"))
    if added_range is not None:
        hashes.extend(_blame_hashes(repo, revision, file, added_range))
    return hashes


def find_vcc(repo, fixing_commit, files):
    """Guess the vulnerability-contributing commit (VCC) for a fixing commit.

    For every file, the diff between the fixing commit and its first parent is
    scanned hunk by hunk. Each removed line is blamed individually in the
    parent revision, and for every hunk that adds lines the hunk's line range
    is blamed in the parent revision as well. The commit id that occurs most
    often across all blame results is returned.

    Parameters:
        repo: object exposing ``repo.git.diff(...)`` and ``repo.git.blame(...)``
            that return the command output as a string (a GitPython ``Repo``
            in this notebook).
        fixing_commit: revision of the commit that fixed the vulnerability.
        files: iterable of repository-relative paths to inspect.

    Returns:
        The most frequent commit id exactly as printed by ``git blame``
        (abbreviated hash).

    Raises:
        statistics.StatisticsError: if no changed line could be blamed, e.g.
            the fixing commit does not touch any of ``files``.
    """
    parent_commit = fixing_commit + "^"
    commits = []
    for file in files:
        in_hunk = False       # True once the first "@@" header of this file was seen.
        removed = []          # Parent-side line numbers of "-" lines in the current hunk.
        has_added = False     # Whether the current hunk contains any "+" line.
        child_lines = None    # "<start>,+<count>" of the current hunk on the child side.
        parent_line = 0       # Parent-side line number of the next non-added line.

        # Get difference between fixing commit and parent commit to find edited lines.
        data = repo.git.diff("--full-index", parent_commit + ".." + fixing_commit, "--", file).splitlines()
        for line in data:
            if line.startswith("@"):
                # A new hunk starts: blame everything recorded for the previous one.
                if in_hunk:
                    commits.extend(
                        _blame_hunk(repo, parent_commit, file, removed, child_lines if has_added else None)
                    )
                    removed = []
                    has_added = False
                # Header looks like "@@ -<start>[,<count>] +<start>[,<count>] @@ ...".
                tokens = line.split(" ")
                parent_start, _ = _parse_hunk_range(tokens[1])
                child_start, child_count = _parse_hunk_range(tokens[2])
                parent_line = int(parent_start)
                # NOTE(review): this is the child-side range, yet it is blamed in the
                # parent revision; kept as in the original heuristic - confirm intent.
                child_lines = child_start + ",+" + child_count
                in_hunk = True
            elif in_hunk:
                if line.startswith("-"):
                    # Removed line: remember its parent-side number for blame.
                    removed.append(parent_line)
                    parent_line += 1
                elif line.startswith("+"):
                    # Added line: does not exist in the parent, only flag the hunk.
                    has_added = True
                elif line.startswith("\\"):
                    # "\ No newline at end of file" marker: not a source line.
                    continue
                else:
                    # Context line: present on both sides.
                    parent_line += 1

        # No more lines to read: blame whatever the last hunk of this file recorded.
        commits.extend(_blame_hunk(repo, parent_commit, file, removed, child_lines if has_added else None))

    if not commits:
        raise statistics.StatisticsError(
            "no blamed lines found for commit " + fixing_commit + " in the given files"
        )
    # The most frequently occurring commit is considered the VCC.
    return mode(commits)

### Jenkins

In [6]:
# Run the VCC heuristic over the files inspected for the Jenkins fixing commit.
jenkins_fix_files = [jenkins_file_1, jenkins_file_2]
jenkins_vcc = find_vcc(jenkins_repo, jenkins_commit, jenkins_fix_files)
print("VCC for Jenkins:", jenkins_vcc)

VCC for Jenkins: eb03a42078f


### Jython

In [7]:
# Run the VCC heuristic over the files inspected for the Jython fixing commit.
jython_fix_files = [jython_file_1, jython_file_2, jython_file_3, jython_file_4]
jython_vcc = find_vcc(jython_repo, jython_commit, jython_fix_files)
print("VCC for Jython:", jython_vcc)

VCC for Jython: f78ec9fea8


### Camel

In [8]:
# Run the VCC heuristic over the file inspected for the Camel fixing commit.
camel_fix_files = [camel_file_1]
camel_vcc = find_vcc(camel_repo, camel_commit, camel_fix_files)
print("VCC for Camel:", camel_vcc)

VCC for Camel: 8afc5d175779


## Commit Details

### Jenkins

__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - With Comments & Blank Lines:__

In [9]:
# Commit header, message, per-file change counts (--stat) and per-directory
# share of the change (--dirstat) for the Jenkins VCC.
jenkins_stat_options = ["--pretty=medium", "--stat", "--dirstat", "-1"]
jenkins_data = jenkins_repo.git.show(*jenkins_stat_options, jenkins_vcc).splitlines()
for line in jenkins_data:
    print(line)

commit eb03a42078f29dbed3742b8740c95e02890e4545
Author: Jesse Glick <jglick@cloudbees.com>
Date:   Tue Jan 30 12:16:42 2018 -0500

    [SECURITY-705] Path traversal vulnerability in Plugin.doDynamic.

 core/src/main/java/hudson/Plugin.java     | 23 ++++++++++-------------
 test/src/test/java/hudson/PluginTest.java | 15 ++++++++++-----
 2 files changed, 20 insertions(+), 18 deletions(-)
  50.3% core/src/main/java/hudson/
  49.6% test/src/test/java/hudson/


__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - Without Comments & Blank Lines:__

In [10]:
# Same summary for the Jenkins VCC, with --ignore-blank-lines added.
# NOTE(review): git documents this flag as ignoring changes whose lines are all
# blank; comment lines do not appear to be excluded by it - confirm against the heading.
jenkins_stat_options = ["--pretty=medium", "--stat", "--dirstat", "--ignore-blank-lines", "-1"]
jenkins_data = jenkins_repo.git.log(*jenkins_stat_options, jenkins_vcc).splitlines()
for line in jenkins_data:
    print(line)

commit eb03a42078f29dbed3742b8740c95e02890e4545
Author: Jesse Glick <jglick@cloudbees.com>
Date:   Tue Jan 30 12:16:42 2018 -0500

    [SECURITY-705] Path traversal vulnerability in Plugin.doDynamic.

 core/src/main/java/hudson/Plugin.java     | 23 ++++++++++-------------
 test/src/test/java/hudson/PluginTest.java | 15 ++++++++++-----
 2 files changed, 20 insertions(+), 18 deletions(-)
  50.3% core/src/main/java/hudson/
  49.6% test/src/test/java/hudson/


__Time Since Last Modification & Total Number Of Modifications Since Creation:__

In [11]:
file_1 = "core/src/main/java/hudson/Plugin.java"
file_2 = "test/src/test/java/hudson/PluginTest.java"
jenkins_vcc_files = [file_1, file_2]

total = 0
for index, path in enumerate(jenkins_vcc_files):
    # Two blank lines separate the report of one file from the next.
    if index > 0:
        print("\n")
    print(path + ":\n")

    # Author dates of the VCC and of the previous commit touching this file.
    # "--" separates the revision from the path, as in the Camel cells, so git
    # never has to guess which argument is which.
    newer_date = None
    jenkins_data = jenkins_repo.git.log("--follow", "-n", "2", "--format=%ad", jenkins_vcc, "--", path).splitlines()
    for line in jenkins_data:
        print(line)
        # NOTE(review): "%c" is the locale's date/time representation, so this
        # parse presumably relies on an English/C locale - confirm.
        older_date = datetime.strptime(line, "%c %z")
        if newer_date is not None:
            print("Days Difference: " + str((newer_date - older_date).total_seconds() / 86400))
        newer_date = older_date

    # One --oneline entry per commit that modified the file up to the VCC.
    print("")
    jenkins_data = jenkins_repo.git.log("--follow", "--oneline", jenkins_vcc, "--", path).splitlines()
    for count, line in enumerate(jenkins_data, start=1):
        print(str(count) + ": " + line)
    total += len(jenkins_data)

print("")
print("Total Number of Modifications: " + str(total))
# Average per file; the divisor follows the file list instead of being hard-coded.
print("Average Number of Modifications: " + str(total / len(jenkins_vcc_files)))

core/src/main/java/hudson/Plugin.java:

Tue Jan 30 12:16:42 2018 -0500
Sun Feb 12 05:38:30 2017 -0500
Days Difference: 352.2765277777778

1: eb03a42078 [SECURITY-705] Path traversal vulnerability in Plugin.doDynamic.
2: 496703d0fe Spelling fixes in changelogs, Javadoc, logs and UI (#2718)
3: 3a83504e3d Deprecating subclassing of Plugin.
4: 1709ed3a2e Add @Deprecated annotations to things documented as @deprecated.
5: c80741615b Revert "This is failing the animal sniffer test"
6: 3e15684f75 This is failing the animal sniffer test
7: 680c54887b Updating javadoc based on PR feedback
8: 259f5e1780 Updating javadoc of Plugin, LabelAtomProperty, GlobalConfiguration with my observations from working on my plugin
9: f0a29b562e [FIXED SECURITY-155] Do not allow plugin code to be downloaded via doDynamic, only static resources.
10: 71a28615e3 [FIXED SECURITY-131] Recode restOfPath before constructing URLs from it, so it cannot be used for directory traversal.
11: fe8f140f04 Recognize /static/...

__Developers & Their Number Of Commits:__

In [12]:
developers = 0
jenkins_vcc_files = [file_1, file_2]

for index, path in enumerate(jenkins_vcc_files):
    if index > 0:
        print("")
    print(path + ":\n")
    # One shortlog line per author identity (name + e-mail) with its commit count.
    # "--" separates the revision from the path, as in the Camel cells.
    jenkins_authors = jenkins_repo.git.shortlog("-sne", jenkins_vcc, "--", path).splitlines()
    for line in jenkins_authors:
        print(line)
    developers += len(jenkins_authors)

print("")
# NOTE(review): this is the sum of per-file shortlog lines, so an author who
# appears for both files (or under two identities) is counted more than once.
print("Total number of developers: " + str(developers))
print("Average number of developers: " + str(developers / len(jenkins_vcc_files)))

core/src/main/java/hudson/Plugin.java:

    17	kohsuke <kohsuke@71c3de6d-444a-0410-be80-ed276b4c234a>
     5	Jesse Glick <jglick@cloudbees.com>
     4	Kohsuke Kawaguchi <kk@kohsuke.org>
     2	Dominique <dominiquebrice@users.noreply.github.com>
     2	Nigel Magnay <nigel.magnay@gmail.com>
     2	jglick <jglick@71c3de6d-444a-0410-be80-ed276b4c234a>
     1	Dave Brosius <dbrosius@mebigfatguy.com>
     1	David Hoover <dhoover@google.com>
     1	Josh Soref <jsoref@users.noreply.github.com>
     1	abayer <abayer@71c3de6d-444a-0410-be80-ed276b4c234a>
     1	imod <domi@fortysix.ch>

test/src/test/java/hudson/PluginTest.java:

     6	Jesse Glick <jglick@cloudbees.com>
     1	Kanstantsin Shautsou <kanstantsin.sha@gmail.com>

Total number of developers: 13
Average number of developers: 6.5


### Jython

__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - With Comments & Blank Lines:__

In [13]:
# Commit header, message, per-file change counts (--stat) and per-directory
# share of the change (--dirstat) for the Jython VCC.
jython_stat_options = ["--pretty=medium", "--stat", "--dirstat", "-1"]
jython_data = jython_repo.git.log(*jython_stat_options, jython_vcc).splitlines()
for line in jython_data:
    print(line)

commit f78ec9fea8a51c2c8e91f36db3c604a425cbe2ab
Author: Frank Wierzbicki <fwierzbicki@gmail.com>
Date:   Thu May 19 16:44:23 2011 -0700

    from https://svn.python.org/projects/python/branches/release26-maint/Lib/test/test_new.py@88766

 Lib/test/test_new.py | 343 ++++++++++++++++++++++++---------------------------
 1 file changed, 162 insertions(+), 181 deletions(-)
 100.0% Lib/test/


__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - Without Comments & Blank Lines:__

In [14]:
# Same summary for the Jython VCC, with --ignore-blank-lines added.
# NOTE(review): git documents this flag as ignoring changes whose lines are all
# blank; comment lines do not appear to be excluded by it - confirm against the heading.
jython_stat_options = ["--pretty=medium", "--stat", "--dirstat", "--ignore-blank-lines", "-1"]
jython_data = jython_repo.git.log(*jython_stat_options, jython_vcc).splitlines()
for line in jython_data:
    print(line)

commit f78ec9fea8a51c2c8e91f36db3c604a425cbe2ab
Author: Frank Wierzbicki <fwierzbicki@gmail.com>
Date:   Thu May 19 16:44:23 2011 -0700

    from https://svn.python.org/projects/python/branches/release26-maint/Lib/test/test_new.py@88766

 Lib/test/test_new.py | 343 ++++++++++++++++++++++++---------------------------
 1 file changed, 162 insertions(+), 181 deletions(-)
 100.0% Lib/test/


__Time Since Last Modification & Total Number Of Modifications Since Creation:__

In [15]:
file_1 = "Lib/test/test_new.py"
jython_vcc_files = [file_1]

total = 0
print(file_1 + ":\n")

# Author dates of the VCC and of the previous commit touching this file.
# "--" separates the revision from the path, as in the Camel cells, so git
# never has to guess which argument is which.
newer_date = None
jython_data = jython_repo.git.log("--follow", "-n", "2", "--format=%ad", jython_vcc, "--", file_1).splitlines()
for line in jython_data:
    print(line)
    # NOTE(review): "%c" is the locale's date/time representation, so this
    # parse presumably relies on an English/C locale - confirm.
    older_date = datetime.strptime(line, "%c %z")
    if newer_date is not None:
        print("Days Difference: " + str((newer_date - older_date).total_seconds() / 86400))
    newer_date = older_date

# One --oneline entry per commit that modified the file up to the VCC.
print("")
jython_data = jython_repo.git.log("--follow", "--oneline", jython_vcc, "--", file_1).splitlines()
for count, line in enumerate(jython_data, start=1):
    print(str(count) + ": " + line)
total += len(jython_data)

print("")
print("Total Number of Modifications: " + str(total))
# Average per file; the divisor follows the file list instead of being hard-coded.
print("Average Number of Modifications: " + str(total / len(jython_vcc_files)))

Lib/test/test_new.py:

Thu May 19 16:44:23 2011 -0700
Fri Mar 6 16:15:19 2009 +0000
Days Difference: 804.3118518518519

1: f78ec9fea from https://svn.python.org/projects/python/branches/release26-maint/Lib/test/test_new.py@88766
2: ea1f56be8 Merged revisions 5752,5756-5758,5761-5763,5765-5775,5778-5816,5818-5840,5842-5847,5854,5857-5859,5863-5865,5869,5875,5877,5884-5885,5889-5893,5900-5901,5903-5906,5908,5912-5918,5920,5923,5925,5927,5930-5932,5934-5947,5950-5951,5953-5957,5959,5961,5965-5966,5971-5976,5978-5979,5981-5983,5992,6001,6003,6006-6014,6023,6031-6037,6039-6040,6045-6047,6049-6052,6061-6065,6071 via svnmerge from https://jython.svn.sourceforge.net/svnroot/jython/trunk/jython
3: 8d116419e maybe 'never' was too strong for new.code in Jython.  Disable test for now.
4: e86f33ca4 from http://svn.python.org/projects/python/branches/release25-maint/Lib/test/test_new@52060

Total Number of Modifications: 4
Average Number of Modifications: 4.0


__Developers & Their Number Of Commits:__

In [16]:
developers = 0
jython_vcc_files = [file_1]

print(file_1 + ":\n")
# One shortlog line per author identity (name + e-mail) with its commit count.
# "--" separates the revision from the path, as in the Camel cells.
jython_authors = jython_repo.git.shortlog("-sne", jython_vcc, "--", file_1).splitlines()
for line in jython_authors:
    print(line)
developers += len(jython_authors)

print("")
print("Total number of developers: " + str(developers))
print("Average number of developers: " + str(developers / len(jython_vcc_files)))

Lib/test/test_new.py:

     3	Frank Wierzbicki <fwierzbicki@gmail.com>

Total number of developers: 1
Average number of developers: 1.0


### Camel

__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - With Comments & Blank Lines:__

In [17]:
# Commit header, message, per-file change counts (--stat) and per-directory
# share of the change (--dirstat) for the Camel VCC.
camel_stat_options = ["--pretty=medium", "--stat", "--dirstat", "-1"]
camel_data = camel_repo.git.show(*camel_stat_options, camel_vcc).splitlines()
for line in camel_data:
    print(line)

commit 8afc5d1757795fde715902067360af5d90f046da
Author: Franz Forsthofer <franz.forsthofer@sap.com>
Date:   Fri Feb 24 13:57:10 2017 +0100

    CAMEL-10894:  DTD handling in the XML Validator corrected

 .../camel/processor/validation/SchemaReader.java   | 15 +++-
 .../validator/ValidatorDtdAccessAbstractTest.java  | 86 ++++++++++++++++++++++
 .../validator/ValidatorDtdAccessOffTest.java       | 61 +++++++++++++++
 .../validator/ValidatorDtdAccessOnTest.java        | 61 +++++++++++++++
 4 files changed, 221 insertions(+), 2 deletions(-)
   6.8% camel-core/src/main/java/org/apache/camel/processor/validation/
  93.1% camel-core/src/test/java/org/apache/camel/component/validator/


__Title, Message, Files Affected, Directories Affected, Lines Deleted & Lines Added - Without Comments & Blank Lines:__

In [18]:
# Same summary for the Camel VCC, with --ignore-blank-lines added.
# NOTE(review): git documents this flag as ignoring changes whose lines are all
# blank; comment lines do not appear to be excluded by it - confirm against the heading.
camel_stat_options = ["--pretty=medium", "--stat", "--dirstat", "--ignore-blank-lines", "-1"]
camel_data = camel_repo.git.show(*camel_stat_options, camel_vcc).splitlines()
for line in camel_data:
    print(line)

commit 8afc5d1757795fde715902067360af5d90f046da
Author: Franz Forsthofer <franz.forsthofer@sap.com>
Date:   Fri Feb 24 13:57:10 2017 +0100

    CAMEL-10894:  DTD handling in the XML Validator corrected

 .../camel/processor/validation/SchemaReader.java   | 14 +++-
 .../validator/ValidatorDtdAccessAbstractTest.java  | 86 ++++++++++++++++++++++
 .../validator/ValidatorDtdAccessOffTest.java       | 61 +++++++++++++++
 .../validator/ValidatorDtdAccessOnTest.java        | 61 +++++++++++++++
 4 files changed, 221 insertions(+), 1 deletion(-)
   6.8% camel-core/src/main/java/org/apache/camel/processor/validation/
  93.1% camel-core/src/test/java/org/apache/camel/component/validator/


__Time Since Last Modification & Total Number Of Modifications Since Creation:__

In [19]:
file_1 = "camel-core/src/main/java/org/apache/camel/processor/validation/SchemaReader.java"
file_2 = "camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessAbstractTest.java"
file_3 = "camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessOffTest.java"
file_4 = "camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessOnTest.java"
camel_vcc_files = [file_1, file_2, file_3, file_4]

total = 0
for index, path in enumerate(camel_vcc_files):
    # Two blank lines separate the report of one file from the next.
    if index > 0:
        print("\n")
    # Heading is the file actually being queried (the third and fourth
    # sections previously printed file_2 by mistake).
    print(path + ":\n")

    # Author dates of the VCC and of the previous commit touching this file.
    newer_date = None
    camel_data = camel_repo.git.log("--follow", "-n", "2", "--format=%ad", camel_vcc, "--", path).splitlines()
    for line in camel_data:
        print(line)
        # NOTE(review): "%c" is the locale's date/time representation, so this
        # parse presumably relies on an English/C locale - confirm.
        older_date = datetime.strptime(line, "%c %z")
        if newer_date is not None:
            print("Days Difference: " + str((newer_date - older_date).total_seconds() / 86400))
        newer_date = older_date

    # One --oneline entry per commit that modified the file up to the VCC.
    print("")
    camel_data = camel_repo.git.log("--follow", "--oneline", camel_vcc, "--", path).splitlines()
    for count, line in enumerate(camel_data, start=1):
        print(str(count) + ": " + line)
    total += len(camel_data)

print("")
print("Total Number of Modifications: " + str(total))
# Average per file; the divisor follows the file list instead of being hard-coded.
print("Average Number of Modifications: " + str(total / len(camel_vcc_files)))

camel-core/src/main/java/org/apache/camel/processor/validation/SchemaReader.java:

Fri Feb 24 13:57:10 2017 +0100
Thu Nov 10 18:22:11 2016 +0100
Days Difference: 105.81596064814815

1: 8afc5d17577 CAMEL-10894:  DTD handling in the XML Validator corrected
3: a7d04c7efd4 CAMEL-9572: correction in SchemaReader
4: 77e079a8386 CAMEL-9572: Validator: clearSchemaCache method refactored
5: 115cb3affbe CAMEL-9517: Validator Endpoint- clearCachedSchema added


camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessAbstractTest.java:

Fri Feb 24 13:57:10 2017 +0100
Mon Oct 20 10:33:14 2014 +0800
Days Difference: 858.4332870370371

1: 8afc5d17577 CAMEL-10894:  DTD handling in the XML Validator corrected
2: f8c140f0183 CAMEL-7883 Fixed the CS errors
3: d57f402b7fe fix for CAMEL-7883
4: 72348dfff3b CAMEL-5608: Ensure validator component closes input stream after usage. Align code with the xlst/xpath is doing. Added option to fail if no body or not.
5: 4e171a8630a CAMEL-4342:

__Developers & Their Number Of Commits:__

In [20]:
developers = 0
camel_vcc_files = [file_1, file_2, file_3, file_4]

for index, path in enumerate(camel_vcc_files):
    # A blank line separates consecutive file sections.
    if index > 0:
        print("")
    print(path + ":\n")
    # One shortlog line per author identity (name + e-mail) with its commit count.
    camel_authors = camel_repo.git.shortlog("-sne", camel_vcc, "--", path).splitlines()
    for line in camel_authors:
        developers += 1
        print(line)

print("")
print("Total number of developers: " + str(developers))
# Four files are inspected, so this divides by 4 exactly as before.
print("Average number of developers: " + str(developers / len(camel_vcc_files)))

camel-core/src/main/java/org/apache/camel/processor/validation/SchemaReader.java:

     4	Franz Forsthofer <franz.forsthofer@sap.com>
     1	aldettinger <aldettinger@gmail.com>

camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessAbstractTest.java:

     1	Franz Forsthofer <franz.forsthofer@sap.com>

camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessOffTest.java:

     1	Franz Forsthofer <franz.forsthofer@sap.com>

camel-core/src/test/java/org/apache/camel/component/validator/ValidatorDtdAccessOnTest.java:

     1	Franz Forsthofer <franz.forsthofer@sap.com>

Total number of developers: 5
Average number of developers: 1.25
