# Load libraries

In [1]:
import numpy as np
import pandas as pd
import os

# Specify git executable file for GitPython in Jupyter Notebook (In IDE, it can still work without this line.)
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = "C:\Program Files\Git\cmd\git.exe"

import git
from git import RemoteProgress

from git import Repo
import matplotlib.pyplot as plt
import seaborn as sns
import re
%matplotlib inline

# Clone repo from GitHub

Link: https://git-scm.com/docs/git-clone <br>
<b>Note:</b> In case too-long file path issue occurs in Windows, set <code>git config --system core.longpaths true</code>

In [2]:
class Progress(RemoteProgress):
    def update(self, op_code, cur_count, max_count=None, message=''):
        print(self._cur_line)

In [3]:
remote_link = "https://github.com/spring-projects/spring-framework"
local_link = "spring-framework"
# Uncomment to clone
# Repo.clone_from(remote_link, local_link, progress=Progress())

In [4]:
repo = Repo(local_link)
fixing_commit = "246a6db1cad205ca9b6fca00c544ab7443ba202"
previous = fixing_commit + '^'
affected_file = "MappingJackson2HttpMessageConverter.java"

# Question a: show commit title and commit msg

In [6]:
show_data = repo.git.log("--format=%B", "-n", "1",fixing_commit).splitlines()

for line in show_data:
    print(line)

Selector header name is exposed for configuration

Issue: SPR-16732


# Question b: Show all affected files

In [7]:
 affected_file = repo.git.diff( "--name-only", fixing_commit, previous).splitlines()

# for line in affected_file:
print(len(affected_file))

8


# Question c: show all directories are affected

In [8]:
affected_directories = repo.git.diff("--dirstat", fixing_commit, previous).splitlines()

print(len(affected_directories))

7


# Question d: How many lines were deleted?

In [18]:
sum_delete = 0;
# print(previous)
deleted_lines = repo.git.diff(previous, fixing_commit).splitlines()
rule = re.compile('^-$|^-[^-]')

for line in deleted_lines:
    if rule.match(line):
        sum_delete = sum_delete + 1

            
print("Lines deleted: %d" %sum_delete)

#     print(line)

Lines deleted: 34


# Question e: How many lines were added?

In [10]:
sum_add = 0;
# print(previous)
add_lines = repo.git.diff(previous, fixing_commit).splitlines()
rule = re.compile('^\+$|^\+[^\+]')

for line in deleted_lines:
        if rule.match(line):
            sum_add += 1
            
print("Lines added: %d" %sum_add)


Lines added: 144


# Question f: How many lines were deleted (Excluding blank line & comment)

In [11]:
sum_deleted_without = 0;
# print(previous)
delete_lines = repo.git.diff("--ignore-blank-lines",previous, fixing_commit).splitlines()
rule = re.compile('^-$|^-[^-]')
rule1 = re.compile('^[-|\+]\s*\*')
rule2 = re.compile('^[-|\+]\s*\/\/')

for line in deleted_lines:
        if rule.match(line):
            sum_deleted_without += 1
            if rule1.match(line) or rule2.match(line):
                sum_deleted_without -= 1
            
print("Lines added: %d" %sum_deleted_without)

Lines added: 22


# Question g: How many lines were added (Excluding blank line & comment)

In [12]:
sum_added_without = 0;
# print(previous)
add_lines = repo.git.diff("--ignore-blank-lines",previous, fixing_commit).splitlines()
rule = re.compile('^\+$|^\+[^\+]')
rule1 = re.compile('^[-|\+]\s*\*')
rule2 = re.compile('^[-|\+]\s*\/\/')

for line in add_lines:
        if rule.match(line):
            sum_added_without += 1
            if rule1.match(line) or rule2.match(line):
                sum_added_without -= 1
            
print("Lines added: %d" %sum_added_without)

Lines added: 100


# Question h: How many days were between the current fixing commit and the previous commit of each affected file

In [13]:
time_different = 0
sum = 0
count = 0
for line in repo.git.diff("--name-only", fixing_commit, previous).splitlines():
    creation = repo.git.log("-2", "--pretty=%ct","--", line).splitlines();
    if len(creation) == 2:
        count += 1;
        time_different = ((float(creation[0]) - float(creation[1])) / 86400)
        sum = sum + time_different
        print("%s \t%.1f"%(line,((float(creation[0]) - float(creation[1])) / 86400)))
    else:
        print("%s is New file" %line)

avg = sum / count
print("%.1f" %avg)

spring-messaging/src/main/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistry.java 	76.3
spring-messaging/src/main/java/org/springframework/messaging/simp/broker/SimpleBrokerMessageHandler.java 	67.0
spring-messaging/src/main/java/org/springframework/messaging/simp/config/SimpleBrokerRegistration.java 	339.0
spring-messaging/src/test/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistryTests.java 	85.4
spring-websocket/src/main/java/org/springframework/web/socket/config/MessageBrokerBeanDefinitionParser.java 	110.4
spring-websocket/src/main/resources/org/springframework/web/socket/config/spring-websocket-4.3.xsd 	77.5
spring-websocket/src/test/java/org/springframework/web/socket/config/MessageBrokerBeanDefinitionParserTests.java 	85.4
spring-websocket/src/test/resources/org/springframework/web/socket/config/websocket-config-broker-simple.xml 	111.8
119.1


# Question i: How many times has each affected file of the current fixing commit been modified in the past since their creation (including rename of the file)?

In [14]:
sum_time = 0
count = 0
# implement code
for line in repo.git.diff("--name-only", fixing_commit, previous).splitlines():
    count = count + 1;
    times = repo.git.log("--follow", "--pretty=oneline", "--", line).splitlines();
    sum_time = sum_time + len(times)
    print(" %s: %d " %(line, len(times)))

avg = sum_time / count
print("%d" %avg)

 spring-messaging/src/main/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistry.java: 50 
 spring-messaging/src/main/java/org/springframework/messaging/simp/broker/SimpleBrokerMessageHandler.java: 56 
 spring-messaging/src/main/java/org/springframework/messaging/simp/config/SimpleBrokerRegistration.java: 14 
 spring-messaging/src/test/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistryTests.java: 30 
 spring-websocket/src/main/java/org/springframework/web/socket/config/MessageBrokerBeanDefinitionParser.java: 70 
 spring-websocket/src/main/resources/org/springframework/web/socket/config/spring-websocket-4.3.xsd: 37 
 spring-websocket/src/test/java/org/springframework/web/socket/config/MessageBrokerBeanDefinitionParserTests.java: 72 
 spring-websocket/src/test/resources/org/springframework/web/socket/config/websocket-config-broker-simple.xml: 21 
43


# Question j: Which developers have modified each affected file since its creation? 

In [16]:
# implement code
count = 0
for line in repo.git.diff("--name-only", fixing_commit, previous).splitlines():
    print(" %s"%line)
    authors = repo.git.log("--follow", "--pretty=%aN", "--", line).splitlines()
    for author in set(authors): # set for not repeating the same author
        print(" %s" %author)
        count = count + 1;

avg = count / len(affected_file)
print("%.1f" %avg)

 spring-messaging/src/main/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistry.java
 Phillip Webb
 Stephane Nicoll
 igor-suhorukov
 Sebastien Deleuze
 Rossen Stoyanchev
 Spring Operator
 Juergen Hoeller
 Alexander Riss
 Bao Tran
 spring-messaging/src/main/java/org/springframework/messaging/simp/broker/SimpleBrokerMessageHandler.java
 Phillip Webb
 Andy Wilkinson
 Stephane Nicoll
 Christoph Dreis
 Eric Dahl
 Rossen Stoyanchev
 Sebastien Deleuze
 Alan Hong
 Spring Operator
 Brian Clozel
 Juergen Hoeller
 Alexander Riss
 spring-messaging/src/main/java/org/springframework/messaging/simp/config/SimpleBrokerRegistration.java
 Phillip Webb
 Rossen Stoyanchev
 Sebastien Deleuze
 Spring Operator
 Brian Clozel
 Juergen Hoeller
 spring-messaging/src/test/java/org/springframework/messaging/simp/broker/DefaultSubscriptionRegistryTests.java
 Phillip Webb
 Phil Webb
 Rossen Stoyanchev
 Sebastien Deleuze
 Spring Operator
 Sam Brannen
 Juergen Hoeller
 spring-websocket/src/main/j

# Question k:

In [15]:
# implement code
allAuthors = []
count = 0
for line in repo.git.diff("--name-only", fixing_commit, previous).splitlines():
    allAuthors += repo.git.log("--follow", "--pretty=%aN", "--", line).splitlines()
allAuthors = list(set(allAuthors))
log = repo.git.log("--pretty=%aN")
for author in allAuthors:
    count = count + 1
    print(" %s: %d" %(author, len(re.findall(author, log))))
    
print("%d" %count)

 Rob Winch: 86
 igor-suhorukov: 39
 Eric Dahl: 1
 Rossen Stoyanchev: 3309
 Bao Tran: 1
 Phillip Webb: 607
 Sebastien Deleuze: 726
 Spring Operator: 8
 Hanope: 1
 Phil Webb: 1
 Сергей Цыпанов: 10
 Alan Hong: 1
 Juergen Hoeller: 5977
 Sam Brannen: 2448
 Alexander Riss: 1
 Johnny Lim: 42
 Philippe Marschall: 22
 Artem Bilan: 2
 Andy Wilkinson: 45
 Stephane Nicoll: 766
 Christoph Dreis: 13
 Brian Clozel: 550
22
