In [1]:
from perceval.backends.core.git import Git
import elasticsearch
import elasticsearch_dsl
import matplotlib.pyplot as plt
import pandas as pd

Run the following command on terminal to load the elastic search with raw and enriched indices.

p2o.py --enrich --index git_raw --index-enrich git_enriched -e http://localhost:9200 --no_inc --debug git https://github.com/grimoirelab/perceval.git

As per the microTask given, only enriched index is used here

In [2]:
es = elasticsearch.Elasticsearch(['http://localhost:9200/'])

In [3]:
s = elasticsearch_dsl.Search(using=es, index='git_enriched')

Get only the fields required using elasticsearch_dsl

In [4]:
request = s.source(['hash', 'commit_date', 'author_name'])


In [5]:
result = request.execute()

#### For load optimization and other reasons, the result obtained here has just 10 records. To obtain all the the records the following step is done 

In [6]:
result.hits.total
request = request[0:result.hits.total]
result = request.execute()

In [7]:
len(result)

1016

In [8]:
result = result.hits.hits

In [9]:
result[0]

{'_id': 'f3891cdcee5caa65add370ccb83ff7066edbe5be',
 '_index': 'git_enriched',
 '_score': 1.0,
 '_source': {'author_name': 'Santiago Dueñas',
  'commit_date': '2015-11-18T14:41:17',
  'hash': 'b0f6eb81d9b1dc5f77dce9954744016dfbb3cb4a'},
 '_type': 'items'}

#### Algorithm :
Use two dictionaries, one (dictionary of authors to total commits) for keeping track of new authors with their total commits to the repository.
And another(dictionary of months to authors and commit numbers), for keeping track of every month a commit was made with the authors who are new and the number of commits they made in that month.

Check if month is present in the dictionary of months to authors and commit numbers if month is present. Then check if author is present in the same dictionary, if present then increase his commit count in that month. Else if he is not present, then check if he is a new author, if yes, then add him in the dictionary. 
Eitherways, increase commit count of author in the dictionary of authors to their total commit count.

In [10]:
#all authors with their commit count
authors = {}

#new committers every month
new_authors_per_month = {}

for commit in result:

    #extract commit date
    date = commit['_source']['commit_date'].split('-')
    #Extract just month and year
    month = date[1]+' '+date[0]
    #extract author
    author = commit['_source']['author_name']

    if month in new_authors_per_month :
        if author in new_authors_per_month[month] :
            new_authors_per_month[month][author] = new_authors_per_month[month][author] + 1
        elif author not in authors and author not in new_authors_per_month[month]:
            new_authors_per_month[month][author] = 1
    else:
        if author not in authors:
            new_authors_per_month[month]= {author : 1}

    if author in authors :
        authors[author] = authors[author] + 1
    else :
        authors[author] = 1
    

In [11]:
new_authors_per_month

{'01 2016': {'Jesus M. Gonzalez-Barahona': 3},
 '01 2017': {'Stephan Barth': 1},
 '01 2018': {'Israel Herraiz': 1},
 '02 2016': {'Alberto Martín': 26},
 '02 2018': {'Miguel Ángel Fernández': 3},
 '03 2016': {'camillem': 2},
 '05 2016': {'quan': 3},
 '06 2016': {'J. Manrique Lopez de la Fuente': 1},
 '09 2016': {'Luis Cañas Díaz': 1},
 '09 2017': {'Valerio Cosentino': 5, 'valerio cosentino': 6},
 '10 2017': {'valerio': 2},
 '11 2015': {'Santiago Dueñas': 23},
 '11 2017': {'David Esler': 1, 'David Pose Fernández': 1},
 '12 2015': {'Alvaro del Castillo': 3},
 '12 2017': {'david': 1}}

In [12]:
authors

{'Alberto Martín': 51,
 'Alvaro del Castillo': 45,
 'David Esler': 1,
 'David Pose Fernández': 1,
 'Israel Herraiz': 1,
 'J. Manrique Lopez de la Fuente': 1,
 'Jesus M. Gonzalez-Barahona': 18,
 'Luis Cañas Díaz': 1,
 'Miguel Ángel Fernández': 3,
 'Santiago Dueñas': 708,
 'Stephan Barth': 1,
 'Valerio Cosentino': 169,
 'camillem': 2,
 'david': 1,
 'quan': 5,
 'valerio': 2,
 'valerio cosentino': 6}

In [13]:
custom_table = []
for each_month in new_authors_per_month:
    table = {}
    for author in new_authors_per_month[each_month] :
        table['Month<Month Year>'] = each_month
        table['Author'] = author
        table['No of commits in that month'] = new_authors_per_month[each_month][author]
        table['Total commits by author'] = authors[author]
    custom_table.append(table)

In [14]:
df = pd.DataFrame.from_records(custom_table)

In [15]:
display(df)

Unnamed: 0,Author,Month<Month Year>,No of commits in that month,Total commits by author
0,Santiago Dueñas,11 2015,23,708
1,Alvaro del Castillo,12 2015,3,45
2,Jesus M. Gonzalez-Barahona,01 2016,3,18
3,Alberto Martín,02 2016,26,51
4,quan,05 2016,3,5
5,Luis Cañas Díaz,09 2016,1,1
6,Valerio Cosentino,09 2017,5,169
7,Miguel Ángel Fernández,02 2018,3,3
8,camillem,03 2016,2,2
9,valerio,10 2017,2,2


In [16]:
df.to_csv('New authors.csv')