# Git Repository Author Analysis and Email Extraction.

## Import libraries

In [309]:
import os

import matplotlib.pyplot as plt
import pandas as pd

## Upload path

In [310]:
# Directory that receives every exported artifact (plots and the email list).
# The trailing slash matters: the export cells build file paths by plain
# string concatenation (f'{upload_path}<file name>').
upload_path = "./output/"

# Create the directory up front so the export cells do not fail with
# FileNotFoundError when the notebook is run on a fresh checkout.
os.makedirs(upload_path, exist_ok=True)

## Load data from a CSV file

In [311]:
# Read the exported git log into a DataFrame. Later cells rely on the
# "Author Name", "Author Email" and "Commit Hash" columns being present.
log_csv_path = "./data/log.csv"
data = pd.read_csv(log_csv_path)

In [None]:
# Preview the first rows to sanity-check the load (5 is the pandas default).
data.head(n=5)

## Rename authors for consistency

In [None]:
# Non-null value counts per column for each author name; use the listing to
# spot spelling variants that refer to the same person.
data.groupby(by="Author Name").count()

### Rename if necessary

In [316]:
# Fold the listed name variants into a single canonical author name.
# NOTE(review): "Name 1", "Name 2" and "Main Name" look like placeholders;
# edit them to match the names found in the actual log.
author_aliases = ["Name 1", "Name 2"]
is_alias = data["Author Name"].isin(author_aliases)
data.loc[is_alias, "Author Name"] = "Main Name"

### Check

In [None]:
# Re-run the per-author counts to confirm the aliases were merged.
data.groupby(by="Author Name").count()

## Count the number of commits per author

In [338]:
# Duplicate the commit hash into a dedicated column that the aggregation
# cells count, leaving "Commit Hash" itself untouched.
data["Commit nb"] = data.loc[:, "Commit Hash"]

In [339]:
# One row per author with the number of non-null "Commit nb" entries;
# reset_index turns the author name back into a regular column.
commits_by_author = data.groupby("Author Name")["Commit nb"]
commit_counts = commits_by_author.count().reset_index()

In [None]:
# Give the aggregated column a descriptive name, then show the table.
commit_counts = commit_counts.rename(columns={"Commit nb": "Commit Count"})
commit_counts

## Plot a bar chart to visualize the commit counts for each author
 

In [None]:
# Inspect the two columns that feed the chart below.
display(commit_counts["Author Name"])
# The aggregated column is named "Commit Count" (set by the rename cell);
# the previous "Commit Commit Count" key does not exist and raised KeyError.
display(commit_counts["Commit Count"])

### Export the bar chart as an image file (e.g., PNG)

In [None]:
# Bar chart of commits per author, annotated with the exact counts.
authors = commit_counts["Author Name"]
counts = commit_counts["Commit Count"]

fig, ax = plt.subplots(figsize=(12, 4))
ax.bar(authors, counts)
ax.set_xlabel('Author Name')
ax.set_ylabel('Commit Count')
ax.set_title('Commit Count per Author')
ax.tick_params(axis="x", labelrotation=0)

# Write each count centred just above its bar.
for author, count in zip(authors, counts):
    ax.text(author, count, str(count), ha="center", va="bottom")

# Save to disk first, then display the figure.
fig.savefig(f'{upload_path}commit_count_histogram.png')
plt.show()
print("Plot exported.")

## Generate a pie chart to show the contribution of all authors

In [343]:
# Data for the pie chart.
# `author_data` was referenced without ever being defined, so it is built
# here: one row per author with the number of commits counted in "Commit nb".
author_data = data.groupby("Author Name")["Commit nb"].count().reset_index()
author_names = author_data["Author Name"]
# NOTE: this rebinds `commit_counts` from the per-author DataFrame to a plain
# Series of counts, which is what the pie-chart cell expects. Re-run the
# aggregation cells before re-running the bar-chart cell.
commit_counts = author_data["Commit nb"]

### Export the pie chart as an image file (e.g., PNG)

In [None]:
# Pie chart of each author's share of the commits.
fig, ax = plt.subplots(figsize=(6, 6))  # Adjust the figure size as needed
ax.pie(commit_counts, labels=author_names, autopct='%1.1f%%', startangle=140)
ax.axis('equal')  # Equal aspect ratio keeps the pie circular.
ax.set_title('Contribution of Authors')

# Optional legend to the right of the chart:
# ax.legend(author_names, loc='center left', bbox_to_anchor=(1, 1))

fig.savefig(f'{upload_path}contribution_pie_chart.png')

print("Plot exported.")

plt.show()

## Extract and display author emails

### Display author emails

In [None]:
# Commit counts per (author, email) pair; only the index is used below.
email_data = data.groupby(["Author Name", "Author Email"])['Commit nb'].count()

# Map every author to the list of email addresses recorded under that name,
# keeping authors in the order they first appear in the grouped index.
author_emails = {
    name: email_data.loc[name].index.tolist()
    for name in email_data.index.get_level_values("Author Name").unique()
}

# Print one framed section per author, followed by a blank line.
separator = "------------------------------------------------"
for author_name, emails in author_emails.items():
    section = [
        separator,
        f"Author Name: {author_name}",
        "Author Emails:",
        *[f"  - {email}" for email in emails],
        separator,
        "",
    ]
    print("\n".join(section))



###  Get author name and email

In [346]:
# Group the log by (author, email); each index entry is one distinct pair.
email_data = data.groupby(["Author Name", "Author Email"])["Commit nb"].count()

# Collect the emails of each author by walking the (author, email) pairs in
# index order.
author_emails = {}
for author_name, author_email in email_data.index:
    author_emails.setdefault(author_name, []).append(author_email)

# author_emails

### Export to author_emails.txt

In [None]:
# Export author emails to a text file, one "Author:/Emails:" section per
# author separated by a blank line.
# UTF-8 is requested explicitly: author names can contain non-ASCII
# characters, and the platform default encoding may fail on or mangle them.
with open(f'{upload_path}author_emails.txt', 'w', encoding='utf-8') as file:
    for author_name, emails in author_emails.items():
        file.write(f"Author: {author_name}\n")
        file.write("Emails:\n")
        file.writelines(f"  - {email}\n" for email in emails)
        file.write("\n")

print("Author emails exported.")