# Merged PRs per week: data fetch

In [None]:

# You can optionally exclude an author.

from collections import Counter
from datetime import date, datetime, timedelta
from typing import Dict

import requests

# Replace these with your own values
repository_owner = "ElektraInitiative"
repository_name = "PermaplanT"
excluded_author = "-"  # Replace with the author's name you want to exclude

base_url = f"https://api.github.com/repos/{repository_owner}/{repository_name}"

# Fetch all closed pull requests page by page, 100 entries per request.
pulls_url = f"{base_url}/pulls?state=closed&per_page=100"  # Set per_page to 100
all_pulls = []
page = 1
while True:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(f"{pulls_url}&page={page}", timeout=30)
    # Fail loudly on HTTP errors (e.g. rate limiting). Without this check an
    # error payload, which is a non-empty JSON object, would be treated as a
    # page of results and the loop would never terminate.
    response.raise_for_status()
    pulls_data = response.json()
    if not isinstance(pulls_data, list):
        raise RuntimeError(
            f"Unexpected response while fetching page {page}: {pulls_data!r}"
        )
    if not pulls_data:  # Stop pagination if there are no more results
        break
    all_pulls.extend(pulls_data)
    page += 1

# Print the total number of pull requests fetched
print(f"Total pull requests fetched: { len(all_pulls)}")

# Keep the closed pull requests that targeted master and were not opened by
# the excluded author. Despite the variable name, entries here are only known
# to be closed; whether they were actually merged is decided later from their
# "merged_at" field.
merged_pulls = []
for pull in all_pulls:
    # "user" and "base" may be present but null, in which case
    # dict.get(key, {}) would hand back None; `or {}` covers both cases.
    author = (pull.get("user") or {}).get("login", "")
    base_ref = (pull.get("base") or {}).get("ref")
    if pull.get("state", "") != "closed":
        continue
    if base_ref == "master" and author != excluded_author:
        merged_pulls.append(pull)

# Print how many pull requests remain after filtering
print(f"Total pull requests fetched excluding author: { len(merged_pulls)}")

# Count merges per week, keyed by the date of the Monday that starts the week
# (datetime.weekday() is 0 for Monday). Counter is a dict subclass, so the
# result still supports .items() / .values() and keeps insertion order.
merge_counts: Dict[date, int] = Counter()
for pull in merged_pulls:
    merged_at_str = pull.get("merged_at")
    if not merged_at_str:
        # No merge timestamp (presumably closed without merging): not a merge.
        continue
    merged_at = datetime.strptime(merged_at_str, "%Y-%m-%dT%H:%M:%SZ")
    week_start = (merged_at - timedelta(days=merged_at.weekday())).date()
    merge_counts[week_start] += 1

# Print merge counts per week
for week, count in merge_counts.items():
    print(f"Week starting {week}: {count} merges")


# Merged PRs per week: plot

In [None]:
from typing import List
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.lines import Line2D

# Reformat the per-week counts produced by the data-fetch cell
data = {date.strftime("%Y-%m-%d"): count for date, count in merge_counts.items()}

# Convert date strings to datetime objects for the x axis
dates = [datetime.strptime(date, "%Y-%m-%d") for date in data.keys()]
# Use a separate name for the y values: rebinding merge_counts to a list would
# destroy the dict and make this cell fail the next time it is run.
weekly_counts: List[int] = list(data.values())

# Create the plot
# NOTE(review): points are drawn in the insertion order of merge_counts;
# confirm that this order is chronological for the fetched data.
plt.figure(figsize=(10, 6))
plt.plot(dates, weekly_counts, marker='o')

# Highlight one data point in a different color.
# NOTE(review): index 8 is the 9th plotted week; an earlier comment said
# "8th data point (index 7)" -- confirm which week marks the new pipeline.
highlight_index = 8
highlight_color = 'red'
if highlight_index < len(dates):  # avoid IndexError when fewer weeks exist
    plt.plot(
        dates[highlight_index],
        weekly_counts[highlight_index],
        marker='o',
        color=highlight_color,
    )
    # Create a red dot legend item
    legend_element = Line2D(
        [0], [0],
        marker='o',
        color='w',
        label='New Deployment Pipeline',
        markersize=10,
        markerfacecolor=highlight_color,
    )
    plt.legend(handles=[legend_element], loc='lower right')  # Use handles to add custom legend items

plt.xlabel('Week')
plt.ylabel('Number of Merges')
# plt.title('Merged Pull Requests per Week')
plt.xticks(rotation=45)
plt.tight_layout()

# Display the plot
plt.show()

# PermaplanT release frequency analysis

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime

# Release dates as "YYYY-MM-DD" strings; a date listed twice means two
# entries on the same day.
release_dates = [
    "2023-04-19",
    "2023-04-19",
    "2023-04-22",
    "2023-04-22",
    "2023-06-29",
    "2023-07-12",
    "2023-07-18",
    "2023-07-28",
    "2023-07-28",
    "2023-08-03",
    "2023-08-07",
]

# Parse the strings into datetime objects and order them chronologically
release_dates = sorted(
    datetime.strptime(stamp, "%Y-%m-%d") for stamp in release_dates
)

# Whole days from each release to the one that follows it
release_diff = [
    (following - current).days
    for current, following in zip(release_dates, release_dates[1:])
]

# One bar per release except the last, placed at the release date and as
# tall as the wait until the next release
plt.figure(figsize=(10, 6))
plt.bar(release_dates[:-1], release_diff, color='skyblue')
plt.xlabel('Release Dates')
plt.ylabel('Days Between Releases')
# plt.title('Release Frequency')
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()
