In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from github import Github

In [None]:
# GitHub token: read from the environment so the secret is never stored in the
# notebook (or in its version history). Any token that was previously committed
# in this cell should be treated as compromised and revoked.
token = os.environ.get("GITHUB_TOKEN")
if not token:
    raise RuntimeError(
        "Set the GITHUB_TOKEN environment variable to a GitHub personal access token."
    )

# GitHub repositories to analyse, as "owner/name" strings
repositories = [
    "openai/openai-cookbook",
    "openai/openai-python",
    "openai/openai-quickstart-python",
    "milvus-io/pymilvus",
    "SeleniumHQ/selenium",
    "golang/go",
    "google/go-github",
    "angular/material",
    "angular/angular-cli",
    "SebastianM/angular-google-maps",
    "d3/d3",
    "facebook/react",
    "tensorflow/tensorflow",
    "keras-team/keras",
    "pallets/flask"
]

# GitHub API client shared by the cells below
g = Github(token)

In [None]:
# Function to get time series data for a repository metric
def get_time_series_data(repo_name, metric):
    """Return daily event counts for one repository and one metric.

    Args:
        repo_name: Repository in "owner/name" form, passed to ``g.get_repo``.
        metric: One of
            "issues_created" - creation date of every issue,
            "issues_closed"  - close date of every closed issue,
            "pulls"          - creation date of every pull request,
            "commits"        - author date of every commit,
            "releases"       - publication (or creation) date of every release,
            "branches", "contributors" - accepted, but always yield an empty
            frame because no per-item timestamp is read for them here.

    Returns:
        DataFrame indexed by ``date`` (``datetime.date`` values, ascending) with
        a single integer ``count`` column holding the number of events per day.
        When there are no events the frame is empty but keeps that layout.

    Raises:
        ValueError: If ``metric`` is not one of the names listed above.
    """
    repo = g.get_repo(repo_name)

    if metric == "issues_created":
        # NOTE: GitHub's issues endpoint also lists pull requests, so these
        # counts include PRs (same as before this function handled "pulls").
        timestamps = [issue.created_at for issue in repo.get_issues(state="all")]
    elif metric == "issues_closed":
        timestamps = [
            issue.closed_at
            for issue in repo.get_issues(state="all")
            if issue.closed_at
        ]
    elif metric == "pulls":
        timestamps = [pull.created_at for pull in repo.get_pulls(state="all")]
    elif metric == "commits":
        timestamps = [commit.commit.author.date for commit in repo.get_commits()]
    elif metric == "releases":
        # published_at can be unset (e.g. drafts); fall back to created_at.
        timestamps = [
            release.published_at or release.created_at
            for release in repo.get_releases()
        ]
    elif metric in ("branches", "contributors"):
        # Nothing to place on a time axis for these metrics.
        timestamps = []
    else:
        raise ValueError(f"Unsupported metric: {metric!r}")

    if not timestamps:
        # Keep the same layout as the non-empty case so callers can rely on
        # the "count" column and the "date" index even when there is no data.
        return pd.DataFrame(
            columns=["count"], index=pd.Index([], name="date"), dtype="int64"
        )

    # One row per event, then collapse to one row per calendar day.
    df = pd.DataFrame({"date": [ts.date() for ts in timestamps], "count": 1})
    return df.groupby("date").sum()

In [None]:
# Function to plot and save forecast
def plot_and_save_forecast(forecast, metric):
    """Plot a forecast with its confidence band and save it as a PNG.

    Args:
        forecast: DataFrame whose index is used as the x-axis. It must contain
            either the columns ``yhat`` / ``yhat_lower`` / ``yhat_upper`` or the
            columns ``mean`` / ``mean_ci_lower`` / ``mean_ci_upper`` (the names
            produced by statsmodels' ``summary_frame()``).
        metric: Label used in the plot title and in the output file name
            ``"{metric}_forecast.png"`` (written relative to the working
            directory).

    Raises:
        KeyError: If ``forecast`` has neither set of expected columns.
    """
    # Accept both naming schemes so a statsmodels summary frame can be passed
    # straight in without renaming.
    if "yhat" in forecast.columns:
        point_col, lower_col, upper_col = "yhat", "yhat_lower", "yhat_upper"
    else:
        point_col, lower_col, upper_col = "mean", "mean_ci_lower", "mean_ci_upper"

    fig, ax = plt.subplots()
    try:
        ax.plot(forecast.index, forecast[point_col], label="Forecast")
        ax.fill_between(
            forecast.index, forecast[lower_col], forecast[upper_col], alpha=0.3
        )
        ax.set_title(f"{metric.capitalize()} Forecast")
        ax.set_xlabel("Date")
        ax.set_ylabel("Count")
        ax.legend()
        fig.savefig(f"{metric}_forecast.png")
    finally:
        # Always release the figure; this function is called in a loop.
        plt.close(fig)

In [None]:
# Perform forecasting for each metric and save one plot per (metric, repository)
FORECAST_DAYS = 365  # forecast horizon, in daily steps

for metric in ["issues_created", "issues_closed", "pulls", "commits", "branches", "contributors", "releases"]:
    # Get time series data for each repository, skipping those without data
    repo_frames = []
    for repo_name in repositories:
        df = get_time_series_data(repo_name, metric)
        if df.empty:
            continue
        repo_frames.append(df.assign(repo_name=repo_name))

    if not repo_frames:
        print(f"No data for metric '{metric}'; skipping forecast.")
        continue

    # Concatenate once, then pivot the DataFrame to have repositories as columns
    all_repo_df = pd.concat(repo_frames).pivot(columns="repo_name", values="count")

    # Give the model a regular daily DatetimeIndex: convert the date objects,
    # insert the calendar days on which nothing happened, and count them as 0.
    all_repo_df.index = pd.to_datetime(all_repo_df.index)
    all_repo_df = all_repo_df.sort_index().asfreq("D").fillna(0)

    # Perform time series forecasting for each repository
    for repo_name in repositories:
        if repo_name not in all_repo_df.columns:
            # This repository had no events for the current metric.
            continue

        # Single-column frame indexed by date
        repo_data = all_repo_df[[repo_name]]

        # Perform time series forecasting using SARIMAX
        model = sm.tsa.SARIMAX(repo_data, order=(1, 1, 1), seasonal_order=(0, 0, 0, 0))
        results = model.fit(disp=False)

        # Make future predictions; rename the statsmodels columns to the names
        # the plotting helper reads.
        forecast = results.get_forecast(steps=FORECAST_DAYS)
        forecast_df = forecast.summary_frame().rename(
            columns={
                "mean": "yhat",
                "mean_ci_lower": "yhat_lower",
                "mean_ci_upper": "yhat_upper",
            }
        )

        # Plot and save forecast. The repository goes into the label so each
        # plot gets its own file; "/" is replaced because the label becomes
        # part of a file name.
        plot_label = f"{metric}_{repo_name.replace('/', '_')}"
        plot_and_save_forecast(forecast_df, plot_label)