# Setup cell

In [None]:
# Setup cell
import os

from dotenv import load_dotenv
import pandas as pd
import logging

import bitbucket
from metrics import Metrics
from platform_analysis import get_most_present_owner, get_unique_owner_number
from platforms import Platforms
import github
import gitea_forgejo
import gitlab
import platform_analysis

# Set up logging: INFO level on the root logger, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from .env file
load_dotenv()

# Load tokens from environment variables (os.getenv returns None when a variable is unset)
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITLAB_TOKEN = os.getenv("GITLAB_TOKEN")
GITEA_TOKEN = os.getenv("GITEA_TOKEN")
FORGEJO_TOKEN = os.getenv("FORGEJO_TOKEN")
BITBUCKET_TOKEN = os.getenv("BITBUCKET_TOKEN")

# A missing token would otherwise go unnoticed and be formatted into the
# Authorization header below as the literal text "None", so flag it up front.
for _token_name, _token_value in (
    ("GITHUB_TOKEN", GITHUB_TOKEN),
    ("GITLAB_TOKEN", GITLAB_TOKEN),
    ("GITEA_TOKEN", GITEA_TOKEN),
    ("FORGEJO_TOKEN", FORGEJO_TOKEN),
    ("BITBUCKET_TOKEN", BITBUCKET_TOKEN),
):
    if not _token_value:
        logger.warning(
            "%s is not set; the matching Authorization header will not carry a valid token.",
            _token_name,
        )

# Set up headers for API requests.
# GitHub, Gitea and Forgejo use the "token <value>" scheme; GitLab and Bitbucket use "Bearer <value>".
GITHUB_HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}
GITEA_HEADERS = {"Authorization": f"token {GITEA_TOKEN}"}
FORGEJO_HEADERS = {"Authorization": f"token {FORGEJO_TOKEN}"}
GITLAB_HEADERS = {"Authorization": f"Bearer {GITLAB_TOKEN}"}
BITBUCKET_HEADERS = {"Authorization": f"Bearer {BITBUCKET_TOKEN}"}

# Initialize platform instances.
# Gitea and Forgejo share the GiteaForgejo class and differ only in the headers they receive.
github_instance = github.GitHub(GITHUB_HEADERS)
gitea_instance = gitea_forgejo.GiteaForgejo(GITEA_HEADERS)
forgejo_instance = gitea_forgejo.GiteaForgejo(FORGEJO_HEADERS)
gitlab_instance = gitlab.Gitlab(GITLAB_HEADERS)
bitbucket_instance = bitbucket.Bitbucket(BITBUCKET_HEADERS)

# Fetching Repositories from GitHub

In [None]:
# Fetch a given number of GitHub repositories
data = github_instance.fetch_repositories(200)
file_name = "Github_data/github_1k_repositories.csv"

# Save the gathered data to a CSV file
if data:
    # Create the target folder first: to_csv fails on a missing directory,
    # which would throw away the result of the fetch above.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(file_name, index=False)
    logger.info(f"Saved repositories to '{file_name}'.")
else:
    # An empty/falsy result means there is nothing to write
    logger.error("No repositories were fetched.")

### Select clonable GitHub repositories and add Git metrics to the dataframe

In [None]:
# Source list of fetched repositories and destination for the selected subset
input_file, output_file = (
    "Github_data/github_1k_repositories.csv",
    "Github_data/github_repositories_metrics.csv",
)

# Read the repository list and hand it to the GitHub instance for selection
df = pd.read_csv(input_file)
df = github_instance.select_clonable_repositories(
    df,
    Platforms.GITHUB,
    100,
)

# Write the selection result to the metrics CSV (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Add platform-specific metrics to the dataframe

In [None]:
# The metrics CSV is read and then overwritten in place
input_file = output_file = "Github_data/github_repositories_metrics.csv"

# Load the repositories table
df = pd.read_csv(input_file)

# Add each chosen metric; the return value of add_metric is not used here,
# so this relies on add_metric updating df itself — TODO confirm.
for metric in (Metrics.ISSUE, Metrics.PULL_REQUEST):
    github_instance.add_metric(df, metric)

# Persist the updated table
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

# Fetching Repositories from Gitea

In [None]:
# Fetch a given number of pages of 50 Gitea repositories
data = gitea_instance.fetch_repositories(200, platform=Platforms.GITEA)
file_name = "Gitea_data/gitea_1k_repositories.csv"

# Save the gathered data to a CSV file
if data:
    # Create the target folder first: to_csv fails on a missing directory,
    # which would throw away the result of the fetch above.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(file_name, index=False)
    logger.info(f"Saved repositories to '{file_name}'.")
else:
    # An empty/falsy result means there is nothing to write
    logger.error("No repositories were fetched.")

### Select Clonable Gitea repositories and add Git metrics to the dataframe

In [None]:
# NOTE(review): this reads gitea_10k_repositories.csv, while the Gitea fetch cell
# of this notebook writes gitea_1k_repositories.csv — confirm which file is intended.
input_file, output_file = (
    "Gitea_data/gitea_10k_repositories.csv",
    "Gitea_data/gitea_repositories_metrics.csv",
)

# Read the repository list and hand it to the Gitea instance for selection
df = pd.read_csv(input_file)
df = gitea_instance.select_clonable_repositories(
    df,
    Platforms.GITEA,
    1000,
)

# Write the selection result to the metrics CSV (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Add platform-specific metrics to the dataframe

In [None]:
# The metrics CSV is read and then overwritten in place
input_file = output_file = "Gitea_data/gitea_repositories_metrics.csv"

# Load the repositories table
df = pd.read_csv(input_file)

# Add each chosen metric; the return value of add_metric is not used here,
# so this relies on add_metric updating df itself — TODO confirm.
for metric in (Metrics.ISSUE, Metrics.PULL_REQUEST):
    gitea_instance.add_metric(df, Platforms.GITEA, metric)

# Persist the updated table
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

# Fetching Repositories from Forgejo

In [None]:
# Fetch a given number of pages of 50 Forgejo repositories
data = forgejo_instance.fetch_repositories(200, platform=Platforms.FORGEJO)
file_name = "Forgejo_data/forgejo_1k_repositories.csv"

# Save the gathered data to a CSV file
if data:
    # Create the target folder first: to_csv fails on a missing directory,
    # which would throw away the result of the fetch above.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(file_name, index=False)
    logger.info(f"Saved repositories to '{file_name}'.")
else:
    # An empty/falsy result means there is nothing to write
    logger.error("No repositories were fetched.")

### Select Clonable Forgejo repositories and add Git metrics to the dataframe

In [None]:
# NOTE(review): the output path is forgejo_1k_repositories.csv (the same file the
# Forgejo fetch cell writes), whereas the following metrics cell reads
# forgejo_repositories_metrics.csv — confirm the intended destination.
input_file, output_file = (
    "Forgejo_data/forgejo_10k_repositories.csv",
    "Forgejo_data/forgejo_1k_repositories.csv",
)

# Read the repository list and hand it to the Forgejo instance for selection
df = pd.read_csv(input_file)
df = forgejo_instance.select_clonable_repositories(
    df,
    Platforms.FORGEJO,
    1000,
)

# Write the selection result (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Add platform-specific metrics to the dataframe

In [None]:
# The metrics CSV is read and then overwritten in place
input_file = output_file = "Forgejo_data/forgejo_repositories_metrics.csv"

# Load the repositories table
df = pd.read_csv(input_file)

# Add each chosen metric; the return value of add_metric is not used here,
# so this relies on add_metric updating df itself — TODO confirm.
for metric in (Metrics.ISSUE, Metrics.PULL_REQUEST):
    forgejo_instance.add_metric(df, Platforms.FORGEJO, metric)

# Persist the updated table
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

# Fetching Repositories from GitLab

In [None]:
# Fetch a given number of pages of 100 GitLab repositories
data = gitlab_instance.fetch_repositories(200)
file_name = "Gitlab_data/gitlab_1k_repositories.csv"

# Save the gathered data to a CSV file
if data:
    # Create the target folder first: to_csv fails on a missing directory,
    # which would throw away the result of the fetch above.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(file_name, index=False)
    logger.info(f"Saved repositories to '{file_name}'.")
else:
    # An empty/falsy result means there is nothing to write
    logger.error("No repositories were fetched.")

### Select Clonable GitLab repositories and add Git metrics to the dataframe

In [None]:
# NOTE(review): the output path is gitlab_1k_repositories.csv (the same file the
# GitLab fetch cell writes), whereas the following metrics cell reads
# gitlab_repositories_metrics.csv — confirm the intended destination.
input_file, output_file = (
    "Gitlab_data/gitlab_10k_repositories.csv",
    "Gitlab_data/gitlab_1k_repositories.csv",
)

# Read the repository list and hand it to the GitLab instance for selection
df = pd.read_csv(input_file)
df = gitlab_instance.select_clonable_repositories(
    df,
    Platforms.GITLAB,
    1000,
)

# Write the selection result (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Adding platform-specific metrics to the dataframe

In [None]:
# The metrics CSV is read and then overwritten in place
input_file = output_file = "Gitlab_data/gitlab_repositories_metrics.csv"

# Load the repositories table
df = pd.read_csv(input_file)

# Add each chosen metric; the return value of add_metric is not used here,
# so this relies on add_metric updating df itself — TODO confirm.
for metric in (Metrics.ISSUE, Metrics.PULL_REQUEST):
    gitlab_instance.add_metric(df, metric)

# Persist the updated table
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

# Fetching Repositories from Bitbucket

In [None]:
# Fetch a given number of pages of 100 Bitbucket repositories
data = bitbucket_instance.fetch_repositories(100)
file_name = "Bitbucket_data/bitbucket_10k_repositories.csv"

# Save the gathered data to a CSV file
if data:
    # Create the target folder first: to_csv fails on a missing directory,
    # which would throw away the result of the fetch above.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    df = pd.DataFrame(data)
    df.to_csv(file_name, index=False)
    logger.info(f"Saved repositories to '{file_name}'.")
else:
    # An empty/falsy result means there is nothing to write
    logger.error("No repositories were fetched.")

### Sample 1000 clonable repositories

In [None]:
# Full fetched list in, sampled list out
input_file, output_file = (
    "Bitbucket_data/bitbucket_10k_repositories.csv",
    "Bitbucket_data/bitbucket_1k_repositories.csv",
)

# Read the repository list and hand it to the Bitbucket instance for selection
df = pd.read_csv(input_file)
df = bitbucket_instance.select_clonable_repositories(
    df,
    Platforms.BITBUCKET,
    1000,
)

# Write the selection result (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Cloning Bitbucket repositories and adding Git metrics to the dataframe

In [None]:
# NOTE(review): this starts again from the 10k file rather than from
# bitbucket_1k_repositories.csv written by the sampling cell — confirm that is intended.
input_file, output_file = (
    "Bitbucket_data/bitbucket_10k_repositories.csv",
    "Bitbucket_data/bitbucket_repositories_metrics.csv",
)

# Read the repository list and hand it to the Bitbucket instance for selection
df = pd.read_csv(input_file)
df = bitbucket_instance.select_clonable_repositories(
    df,
    Platforms.BITBUCKET,
    500,
)

# Write the selection result to the metrics CSV (row index omitted)
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

### Adding platform-specific metrics to the dataframe

In [None]:
# The metrics CSV is read and then overwritten in place
input_file = output_file = "Bitbucket_data/bitbucket_repositories_metrics.csv"

# Load the repositories table
df = pd.read_csv(input_file)

# Add each chosen metric; the return value of add_metric is not used here,
# so this relies on add_metric updating df itself — TODO confirm.
# Metrics.ISSUE is currently left out for Bitbucket (it was disabled in this cell).
for metric in (Metrics.STAR, Metrics.FORK, Metrics.PULL_REQUEST):
    bitbucket_instance.add_metric(df, Platforms.BITBUCKET, metric)

# Persist the updated table
df.to_csv(output_file, index=False)
logger.info(f"Done! Updated file saved as {output_file}")

# First statistical analysis of the data

In [None]:
# Load the 1k repository lists of four platforms (Bitbucket is not loaded in this cell)
df_github, df_gitlab, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_1k_repositories.csv",
        "Gitlab_data/gitlab_1k_repositories.csv",
        "Gitea_data/gitea_1k_repositories.csv",
        "Forgejo_data/forgejo_1k_repositories.csv",
    )
]

# Owner statistics across the four loaded platforms
get_most_present_owner(df_github, df_gitlab, df_gitea, df_forgejo)
get_unique_owner_number(df_github, df_gitlab, df_gitea, df_forgejo)

In [None]:
# Load the "with_date_bias" metric tables of four platforms (Bitbucket is not loaded in this cell)
df_github, df_gitlab, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics_with_date_bias.csv",
        "Gitlab_data/gitlab_repositories_metrics_with_date_bias.csv",
        "Gitea_data/gitea_repositories_metrics_with_date_bias.csv",
        "Forgejo_data/forgejo_repositories_metrics_with_date_bias.csv",
    )
]

# Step-line plot over the four tables
platform_analysis.plot_step_lines(df_github, df_gitlab, df_gitea, df_forgejo)

In [None]:
# Load the 1k repository lists of four platforms (Bitbucket is not loaded in this cell)
df_github, df_gitlab, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_1k_repositories.csv",
        "Gitlab_data/gitlab_1k_repositories.csv",
        "Gitea_data/gitea_1k_repositories.csv",
        "Forgejo_data/forgejo_1k_repositories.csv",
    )
]

# Step-line plot over the four tables
platform_analysis.plot_step_lines(df_github, df_gitlab, df_gitea, df_forgejo)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Distribution plot for the MAIN_LANGUAGE metric (metric goes first in this call)
platform_analysis.plot_alphanumeric_distribution(
    Metrics.MAIN_LANGUAGE,
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Distribution plot for the LANGUAGE_DISTRIBUTION metric (metric goes first in this call)
platform_analysis.plot_alphanumeric_distribution(
    Metrics.LANGUAGE_DISTRIBUTION,
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Distribution plot for the LICENSE metric (metric goes first in this call)
platform_analysis.plot_alphanumeric_distribution(
    Metrics.LICENSE,
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
)

In [None]:
# Load the "with_date_bias" metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics_with_date_bias.csv",
        "Gitlab_data/gitlab_repositories_metrics_with_date_bias.csv",
        "Bitbucket_data/bitbucket_repositories_metrics_with_date_bias.csv",
        "Gitea_data/gitea_repositories_metrics_with_date_bias.csv",
        "Forgejo_data/forgejo_repositories_metrics_with_date_bias.csv",
    )
]

# Numeric distribution plot for the COMMIT metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.COMMIT,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the SIZE metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.SIZE,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the CONTRIBUTOR metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.CONTRIBUTOR,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the BRANCH metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.BRANCH,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the ISSUE metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.ISSUE,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the PULL_REQUEST metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.PULL_REQUEST,
)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Numeric distribution plot for the UPDATED metric (metric goes last in this call)
platform_analysis.plot_numeric_distribution(
    df_github,
    df_gitlab,
    df_bitbucket,
    df_gitea,
    df_forgejo,
    Metrics.UPDATED,
)

# Correlation between our metrics

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Print a platform label, then build that platform's correlation matrix
for label, frame in (
    ("GITHUB", df_github),
    ("GITLAB", df_gitlab),
    ("BITBUCKET", df_bitbucket),
    ("GITEA", df_gitea),
):
    print(label)
    platform_analysis.create_correlation_matrix(frame)

# The last platform stays outside the loop so the call remains the cell's
# final expression and Jupyter still displays its return value, if any.
print("FORGEJO")
platform_analysis.create_correlation_matrix(df_forgejo)

In [None]:
# Load the metric tables of all five platforms
df_github, df_gitlab, df_bitbucket, df_gitea, df_forgejo = [
    pd.read_csv(csv_path)
    for csv_path in (
        "Github_data/github_repositories_metrics.csv",
        "Gitlab_data/gitlab_repositories_metrics.csv",
        "Bitbucket_data/bitbucket_repositories_metrics.csv",
        "Gitea_data/gitea_repositories_metrics.csv",
        "Forgejo_data/forgejo_repositories_metrics.csv",
    )
]

# Print a platform label, then plot the Lorenz curve of its '#commits' column
for label, frame in (
    ("GITHUB", df_github),
    ("GITLAB", df_gitlab),
    ("BITBUCKET", df_bitbucket),
    ("GITEA", df_gitea),
):
    print(label)
    platform_analysis.plot_lorenz_curve(frame['#commits'])

# The last platform stays outside the loop so the call remains the cell's
# final expression and Jupyter still displays its return value, if any.
print("FORGEJO")
platform_analysis.plot_lorenz_curve(df_forgejo['#commits'])

# Statistical analysis using propensity score matching

### Size comparison

In [None]:
# Load the "with_date_bias" metric tables for GitHub and GitLab
df_github, df_gitlab = (
    pd.read_csv("Github_data/github_repositories_metrics_with_date_bias.csv"),
    pd.read_csv("Gitlab_data/gitlab_repositories_metrics_with_date_bias.csv"),
)

# Propensity score matching, GitHub vs GitLab, on the CONTRIBUTOR metric with replacement.
# NOTE(review): this cell sits under the "Size comparison" heading but matches on
# Metrics.CONTRIBUTOR, unlike the other cells in that section — confirm the intended metric.
platform_analysis.propensity_score_matching(
    df_github,
    df_gitlab,
    Metrics.CONTRIBUTOR,
    with_replacement=True,
)

In [None]:
# Load the "with_date_bias" metric tables for GitHub and GitLab
df_github, df_gitlab = (
    pd.read_csv("Github_data/github_repositories_metrics_with_date_bias.csv"),
    pd.read_csv("Gitlab_data/gitlab_repositories_metrics_with_date_bias.csv"),
)

# Stack both tables into one frame with a fresh 0..n-1 index
df = pd.concat([df_github, df_gitlab], ignore_index=True)

# Run the PSM analysis on the combined frame
# TODO: add the creation dates
platform_analysis.run_psm_analysis(
    df,
    covariates=["#branches", "#contributors", "size"],
)

In [None]:
# Load the metric tables for GitHub and Bitbucket
df_github, df_bitbucket = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Bitbucket_data/bitbucket_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Bitbucket, on the SIZE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_bitbucket,
    Metrics.SIZE,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Gitea
df_github, df_gitea = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitea_data/gitea_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Gitea, on the SIZE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitea,
    Metrics.SIZE,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Forgejo
df_github, df_forgejo = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Forgejo_data/forgejo_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Forgejo, on the SIZE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_forgejo,
    Metrics.SIZE,
    scale="log",
    max_difference=0.1,
)

### Commit comparison

In [None]:
# Load the metric tables for GitHub and GitLab
df_github, df_gitlab = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitlab_data/gitlab_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs GitLab, on the COMMIT metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitlab,
    Metrics.COMMIT,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Bitbucket
df_github, df_bitbucket = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Bitbucket_data/bitbucket_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Bitbucket, on the COMMIT metric
platform_analysis.propensity_score_matching(
    df_github,
    df_bitbucket,
    Metrics.COMMIT,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Gitea
df_github, df_gitea = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitea_data/gitea_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Gitea, on the COMMIT metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitea,
    Metrics.COMMIT,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Forgejo
df_github, df_forgejo = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Forgejo_data/forgejo_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Forgejo, on the COMMIT metric
platform_analysis.propensity_score_matching(
    df_github,
    df_forgejo,
    Metrics.COMMIT,
    scale="log",
    max_difference=0.1,
)

### Issue comparison

In [None]:
# Load the metric tables for GitHub and GitLab
df_github, df_gitlab = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitlab_data/gitlab_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs GitLab, on the ISSUE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitlab,
    Metrics.ISSUE,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Gitea
df_github, df_gitea = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitea_data/gitea_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Gitea, on the ISSUE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitea,
    Metrics.ISSUE,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Forgejo
df_github, df_forgejo = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Forgejo_data/forgejo_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Forgejo, on the ISSUE metric
platform_analysis.propensity_score_matching(
    df_github,
    df_forgejo,
    Metrics.ISSUE,
    scale="log",
    max_difference=0.1,
)

### Pull request comparison

In [None]:
# Load the metric tables for GitHub and GitLab
df_github, df_gitlab = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitlab_data/gitlab_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs GitLab, on the PULL_REQUEST metric
platform_analysis.propensity_score_matching(
    df_github,
    df_gitlab,
    Metrics.PULL_REQUEST,
    scale="log",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Gitea
df_github, df_gitea = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Gitea_data/gitea_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Gitea, on the PULL_REQUEST metric.
# NOTE(review): scale is "linear" here while every other PSM cell that sets a scale
# uses "log" — confirm this is deliberate.
platform_analysis.propensity_score_matching(
    df_github,
    df_gitea,
    Metrics.PULL_REQUEST,
    scale="linear",
    max_difference=0.1,
)

In [None]:
# Load the metric tables for GitHub and Forgejo
df_github, df_forgejo = (
    pd.read_csv("Github_data/github_repositories_metrics.csv"),
    pd.read_csv("Forgejo_data/forgejo_repositories_metrics.csv"),
)

# Propensity score matching, GitHub vs Forgejo, on the PULL_REQUEST metric
platform_analysis.propensity_score_matching(
    df_github,
    df_forgejo,
    Metrics.PULL_REQUEST,
    scale="log",
    max_difference=0.1,
)