In [1]:
import requests
from datetime import datetime, timedelta
import os

# Get the GitHub token from the environment
github_token = os.getenv("GITHUB_TOKEN")

if not github_token:
    raise EnvironmentError("GITHUB_TOKEN is not set in the environment.")

# Define your repository and authentication details
# Define GitHub owner and token
ORG = "TeamMoegMC"  # Replace with the user or organization name
OWNER = "TeamMoegMC"  # Replace with the owner of the repo
REPO = "FrostedHeart"  # Replace with your repo name
TOKEN = github_token  # Replace with your GitHub token

WHITE_LIST_REPOS = [
        "TWRStartupScript",
        "FrostedHeart",
        "Thermopolium",
        "TWRAutoUpdate",
        "TWRProjectTracker",
        "TSSAutoPack",
        "TssapConfigs",
        "ImmersiveIndustry",
        "SteamPowered",
        "CharcoalPit2",
        "StoneAge",
        "TheWinterRescue"
    ]

WAIVED_USERS = [
    "YueSha",
    "duck_egg",
    "khjxiaogu",
    "Hydracor",
    "dasb"
]

QUITED_USERS = [
    # "大家好啊我是黑桃",
    # "KilaBash",
    # "GuardianWorld",
    # "Collin Marando",
    # "ewoudje"
]

APPENDING_CONTRIBUTORS = {
    "past_year": [],
    "past_half_year": [],
    "past_three_months": [],
    "past_month": ["lanshan"]
}

# Headers for authentication
HEADERS = {"Authorization": f"token {TOKEN}"}

In [6]:
def analyze_task_contributors(file_path):
    """
    Analyzes contributor activity from a CSV file and returns contributors grouped
    by their activity in the past year, half year, three months, and one month.
    
    Args:
    - file_path (str): Path to the CSV file containing task data with 'USER_ID' and 'COMPLETE_TIME'.

    Returns:
    - dict: Contributors grouped by activity periods.
    """
    import pandas as pd
    from datetime import datetime, timedelta

    # Load the data
    df = pd.read_csv(file_path)
    
    # Convert COMPLETE_TIME from Unix epoch to datetime, ignore missing values
    df['COMPLETE_TIME'] = pd.to_datetime(df['COMPLETE_TIME'], unit='ms', errors='coerce')
    
    # Current time
    current_time = datetime.utcnow()

    # Define the time ranges
    time_ranges = {
        "past_year": current_time - timedelta(days=365),
        "past_half_year": current_time - timedelta(days=182),
        "past_three_months": current_time - timedelta(days=91),
        "past_month": current_time - timedelta(days=30)
    }

    # Filter contributors based on time ranges
    contributors_by_time = {}
    for label, cutoff in time_ranges.items():
        contributors = df.loc[
            (df['COMPLETE_TIME'] >= cutoff), 'USER_ID'
        ].dropna().unique()
        contributors_by_time[label] = list(contributors)

    return contributors_by_time

In [8]:
import requests
from datetime import datetime, timedelta

# GitHub API base URLs
# REPOS_URL = f"https://api.github.com/users/{OWNER}/repos"  # For individual user repos
REPOS_URL = f"https://api.github.com/orgs/{ORG}/repos"  # Uncomment for organization repos

HEADERS = {"Authorization": f"token {TOKEN}"}

def fetch_all_repos():
    """Fetch all repositories for the owner."""
    repos = []
    page = 1
    print("[INFO] Fetching repositories...")
    while True:
        response = requests.get(REPOS_URL, headers=HEADERS, params={"page": page, "per_page": 100})
        if response.status_code != 200:
            print(f"[ERROR] Error fetching repos: {response.status_code}, {response.text}")
            break
        data = response.json()
        if not data:
            break
        repos.extend(data)
        print(f"[INFO] Retrieved {len(data)} repositories on page {page}.")
        page += 1
    print(f"[INFO] Total repositories fetched: {len(repos)}.")
    return [repo['name'] for repo in repos]

def fetch_commits_for_repo(repo_name):
    """Fetch all commits for a specific repository using pagination."""
    commits_url = f"https://api.github.com/repos/{OWNER}/{repo_name}/commits"
    commits = []
    page = 1
    print(f"[INFO] Fetching commits for repository: {repo_name}...")
    while True:
        response = requests.get(commits_url, headers=HEADERS, params={"page": page, "per_page": 100})
        if response.status_code != 200:
            print(f"[ERROR] Error fetching commits for {repo_name}: {response.status_code}, {response.text}")
            break
        data = response.json()
        if not data:
            break
        commits.extend(data)
        print(f"[INFO] Retrieved {len(data)} commits on page {page} for {repo_name}.")
        page += 1
    print(f"[INFO] Total commits fetched for {repo_name}: {len(commits)}.")
    return commits

def analyze_contributors_all_repos():
    """Analyze contributors across all repositories for the owner."""
    # Fetch all repositories
    repos = fetch_all_repos()
    # remove blacklist repos
    repos = [repo for repo in repos if repo in WHITE_LIST_REPOS]

    contributors_by_time = {"past_year": set(), "past_half_year": set(), "past_three_months": set(), "past_month": set()}
    
    # Current time
    current_time = datetime.utcnow()

    # Define time ranges
    time_ranges = {
        "past_year": current_time - timedelta(days=365),
        "past_half_year": current_time - timedelta(days=182),
        "past_three_months": current_time - timedelta(days=91),
        "past_month": current_time - timedelta(days=30),
    }

    # Fetch and analyze commits for each repository
    print("[INFO] Starting commit analysis...")
    for repo in repos:
        commits = fetch_commits_for_repo(repo)
        for commit in commits:
            try:
                author = commit['commit']['author']['name']
                timestamp = datetime.strptime(commit['commit']['author']['date'], "%Y-%m-%dT%H:%M:%SZ")
                for period, cutoff in time_ranges.items():
                    if timestamp >= cutoff:
                        contributors_by_time[period].add(author)
            except KeyError:
                continue
        print(f"[INFO] Finished analyzing commits for {repo}.")
    
    # Convert sets to lists for output
    contributors_by_time = {key: list(value) for key, value in contributors_by_time.items()}
    print("[INFO] Contributor analysis complete.")
    return contributors_by_time

# Example usage
# github_contributors_by_time = analyze_contributors_all_repos()
# print(github_contributors_by_time)



In [9]:
# A constant map of user aliases in the task data and their corresponding GitHub usernames
# Infer from
'''
Task
{'past_year': ['khjxiaogu', 'GMG', 'Dsanilen', 'gugusb', 'Hydracor', 'winged-hussars', '大家好啊我是黑桃', 'IBlessu', 'ceary', 'Huaji__Qinmi', 'YueSha', 'duck_egg', 'ewoudje'], 'past_half_year': ['Dsanilen', 'winged-hussars', 'IBlessu', 'ewoudje', 'duck_egg'], 'past_three_months': ['Dsanilen'], 'past_month': ['Dsanilen']}
GitHub
{'past_year': ['khjxiaogu', 'dasb', 'GuardianWorld', 'duck_egg', 'Yuqi154', 'jian-wei-liu', 'ceary741', 'ewoudje', 'Winged-Hussars', 'IBlessus', 'Collin Marando', 'Qi-Month', 'ceary', 'yuesha-yc', 'alphagem618', 'goumo', 'Huaji__Qinmi', 'Jacky Wang', 'Gugusb', 'khj xiaogu', 'Yichen Wang', 'KilaBash', 'H. “Lyūke” Monaggem'], 'past_half_year': ['khjxiaogu', 'dasb', 'Qi-Month', 'khj xiaogu', 'Yichen Wang', 'yuesha-yc', 'alphagem618', 'KilaBash', 'goumo', 'duck_egg', 'Yuqi154', 'H. “Lyūke” Monaggem', 'IBlessus', 'jian-wei-liu'], 'past_three_months': ['khjxiaogu', 'dasb', 'Qi-Month', 'khj xiaogu', 'Yichen Wang', 'yuesha-yc', 'alphagem618', 'KilaBash', 'goumo', 'duck_egg', 'Yuqi154', 'H. “Lyūke” Monaggem', 'jian-wei-liu'], 'past_month': ['khjxiaogu', 'dasb', 'Qi-Month', 'Yichen Wang', 'yuesha-yc', 'alphagem618', 'KilaBash', 'goumo', 'duck_egg', 'Yuqi154', 'H. “Lyūke” Monaggem']}
'''
TASK_USER_TO_GITHUB_ALIAS_MAP = {
    "YueSha": "yuesha-yc",
    "YueSha": "Yichen Wang",
    "khjxiaogu": ["khjxiaogu", "khj xiaogu"],
    "GMG": "goumo",
    "Dsanilen": "Dsanilen", # no github
    "gugusb": "Gugusb",
    "Hydracor": "H. “Lyūke” Monaggem",
    "winged-hussars": "Winged-Hussars",
    "大家好啊我是黑桃": "大家好啊我是黑桃", # no github
    "IBlessu": "IBlessus",
    "ceary": ["ceary", "ceary741"],
    "Huaji__Qinmi": "Huaji__Qinmi", # no github
    "duck_egg": "duck_egg",
    "ewoudje": "ewoudje",
    "GuardianWorld": "GuardianWorld", # no task
    "Yuqi154": "Yuqi154", # no task
    "jian-wei-liu": "jian-wei-liu", # no task
    "alphagem618": "alphagem618", # no task
    "KilaBash": "KilaBash", # no task
    "dasb": "dasb", # no task
    "Qi-Month": "Qi-Month", # no task
    "Collin Marando": "Collin Marando", # no task
    "Jacky Wang": "Jacky Wang", # no task
}

In [10]:
def merge_contributor_dicts(task_to_github_map, task_contributors, github_contributors):
    """
    Merges task and GitHub contributor data into a unified dictionary keyed by task user IDs.

    Args:
    - task_to_github_map (dict): A mapping of task user IDs to GitHub aliases.
    - task_contributors (dict): Contributors grouped by time periods from task system.
    - github_contributors (dict): Contributors grouped by time periods from GitHub system.

    Returns:
    - dict: A merged dictionary with task user IDs as keys and activity periods as values.
    """
    # Initialize the merged dictionary
    merged_contributors = {period: set() for period in task_contributors.keys()}
    
    # Reverse the mapping to map GitHub aliases to task user IDs
    github_to_task_map = {}
    for task_id, aliases in task_to_github_map.items():
        if isinstance(aliases, list):
            for alias in aliases:
                github_to_task_map[alias] = task_id
        else:
            github_to_task_map[aliases] = task_id

    # Process task contributors
    for period, task_ids in task_contributors.items():
        for task_id in task_ids:
            merged_contributors[period].add(task_id)

    # Process GitHub contributors
    for period, github_ids in github_contributors.items():
        for github_id in github_ids:
            if github_id in github_to_task_map:
                task_id = github_to_task_map[github_id]
                merged_contributors[period].add(task_id)

    # Convert sets to lists for the final output
    merged_contributors = {period: list(contributors) for period, contributors in merged_contributors.items()}
    
    return merged_contributors

In [12]:
def remove_redundant_contributors(contributors_by_time):
    """
    Removes names from longer time period lists if the name is already in a shorter time period list.

    Args:
    - contributors_by_time (dict): Dictionary with time periods as keys and lists of contributors as values.

    Returns:
    - dict: Updated dictionary with redundant contributors removed.
    """
    # Define time periods in descending order of duration
    time_periods = ["past_year", "past_half_year", "past_three_months", "past_month"]
    
    # Maintain a set of contributors already included in shorter time periods
    seen_contributors = set()
    
    for period in time_periods[::-1]:  # Start from the shortest period
        contributors = contributors_by_time.get(period, [])
        # Remove contributors already seen in shorter periods
        contributors_by_time[period] = [
            contributor for contributor in contributors if contributor not in seen_contributors
        ]
        # Add current contributors to the seen set
        seen_contributors.update(contributors_by_time[period])
    
    return contributors_by_time

In [13]:
def filter_waived_contributors(contributors_by_time, waived_users):
    """
    Filters out waived users from the contributor lists.

    Args:
    - contributors_by_time (dict): Dictionary with time periods as keys and lists of contributors as values.
    - waived_users (list): List of user IDs to be waived from the contributor lists.

    Returns:
    - dict: Updated dictionary with waived users removed.
    """
    for period, contributors in contributors_by_time.items():
        contributors_by_time[period] = [contributor for contributor in contributors if contributor not in waived_users]
    return contributors_by_time

In [14]:
def format_contributors_for_output_en(contributors_by_time):
    """
    Formats the contributors data for output in a human-readable format.

    Args:
    - contributors_by_time (dict): Dictionary with time periods as keys and lists of contributors as values.

    Returns:
    - str: A formatted string representing the contributors data.
    """
    output = ""
    for period, contributors in contributors_by_time.items():
        if contributors:
            output += f"Contributors in the {period}:\n"
            for contributor in contributors:
                output += f"- {contributor}\n"
            output += "\n"
    return output

In [15]:
# chinese version
# map periods also to chinese
PERIODS = {
    "past_year": "过去一年",
    "past_half_year": "过去半年",
    "past_three_months": "过去三个月",
    "past_month": "过去一个月",
}

def format_contributors_for_output(contributors_by_time):
    """
    Formats the contributors data for output in a human-readable format.

    Args:
    - contributors_by_time (dict): Dictionary with time periods as keys and lists of contributors as values.

    Returns:
    - str: A formatted string representing the contributors data.
    """
    output = ""
    for period, contributors in contributors_by_time.items():
        if contributors:
            output += f"{PERIODS[period]}的贡献者:\n"
            for contributor in contributors:
                output += f"- {contributor}\n"
            output += "\n"
    return output

In [16]:
def calculate_awards(contributors, total_awards):
    """
    Calculate the awards for each contributor based on the contribution period.
    
    :param contributors: A dictionary with keys as contribution periods and values as lists of contributors.
    :param total_awards: The total amount of awards to be distributed this month.
    :return: A dictionary with users as keys and the corresponding award amount as values.
    """
    # Define the weight for each contribution period
    weights = {
        "past_month": 12,
        "past_three_months": 6,
        "past_half_year": 3,
        "past_year": 1
    }
    
    # Calculate the total weight
    total_weight = sum(weights[period] * len(contributors[period]) for period in contributors)
    
    # Calculate the value of one weight unit
    unit_value = total_awards / total_weight
    
    # Calculate awards for each user
    awards = {}
    for period, users in contributors.items():
        for user in users:
            awards[user] = round(weights[period] * unit_value, 2)
    
    return awards

# Example usage

total_awards = 1000
calculate_awards(reward_contributors, total_awards)


NameError: name 'reward_contributors' is not defined

In [21]:
# Create Function to Generate the Final Formatted Output
# Set a param to switch between English and Chinese output
# Use markdown format for better readability
# Remember to change line

def generate_final_output(original_contributors, contributors, awards, total_reward, total_usage, language="en"):
    """
    Generate the final formatted output with contributors and their awards.
    
    :param original_contributors: A dictionary with keys as contribution periods and values as lists of contributors, these are before waiving.
    :param contributors: A dictionary with keys as contribution periods and values as lists of contributors.
    :param awards: A dictionary with users as keys and the corresponding award amount as values.
    :param total_reward: The total amount of awards to be distributed this month.
    :param total_usage: The total amount of money used for maintaining the project.
    :param language: The language of the output, either "en" for English or "zh" for Chinese.
    :return: A formatted string representing the final output.
    """
    # Define the language-specific strings
    if language == "en":
        periods = {
            "past_year": "Past Year",
            "past_half_year": "Past Half Year",
            "past_three_months": "Past Three Months",
            "past_month": "Past Month",
        }
        awards_text = "The Winter Rescue Contributor Awards Report"

        # Generate the output
        # Get current year and month
        now = datetime.now()
        current_year_month = f"{now.year}-{now.strftime('%B')}" if language == "en" else f"{now.year}年{now.month}月"
        
        # Generate the output
        output = f"# {awards_text} ({current_year_month})\n\n"

        # Add section for original contributors
        output += f"## Contributors for the Month\n\n"
        for period, users in original_contributors.items():
            output += f"### {periods[period]}\n\n"
            for user in users:
                output += f"- **{user}**\n"
            output += "\n"

        # Add section for those contributors who waived
        output += f"## Contributors Who Waived Awards for the Month\n\n"
        for user in WAIVED_USERS:
            output += f"- **{user}**\n"

        # Add section for those contributors who quited
        output += f"## Contributors Who Quited for the Month\n\n"
        for user in QUITED_USERS:
            output += f"- **{user}**\n"

        # Add section for total usage and total awards
        output += f"## Total Usage and Awards for the Month\n\n"
        output += f"- **Total Usage**: ¥{total_usage}\n\n"
        output += f"- **Total Awards**: ¥{total_reward}\n"

        output += f"## Awards Distribution \n\n"
        for period, users in contributors.items():
            output += f"### {periods[period]}\n\n"
            for user in users:
                award = awards.get(user, 0)
                output += f"- **{user}**: ¥{award}\n"
            output += "\n"
        

    elif language == "zh":
        periods = {
            "past_year": "过去一年",
            "past_half_year": "过去半年",
            "past_three_months": "过去三个月",
            "past_month": "过去一个月",
        }
        awards_text = "冬季救援贡献者情况汇报"

        # Generate the output
        # Get current year and month
        now = datetime.now()
        current_year_month = f"{now.year}-{now.strftime('%B')}" if language == "en" else f"{now.year}年{now.month}月"
        
        # Generate the output
        output = f"# {awards_text} ({current_year_month})\n\n"

        # Add section for original contributors
        output += f"## 本月贡献者情况\n\n"
        for period, users in original_contributors.items():
            output += f"### {periods[period]}\n\n"
            for user in users:
                output += f"- **{user}**\n"
            output += "\n"

        # Add section for those contributors who waived
        output += f"## 本月自愿放弃奖励的贡献者\n\n"
        for user in WAIVED_USERS:
            output += f"- **{user}**\n"

        # Add section for those contributors who quited
        output += f"## 本月退出的贡献者\n\n"
        for user in QUITED_USERS:
            output += f"- **{user}**\n"

        # Add section for total usage and total awards
        output += f"## 本月总奖励和总支出\n\n"
        output += f"- **总支出**: ¥{total_usage}\n\n"
        output += f"- **总奖励**: ¥{total_reward}\n"

        output += f"## 奖励分配情况\n\n"
        for period, users in contributors.items():
            output += f"### {periods[period]}\n\n"
            for user in users:
                award = awards.get(user, 0)
                output += f"- **{user}**: ¥{award}\n"
            output += "\n"

    # Save as md file
    with open(f"{awards_text}_{current_year_month}.md", "w") as file:
        file.write(output)

    from markdown_pdf import Section, MarkdownPdf

    pdf = MarkdownPdf()
    # each section starts from new page in pdf file
    pdf.add_section(Section(output))
    pdf.save(f"{awards_text}_{current_year_month}.pdf")
    
    return output


In [22]:
def produce_markdown_report(file_path, total_awards, total_usage, language="en", appending_contributors={}):
    """
    Produce a markdown report for contributors and their awards.

    Args:
    - file_path (str): Path to the CSV file containing task data with 'USER_ID' and 'COMPLETE_TIME'.
    - total_awards (int): The total amount of awards to be distributed this month.
    - total_usage (int): The total amount of money used for maintaining the project.
    - language (str): The language of the output, either "en" for English or "zh" for Chinese.

    Returns:
    - str: A formatted string representing the final output.
    """
    # Analyze task contributors
    task_contributors_by_time = analyze_task_contributors(file_path)
    
    # Analyze GitHub contributors
    github_contributors_by_time = analyze_contributors_all_repos()
    
    # Merge contributors
    merged_contributors = merge_contributor_dicts(
        TASK_USER_TO_GITHUB_ALIAS_MAP, task_contributors_by_time, github_contributors_by_time
    )
    
    # Remove redundant contributors
    deduplicated_contributors = remove_redundant_contributors(merged_contributors.copy())
    
    # Append additional contributors
    for period, contributors in appending_contributors.items():
        deduplicated_contributors[period].extend(contributors)
    
    # Filter waived contributors
    filtered_contributors = filter_waived_contributors(deduplicated_contributors.copy(), WAIVED_USERS)
    
    # Filter quited contributors
    reward_contributors = filter_waived_contributors(filtered_contributors.copy(), QUITED_USERS)
    
    # Calculate awards
    awards = calculate_awards(reward_contributors, total_awards)
    
    # Generate final output
    final_output = generate_final_output(
        deduplicated_contributors, reward_contributors, awards, total_awards, total_usage, language
    )
    
    return final_output

# Example usage
file_path = 'task_assignee_20241204.csv'
total_awards = 1000
total_usage = 700
final_output = produce_markdown_report(file_path, total_awards, total_usage, language="zh", appending_contributors=APPENDING_CONTRIBUTORS)
print(final_output)
# english version
en_final_output = produce_markdown_report(file_path, total_awards, total_usage, language="en")
print(en_final_output)

[INFO] Fetching repositories...
[INFO] Retrieved 30 repositories on page 1.
[INFO] Total repositories fetched: 30.
[INFO] Starting commit analysis...
[INFO] Fetching commits for repository: CharcoalPit2...
[INFO] Retrieved 100 commits on page 1 for CharcoalPit2.
[INFO] Retrieved 3 commits on page 2 for CharcoalPit2.
[INFO] Total commits fetched for CharcoalPit2: 103.
[INFO] Finished analyzing commits for CharcoalPit2.
[INFO] Fetching commits for repository: FrostedHeart...
[INFO] Retrieved 100 commits on page 1 for FrostedHeart.
[INFO] Retrieved 100 commits on page 2 for FrostedHeart.
[INFO] Retrieved 100 commits on page 3 for FrostedHeart.
[INFO] Retrieved 100 commits on page 4 for FrostedHeart.
[INFO] Retrieved 100 commits on page 5 for FrostedHeart.
[INFO] Retrieved 100 commits on page 6 for FrostedHeart.
[INFO] Retrieved 100 commits on page 7 for FrostedHeart.
[INFO] Retrieved 100 commits on page 8 for FrostedHeart.
[INFO] Retrieved 100 commits on page 9 for FrostedHeart.
[INFO] Re