## Generates the poule sheets for Judo+ tournaments  
#### Work by [Gonçalo Sousa](https://github.com/Sousa1909) for [AJDS](https://github.com/AJD-Santarem)  

#### Script 1
---

In [None]:
import pandas as pd
from openpyxl import load_workbook
import os
import yaml
import random

# Names for the output PDFs
tournament_code = "REPLACE_ME"
output_folder = "Output/" + tournament_code

# Create the Input folder if it doesn't exist.
# This must run BEFORE the workbook is read: on a first run the read below
# fails with FileNotFoundError, and the folder would otherwise never be
# created for the user to drop the workbook into.
if not os.path.exists("Input"):
    os.makedirs("Input")
    print("Input folder does not exist. Generating...")

# Read the Excel file (Masculine and Feminine sheets) without skipping rows
df_masc = pd.read_excel('Input/REPLACE_ME.xlsx', sheet_name='MASC')
df_fem = pd.read_excel('Input/REPLACE_ME.xlsx', sheet_name='FEM')

Table Manipulation

In [106]:
# Normalise a raw sheet: discard the title row, rename the columns, sort the athletes
def process_table(df):
    """Return a cleaned and sorted version of a raw athlete sheet.

    The row labelled 0 (the title row) is removed, the five columns are
    renamed to NAME, AGE_TIER, BIRTH_YEAR, WEIGHT and CLUB, and the rows are
    ordered by AGE_TIER, then BIRTH_YEAR, then WEIGHT, all ascending.
    """
    column_names = ['NAME', 'AGE_TIER', 'BIRTH_YEAR', 'WEIGHT', 'CLUB']
    sort_keys = ['AGE_TIER', 'BIRTH_YEAR', 'WEIGHT']

    # drop() returns a new frame, so the rename below acts on that result
    cleaned = df.drop(index=[0])
    cleaned.columns = column_names

    return cleaned.sort_values(by=sort_keys, ascending=True)

# Clean and sort the masculine sheet, then the feminine sheet
df_masc = process_table(df=df_masc)
df_fem = process_table(df=df_fem)

Athlete Grouping and Draw

In [None]:
# Helper: does any athlete already in the group belong to the given club?
def has_same_club(group, club):
    """Return True when at least one athlete in ``group`` has ``club`` as CLUB.

    ``group`` is an iterable of athlete records that support ``record['CLUB']``.
    """
    for member in group:
        if member['CLUB'] == club:
            return True
    return False

# Function to divide athletes into ideal group sizes (3, 4, or 5 if necessary)
def divide_into_groups(athletes, column_order, target_group_sizes=(4, 3)):
    """Randomly draw ``athletes`` into groups, separating club-mates when possible.

    The list is shuffled and then cut into consecutive chunks. For each chunk
    the first size in ``target_group_sizes`` that still fits is used; a
    remainder of exactly two athletes forms its own pair. An athlete whose
    club is already present in the chunk being built is moved to an earlier
    group that has fewer than five members and no athlete of that club; if no
    such group exists the athlete stays where they are. Finally, any group
    left with a single athlete is merged into the first other group that has
    fewer than five members.

    Every athlete passed in ends up in exactly one returned group: nobody is
    dropped when they are moved between groups or when no group has room for
    a lone athlete (that athlete then keeps a group of their own).

    Args:
        athletes: list of athlete records (dicts or pandas Series) providing
            every key in ``column_order`` plus 'CLUB'. Shuffled in place.
        column_order: column labels, in order, for the returned DataFrames.
        target_group_sizes: preferred group sizes, tried in the given order.

    Returns:
        A list of DataFrames, one per group, with columns ``column_order``.
        An empty input yields an empty list.
    """
    max_group_size = 5

    def club_present(members, club):
        # Same test the draw has always used: any member with an equal CLUB.
        return any(member['CLUB'] == club for member in members)

    random.shuffle(athletes)

    # Groups are kept as plain lists of records until the very end so they can
    # be extended in place; they are converted to DataFrames once, on return.
    groups = []
    total = len(athletes)
    i = 0
    while i < total:
        remaining = total - i

        # First preferred size that fits; two leftovers are allowed as a pair
        size = next(
            (s for s in target_group_sizes if remaining >= s or remaining == 2),
            None,
        )
        if size is None:
            # Fewer athletes left than any preferred size: one trailing group
            groups.append(list(athletes[i:]))
            break

        group = []
        for athlete in athletes[i:i + size]:
            club = athlete['CLUB']
            host = None
            if club_present(group, club):
                # Club clash: look for an earlier group with room and no club-mate
                host = next(
                    (
                        g for g in groups
                        if len(g) < max_group_size and not club_present(g, club)
                    ),
                    None,
                )
            if host is not None:
                host.append(athlete)
            else:
                group.append(athlete)
        groups.append(group)
        i += size

    # Merge single-athlete groups into the first other group with room.
    # Positions are used instead of list.index()/remove(), which would compare
    # the records with == and is not reliable for pandas objects.
    index = 0
    while index < len(groups):
        if len(groups[index]) == 1:
            host = next(
                (
                    g for pos, g in enumerate(groups)
                    if pos != index and len(g) < max_group_size
                ),
                None,
            )
            if host is not None:
                host.extend(groups.pop(index))
                continue  # the next group has slid into this position
        index += 1

    return [pd.DataFrame(group)[column_order] for group in groups]


# Function to group athletes into weight bands (weight_range kg wide) and detect outliers
def group_by_weight_and_divide(df, weight_range):
    """Split athletes into weight bands, then draw each band into groups.

    Athletes are sorted by WEIGHT. A band opens at its lightest athlete and
    accepts every following athlete whose weight is at most ``weight_range``
    above that first athlete; the next heavier athlete opens a new band.
    Bands with more than one athlete are passed to ``divide_into_groups``;
    an athlete alone in a band is reported as an outlier instead.

    Args:
        df: DataFrame with the columns NAME, AGE_TIER, BIRTH_YEAR, WEIGHT, CLUB.
        weight_range: maximum weight spread allowed inside one band.

    Returns:
        ``(final_groups, outliers)``: a list of group DataFrames and a list
        of outlier rows (pandas Series). Both are empty for an empty ``df``.
    """
    column_order = ['NAME', 'AGE_TIER', 'BIRTH_YEAR', 'WEIGHT', 'CLUB']

    # Nothing to group; this also avoids reading row 0 of an empty frame below
    if df.empty:
        return [], []

    df = df.sort_values(by='WEIGHT').reset_index(drop=True)

    # Walk the sorted athletes, closing a band whenever the limit is exceeded
    bands = []
    current_band = []
    current_limit = df.iloc[0]['WEIGHT'] + weight_range
    for _, row in df.iterrows():
        if row['WEIGHT'] <= current_limit:
            current_band.append(row)
        else:
            bands.append(current_band)
            current_band = [row]
            current_limit = row['WEIGHT'] + weight_range
    bands.append(current_band)

    # Bands of two or more athletes are drawn into groups; the rest are outliers
    final_groups = []
    outliers = []
    for band in bands:
        if len(band) > 1:
            final_groups.extend(divide_into_groups(band, column_order=column_order))
        else:
            outliers.extend(band)

    return final_groups, outliers


# Function to handle "Benjamim" age tier by consecutive birth years
def group_benjamim_by_birth_year(df, weight_range=4.0):
    """Group athletes in two-year birth windows, then by weight.

    Windows start at the earliest birth year and advance two years at a time
    (``year`` and ``year + 1``). Each non-empty window is grouped with
    ``group_by_weight_and_divide``.

    Args:
        df: DataFrame of athletes with a BIRTH_YEAR column.
        weight_range: maximum weight spread passed on to the weight grouping.

    Returns:
        ``(grouped_by_years, all_outliers)``: a list of
        ``("<year>-<year + 1>", groups)`` tuples and the list of outlier rows
        collected from every window. Both are empty for an empty ``df``.
    """
    grouped_by_years = []
    all_outliers = []

    # min()/max() of an empty column are not usable as range() bounds
    if df.empty:
        return grouped_by_years, all_outliers

    # int() lets range() accept years stored as floats or NumPy scalars
    min_year = int(df['BIRTH_YEAR'].min())
    max_year = int(df['BIRTH_YEAR'].max())

    # The stop is max_year + 1 because range() excludes its stop value:
    # without it a tier with a single birth year produced no window at all,
    # and an even span (e.g. 2012-2014) silently lost its last year.
    for year in range(min_year, max_year + 1, 2):
        subset = df[(df['BIRTH_YEAR'] == year) | (df['BIRTH_YEAR'] == year + 1)]
        if not subset.empty:
            groups_by_weight, outliers = group_by_weight_and_divide(subset, weight_range)
            grouped_by_years.append((f"{year}-{year + 1}", groups_by_weight))
            all_outliers.extend(outliers)

    return grouped_by_years, all_outliers

# Build the draw for every AGE_TIER, collecting the outliers along the way
def group_by_age_and_weight(df, weight_range=4.0):
    """Group the athletes of each age tier and gather all outliers.

    The "Benjamim" tier is split by birth-year windows via
    ``group_benjamim_by_birth_year``; every other tier is grouped by weight
    via ``group_by_weight_and_divide``.

    Returns:
        ``(tiers, outlier_pool)``: a list of ``(label, groups)`` tuples and
        the outlier rows from all tiers.
    """
    tiers = []
    outlier_pool = []

    for tier_name, tier_df in df.groupby('AGE_TIER'):
        print(f"Grouping athletes in {tier_name}...")

        if tier_name != "Benjamim":
            weight_groups, tier_outliers = group_by_weight_and_divide(tier_df, weight_range)
            tiers.append((tier_name, weight_groups))
        else:
            # One entry per birth-year window, so the result is spliced in
            year_groups, tier_outliers = group_benjamim_by_birth_year(tier_df, weight_range)
            tiers.extend(year_groups)

        outlier_pool.extend(tier_outliers)

    return tiers, outlier_pool

def save_outliers_to_yaml(outliers, gender):
    """Write the outlier athletes for one gender to a YAML file.

    The file is ``<output_folder>/.outliers_<gender>.yaml`` and holds a single
    'Outliers' key with one mapping per athlete (Name, Weight, Club,
    Age Tier, Birth Year).

    Args:
        outliers: iterable of athlete records providing the keys NAME,
            WEIGHT, CLUB, AGE_TIER and BIRTH_YEAR.
        gender: tag used in the file name and in the console message.
    """
    # Make sure the destination exists so this function also works when it is
    # called on its own, not only from save_grouped_athletes_to_yaml
    os.makedirs(output_folder, exist_ok=True)
    output_yaml_file = f'{output_folder}/.outliers_{gender}.yaml'

    # Structure to hold outliers data for YAML
    outliers_data = [
        {
            'Name': athlete['NAME'],
            'Weight': athlete['WEIGHT'],
            'Club': athlete['CLUB'],
            'Age Tier': athlete['AGE_TIER'],
            'Birth Year': athlete['BIRTH_YEAR'],
        }
        for athlete in outliers
    ]

    # Save outliers data as YAML
    with open(output_yaml_file, 'w', encoding='utf-8') as f:
        yaml.dump({'Outliers': outliers_data}, f, allow_unicode=True, sort_keys=False)

    print(f"Outliers for {gender} have been saved to {output_yaml_file}.")

# Write the grouped athletes (and, separately, any outliers) to YAML
def save_grouped_athletes_to_yaml(groups_by_age, outliers, gender):
    """Write the draw for one gender to ``.grouped_athletes_<gender>.yaml``.

    Each ``(label, groups)`` entry becomes a section holding one
    ``Group <n>`` mapping per group. Labels that look like a year span
    ("2014-2015") are prefixed with "Benjamim". When ``outliers`` is not
    empty they are written to their own file through
    ``save_outliers_to_yaml`` before the grouped file is written.
    """
    os.makedirs(output_folder, exist_ok=True)
    output_yaml_file = f'{output_folder}/.grouped_athletes_{gender}.yaml'

    grouped_data = {}
    for tier_label, tier_groups in groups_by_age:
        # Year-span labels come from the Benjamim birth-year windows
        is_year_span = "-" in tier_label and tier_label.split("-")[0].isdigit()
        section = f"Benjamim {tier_label}" if is_year_span else tier_label

        entries = []
        for number, members in enumerate(tier_groups, start=1):
            roster = []
            for _, athlete in members.iterrows():
                roster.append({
                    'Name': athlete['NAME'],
                    'Weight': athlete['WEIGHT'],
                    'Club': athlete['CLUB'],
                    'Birth Year': athlete['BIRTH_YEAR'],
                })
            entries.append({f'Group {number}': roster})
        grouped_data[section] = entries

    # Outliers are stored in a separate YAML file
    if outliers:
        save_outliers_to_yaml(outliers, gender)

    # Save the structured data as YAML
    with open(output_yaml_file, 'w', encoding='utf-8') as out_file:
        yaml.dump(grouped_data, out_file, allow_unicode=True, sort_keys=False)

    print(f"Grouped athletes for {gender} have been saved to {output_yaml_file}.")

# Draw and save the feminine table first, then the masculine one;
# outliers are written alongside the grouped output
groups_fem, outliers_fem = group_by_age_and_weight(df_fem)
save_grouped_athletes_to_yaml(groups_by_age=groups_fem, outliers=outliers_fem, gender='fem')

groups_mas, outliers_mas = group_by_age_and_weight(df_masc)
save_grouped_athletes_to_yaml(groups_by_age=groups_mas, outliers=outliers_mas, gender='mas')
