# Start

In [0]:
# Essential Imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import warnings

# Niche Imports
import chardet

# Tools
def load_csv(csv_file):
    """Read a CSV file into a DataFrame using an auto-detected text encoding.

    The file is first opened in binary mode and its full contents are passed
    to ``chardet.detect``; the ``"encoding"`` entry of that result is then
    forwarded to ``pandas.read_csv``, which reads the file a second time.

    Parameters
    ----------
    csv_file
        Location of the CSV file; passed unchanged to both ``open`` and
        ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``csv_file``.
    """
    with open(csv_file, "rb") as handle:
        detection = chardet.detect(handle.read())

    # Whatever chardet reports under "encoding" is used as-is.
    return pd.read_csv(csv_file, encoding=detection["encoding"])

In [0]:
# Directory holding the competition CSV files.
comp_dir = '/Workspace/Repos/aeo-chiyou-ip@aeondic.onmicrosoft.com/kaggles/March_Machine_Learning_Mania_2025/data'

# Load the men's table first, then the women's, each through load_csv.
mens_df, womens_df = (
    load_csv(os.path.join(comp_dir, file_name))
    for file_name in ('MTeams.csv', 'WTeams.csv')
)

# For each table, show summary statistics followed by the first rows.
for gender, df in (('Mens', mens_df), ('Womens', womens_df)):
    print(f"Investigating {gender} Team")
    display(df.describe())
    display(df.head())

# Durations

In [0]:
# Plot, for every men's team, a horizontal bar running from its first to its
# last D1 season.
gender = 'Mens'
print(f"Investigating {gender} Team")

# Reload the table and add the bar length: last season minus first season.
df = load_csv(os.path.join(comp_dir, 'MTeams.csv')).assign(
    Widths=lambda teams: teams['LastD1Season'] - teams['FirstD1Season']
)

# Tall canvas so every team name gets its own row.
fig, ax = plt.subplots(figsize=(15,60))

# Draw bars of length `Widths`; they start at x = 0 until shifted below.
sns.barplot(
    data=df,
    y="TeamName",
    x="Widths",
    hue=None,
    orient="h",
    color="blue",
    edgecolor="black",
    ax=ax
)

# Move each bar's left edge to the team's first season.
# NOTE(review): assumes seaborn created one patch per DataFrame row, in row
# order — confirm (e.g. no duplicate TeamName values being aggregated).
for i, start in enumerate(df["FirstD1Season"]):
    ax.patches[i].set_x(start)

# Restrict the x-range to the seasons actually present in the data.
earliest_season = df["FirstD1Season"].min()
latest_season = df["LastD1Season"].max()
ax.set_xlim(earliest_season, latest_season)

# Mirror the x-axis ticks and tick labels at the top of the figure.
ax.xaxis.set_ticks_position("both")
ax.xaxis.set_label_position("top")
ax.tick_params(axis="x", which="both", labeltop=True, labelbottom=True)
ax.spines["top"].set_visible(True)

# Drop the x-axis label that seaborn derived from the column name.
ax.set_xlabel("")
ax.set_title('Durations of Mens NCAA Teams')
plt.show()