In [None]:
import pandas as pd
import numpy as np
import os

# Load the data
teams = pd.read_csv('../Data/MTeams.csv')
regular_season = pd.read_csv('../Data/MRegularSeasonDetailedResults.csv')

# Filter for seasons 2019-2024 (both ends inclusive). A lower bound alone would
# let any later season present in the CSV leak into the outputs, which are
# written to files labelled "2019_2024".
regular_season = regular_season[regular_season['Season'].between(2019, 2024)]

# Function to calculate team statistics for a single season
def calculate_team_stats(team_id, season_data):
    """Summarise one team's box-score numbers over the games in ``season_data``.

    Args:
        team_id: Value matched against the ``WTeamID`` and ``LTeamID`` columns.
        season_data: DataFrame of game rows with a ``Season`` column and
            ``W``/``L``-prefixed stat columns (``Score``, ``FGM``, ``FGA``,
            ``FGM3``, ``FGA3``, ``FTM``, ``FTA``, ``OR``, ``DR``, ``TO``,
            ``Stl``, ``Blk``).

    Returns:
        ``None`` if the team appears in no row; otherwise a dict of per-game
        averages, shooting percentages and the win/loss record. ``Season`` is
        taken from the first row of ``season_data``, so the caller is expected
        to pass rows from a single season.
    """
    as_winner = season_data[season_data['WTeamID'] == team_id]
    as_loser = season_data[season_data['LTeamID'] == team_id]
    win_count = len(as_winner)
    loss_count = len(as_loser)
    games_played = win_count + loss_count

    if games_played == 0:
        return None  # Team did not play in these rows

    def team_total(stat):
        # The team's own numbers sit in W* columns for wins, L* for losses.
        return as_winner['W' + stat].sum() + as_loser['L' + stat].sum()

    def opponent_total(stat):
        # The opponent's numbers sit in the opposite-prefixed columns.
        return as_winner['L' + stat].sum() + as_loser['W' + stat].sum()

    def per_game(total):
        return total / games_played

    def rate(made, attempted):
        # Percentages fall back to 0 when there were no attempts.
        return made / attempted if attempted > 0 else 0

    season_label = None if len(season_data) == 0 else season_data['Season'].iloc[0]

    # Scoring
    scored = team_total('Score')
    conceded = opponent_total('Score')

    # Shooting
    made_fg = team_total('FGM')
    tried_fg = team_total('FGA')
    made_3 = team_total('FGM3')
    tried_3 = team_total('FGA3')

    # Rebounding (own and opponent)
    off_reb = team_total('OR')
    def_reb = team_total('DR')
    opp_off_reb = opponent_total('OR')
    opp_def_reb = opponent_total('DR')

    # Ball security and defence
    giveaways = team_total('TO')
    takeaways = opponent_total('TO')
    steal_total = team_total('Stl')
    block_total = team_total('Blk')

    # Free throws
    tried_ft = team_total('FTA')
    made_ft = team_total('FTM')

    # Key order is significant: it becomes the column order downstream.
    return {
        'TeamID': team_id,
        'Season': season_label,
        'Games': games_played,
        'PointsPerGame': per_game(scored),
        'PointsAllowedPerGame': per_game(conceded),
        'FG%': rate(made_fg, tried_fg),
        '3PT%': rate(made_3, tried_3),
        '2PT%': rate(made_fg - made_3, tried_fg - tried_3),
        '3PTAttemptsPerGame': per_game(tried_3),
        'ReboundsPerGame': per_game(off_reb + def_reb),
        'OffensiveReboundsPerGame': per_game(off_reb),
        'DefensiveReboundsPerGame': per_game(def_reb),
        'OppReboundsPerGame': per_game(opp_off_reb + opp_def_reb),
        'OppOffensiveReboundsPerGame': per_game(opp_off_reb),
        'TurnoversPerGame': per_game(giveaways),
        'ForcedTurnoversPerGame': per_game(takeaways),
        'StealsPerGame': per_game(steal_total),
        'BlocksPerGame': per_game(block_total),
        'FTAttemptsPerGame': per_game(tried_ft),
        'FT%': rate(made_ft, tried_ft),
        'Wins': win_count,
        'Losses': loss_count,
        'WinPct': rate(win_count, games_played),
    }

# Process all teams for seasons 2019-2024
all_stats = []

# Get unique seasons and slice the game table once per season. The slice does
# not depend on the team, so building it inside the team loop would repeat the
# same filter for every team.
seasons = sorted(regular_season['Season'].unique())
games_by_season = {
    season: regular_season[regular_season['Season'] == season]
    for season in seasons
}

# Process each team for each season (records end up ordered by team, then season)
for team_id, team_name in zip(teams['TeamID'], teams['TeamName']):
    for season in seasons:
        team_stats = calculate_team_stats(team_id, games_by_season[season])

        if team_stats is None:  # Team played no games in this season
            continue

        # Place TeamName first in the record for visibility
        all_stats.append({'TeamName': team_name, **team_stats})

# Create DataFrame of all team stats
team_stats_df = pd.DataFrame(all_stats)

# Make sure the output directory exists; to_csv/to_json do not create it
os.makedirs('../prepared_data', exist_ok=True)

# Save the processed data as CSV and JSON
team_stats_df.to_csv('../prepared_data/processed_team_stats_2019_2024.csv', index=False)
team_stats_df.to_json('../prepared_data/processed_team_stats_2019_2024.json', orient='records')

# Output JSON directly for preview (first 10 records)
json_sample = team_stats_df.head(10).to_json(orient='records', indent=2)
print("Sample of processed team statistics (first 10 records):")
print(json_sample)

# Print a simple list of teams and seasons for quick reference
print("\nTeams and seasons included in the dataset:")
team_season_summary = team_stats_df[['TeamName', 'Season', 'Wins', 'Losses', 'WinPct']].head(20)
print(team_season_summary.to_string(index=False))

Sample of processed team statistics (first 10 records):
[
  {
    "TeamName":"Abilene Chr",
    "TeamID":1101,
    "Season":2019,
    "Games":29,
    "PointsPerGame":71.724137931,
    "PointsAllowedPerGame":64.8965517241,
    "FG%":0.4588014981,
    "3PT%":0.3832116788,
    "2PT%":0.4981024668,
    "3PTAttemptsPerGame":18.8965517241,
    "ReboundsPerGame":31.9655172414,
    "OffensiveReboundsPerGame":9.0689655172,
    "DefensiveReboundsPerGame":22.8965517241,
    "OppReboundsPerGame":32.5862068966,
    "OppOffensiveReboundsPerGame":8.8275862069,
    "TurnoversPerGame":11.6551724138,
    "ForcedTurnoversPerGame":15.724137931,
    "StealsPerGame":8.0,
    "BlocksPerGame":2.5517241379,
    "FTAttemptsPerGame":19.0689655172,
    "FT%":0.7233273056,
    "PossessionsPerGame":66.8853448276,
    "Wins":23,
    "Losses":6,
    "WinPct":0.7931034483,
    "EstPointsOffTurnoversPerGame":14.3324401573,
    "EstPointsAllowedOffTurnoversPerGame":9.6123212963,
    "EstSecondChancePointsPerGame":6.8075