# Production Returning for 2024-25 Season
- This notebook calculates how much of each team's statistical production (goals, assists, penalty minutes, +/-) was lost in the offseason and how much is returning

In [89]:
# Dependencies

import os
import sys
import pandas as pd
import numpy as np


# All project folders live one level above the notebook's working directory.
base_dir = os.path.join(os.getcwd(), '..')

temp_folder = os.path.join(base_dir, 'TEMP')     # scratch / intermediate exports
data_folder = os.path.join(base_dir, 'data')     # input CSV files
img_folder = os.path.join(base_dir, 'images')    # generated images
logo_folder = os.path.join(img_folder, 'logos')  # team logo files

# Input files (all CSV, all read from the data folder)
stats_file = os.path.join(data_folder, 'player_stats_2023_v1.csv')        # 2023-24 player stats
roster_file = os.path.join(data_folder, 'roster_2024_current_v2.csv')     # PATH TO THE ROSTER FILE
school_info_file = os.path.join(data_folder, 'arena_school_info.csv')     # school info table

# Load each file into its own DataFrame
stats_2023_df = pd.read_csv(stats_file)
roster_2024_df = pd.read_csv(roster_file)
school_info_df = pd.read_csv(school_info_file)

## Dictionaries and Constants

In [90]:
## Conference Membership
# One list per conference. Each team must appear in exactly one list; team
# names use the normalized spelling (no hyphens, periods, or apostrophes)
# that the stats table is cleaned to later in the notebook.

# NOTE: 'St Lawrence' was previously listed here as well as in `ecac`.
# It is an ECAC program, so the duplicate entry has been removed to keep
# conference roll-ups from counting the team twice.
atlantic = ['Air Force', "American Intl", 'Army', 'Bentley', 'Canisius', 'Holy Cross', 'Mercyhurst',
            'Niagara', 'RIT', 'Robert Morris', 'Sacred Heart']

big_ten = ['Michigan', 'Michigan State', 'Minnesota', 'Notre Dame', 'Ohio State', 'Penn State', 'Wisconsin']

ccha = ['Augustana', 'Bemidji State', 'Bowling Green', 'Ferris State', 'Lake Superior', 'Michigan Tech',
        'Minnesota State', 'Northern Michigan', 'St Thomas']

ecac = ['Brown', 'Clarkson', 'Colgate', 'Cornell', 'Dartmouth', 'Harvard', 'Princeton', 'Quinnipiac',
        'Rensselaer', 'St Lawrence', 'Union', 'Yale']

hockey_east = ['Boston College', 'Boston University', 'Connecticut', 'Maine', 'Massachusetts', 'Mass Lowell',
               'Merrimack', 'New Hampshire', 'Northeastern', 'Providence', 'Vermont']

nchc = ['Arizona State', 'Colorado College', 'Denver', 'Miami', 'Minnesota Duluth', 'North Dakota', 'Omaha',
        'St Cloud State', 'Western Michigan']

independents = ['Alaska Anchorage', 'Alaska', 'Lindenwood', 'Long Island', 'Stonehill']

# Map each team name to its logo file name: {Team: logo_abv + '.png'}.
# Built from the 'Team' and 'logo_abv' columns of the school info table.
logo_mapping = {
    team_name: logo_abbreviation + '.png'
    for team_name, logo_abbreviation in zip(school_info_df['Team'], school_info_df['logo_abv'])
}

# print(logo_mapping)

### Data Modifications
- Make sure the columns and formats match between the stats df and the new roster df

In [91]:



# Normalize team names in the 2023-24 stats table:
#   - hyphens become spaces
#   - periods and apostrophes are removed
# regex=False forces literal matching. Without it the behavior depends on the
# pandas version's default for `regex`, and '.' interpreted as a regular
# expression would match (and delete) every character.
stats_2023_df['Team'] = (
    stats_2023_df['Team']
    .str.replace('-', ' ', regex=False)
    .str.replace('.', '', regex=False)
    .str.replace("'", '', regex=False)
)

# NOTE(review): the same normalization is NOT applied to roster_2024_df['Team']
# (the line below was left disabled). The returning/departed comparison assumes the
# roster file already uses the normalized spellings - confirm against the roster CSV.
# roster_2024_df['Team'] = roster_2024_df['Team'].str.replace('-', ' ')

stats_2023_df.head()

Unnamed: 0,Clean_Player,Team,G,A,Pts,plus_minus,Sh,PIM,Games_Played
0,A.J. Hodges,Bentley,6,9,15,-1,57,2,29
1,A.J. Macaulay,Alaska,5,10,15,9,44,14,34
2,AJ Casperson,Long Island,0,1,1,1,7,2,12
3,Aaron Bohlinger,Massachusetts,3,5,8,1,22,4,34
4,Aaron Grounds,Long Island,1,2,3,-5,14,16,11


In [92]:


# Align the roster table with the stats table:
#   1. rename 'Current Team' to 'Team'
#   2. add a 'Clean_Player' column built as "<First_Name> <Last_Name>",
#      with leading and trailing spaces stripped
roster_2024_df = (
    roster_2024_df
    .rename(columns={'Current Team': 'Team'})
    .assign(
        Clean_Player=lambda roster: (roster['First_Name'] + ' ' + roster['Last_Name']).str.strip()
    )
)

roster_2024_df.head()

# EXPORT ROSTER FILE TO TEMP FOLDER
# roster_2024_df.to_csv(os.path.join(temp_folder, 'roster_2024_transformed.csv'), index=False)


Unnamed: 0,Team,Last_Name,First_Name,No,Position,Yr,Ht,Wt,DOB,Hometown,Height_Inches,Draft_Year,NHL_Team,D_Round,Last Team,League,City,State_Province,Country,Clean_Player
0,Lake Superior,Barone,Adam,6,Defensemen,Fr,6-1,174,5/6/2004,"Sault Ste. Marie, Ont.",73,,,,Trail,BCHL,Sault Ste. Marie,Ont.,Canada,Adam Barone
1,Lake Superior,Blanchett,Jack,16,Defensemen,So,5-11,185,5/12/2003,"Monroe, Mich.",71,,,,Powell,BCHL,Monroe,Mich.,USA,Jack Blanchett
2,Lake Superior,Brown,Mike,3,Defensemen,Jr,6-2,209,4/3/2001,"Belmont, Mass.",74,,,,Merrimack,,Belmont,Mass.,USA,Mike Brown
3,Lake Superior,Bushy,Evan,5,Defensemen,So,6-1,195,3/26/2002,"Mankato, Minn.",73,,,,Trail,BCHL,Mankato,Minn.,USA,Evan Bushy
4,Lake Superior,Conrad,Jacob,4,Defensemen,Fr,5-11,180,5/18/2002,"Green Bay, Wis.",71,,,,Fairbanks,NAHL,Green Bay,Wis.,USA,Jacob Conrad


In [93]:
# Step 1: Match the players from 2023 stats to 2024 roster based on 'Clean_Player'.
# The roster keys are de-duplicated first so a repeated roster row cannot multiply a stat line.
roster_keys = roster_2024_df[['Clean_Player', 'Team']].drop_duplicates()

# Tag every stat line with a row id. A name can still match several roster rows
# (e.g. two different players who share a name), and the id lets us collapse the
# merge result back to exactly one row per original stat line.
stats_keyed = stats_2023_df.reset_index(drop=True)
stats_keyed['_stats_row'] = range(len(stats_keyed))

merged_df = pd.merge(stats_keyed, roster_keys, on='Clean_Player', how='left', suffixes=('_2023', '_2024'))

# Step 2: Classify players as "Returning" or "Departed".
# A player is Returning only when a roster row with the same name is on the same team.
# Players missing from the roster (Team_2024 is NaN) and players who transferred
# (Team_2024 differs from Team_2023) are both Departed.
merged_df['_same_team'] = merged_df['Team_2023'] == merged_df['Team_2024']

# Keep one row per stat line, preferring the same-team match when there are several.
# Without this step a duplicated name would have its production counted more than once,
# possibly under both Returning and Departed.
# When none of several matches is on the same team, the Team_2024 that is kept is simply
# the first one in roster order.
merged_df = (
    merged_df
    .sort_values(['_stats_row', '_same_team'], ascending=[True, False])
    .drop_duplicates(subset='_stats_row', keep='first')
    .reset_index(drop=True)
)

merged_df['Status'] = merged_df['_same_team'].map({True: 'Returning', False: 'Departed'})
merged_df = merged_df.drop(columns=['_stats_row', '_same_team'])

# Every 2023 stat line must appear exactly once, otherwise the team totals are wrong
assert len(merged_df) == len(stats_2023_df), "merge changed the number of stat lines"

# Step 3: Calculate total production for each group (Returning and Departed) by team
team_production = merged_df.groupby(['Team_2023', 'Status']).agg({
    'G': 'sum',
    'A': 'sum',
    'Sh': 'sum',
    'PIM': 'sum',
    'Pts': 'sum',
    'Games_Played': 'sum'
}).reset_index()

# Step 4: Pivot the table to make "Returning" and "Departed" production easy to compare for each team.
# A team with no rows for one status gets 0 for that status.
team_production_pivot = team_production.pivot(
    index='Team_2023',
    columns='Status',
    values=['G', 'A', 'Pts', 'Sh', 'PIM', 'Games_Played']
).fillna(0)

# Flatten the MultiIndex for clarity: ('G', 'Departed') -> 'G_Departed'
team_production_pivot.columns = ['_'.join(col).strip() for col in team_production_pivot.columns.values]



In [94]:
# Preview the Departed vs. Returning totals for the first 35 teams.
# Other spot checks that were used while building the table:
# merged_df.head(30)
# team_production_pivot.head()
# Check Michigan State for example
# merged_df[merged_df['Team_2023'] == 'Michigan State']
team_production_pivot.head(n=35)



Unnamed: 0_level_0,G_Departed,G_Returning,A_Departed,A_Returning,Pts_Departed,Pts_Returning,Sh_Departed,Sh_Returning,PIM_Departed,PIM_Returning,Games_Played_Departed,Games_Played_Returning
Team_2023,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Air Force,53,62,90,97,143,159,541,635,230,196,322,436
Alaska,74,36,127,68,201,104,654,371,325,197,395,283
Alaska Anchorage,44,51,84,72,128,123,541,451,246,191,351,331
American Intl,52,67,101,92,153,159,606,580,238,196,454,347
Arizona State,70,59,91,129,161,188,546,578,213,366,331,424
Army,33,65,71,108,104,173,319,734,125,236,221,519
Augustana,29,59,46,102,75,161,254,648,91,187,206,471
Bemidji State,46,71,79,124,125,195,418,713,76,171,232,529
Bentley,14,81,20,150,34,231,174,892,69,302,133,565
Boston College,85,98,118,205,203,303,555,753,101,280,274,541
