# Goal: Create Interactive Map of 2025 NCAA Participants

In [7]:
### Setup and Dependencies

### Import constants from config.py
from config import *

### Dependencies
import pandas as pd
import numpy as np
import seaborn as sns
import sqlite3  # Assuming SQL connection for database operations


import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import matplotlib.image as mpimg
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.legend_handler import HandlerTuple
from matplotlib.legend_handler import HandlerBase


### FILE PATHS
TEMP_FOLDER = '../TEMP/'
DATA_FOLDER = '../data/'

## 2024-25 Full Roster Path
roster_path = DATA_FOLDER + 'roster_2025_current_march_25_v4_ex20250325.csv'

full_roster = pd.read_csv(roster_path) # load roster as dataframe
# full_roster.info() # Check to make sure it loaded correctly

In [8]:
### Filter Roster to only teams in the NCAA Tournament
print(ncaa_team_list_2025) # Check the list of Tourney teams from the config file

# Filter to only teams in the NCAA Tournament
roster_ncaa = full_roster[full_roster['Current Team'].isin(ncaa_team_list_2025)]

roster_ncaa.rename(columns={'Current Team':'Team'}, inplace=True) # Rename 'Current_Team' to 'Team' for consistency

# Create a new 'Player' column that combines the player's first and last name
roster_ncaa['Player'] = roster_ncaa['First_Name'] + ' ' + roster_ncaa['Last_Name']

roster_ncaa['Player'] = roster_ncaa['Player'].str.strip() # Strip any leading or trailing white space

# roster_ncaa.info() # Check to make sure it loaded correctly

['Michigan State', 'Cornell', 'Boston University', 'Ohio State', 'Western Michigan', 'Minnesota State', 'Minnesota', 'Massachusetts', 'Boston College', 'Bentley', 'Providence', 'Denver', 'Maine', 'Penn State', 'Connecticut', 'Quinnipiac']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  roster_ncaa.rename(columns={'Current Team':'Team'}, inplace=True) # Rename 'Current_Team' to 'Team' for consistency
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  roster_ncaa['Player'] = roster_ncaa['First_Name'] + ' ' + roster_ncaa['Last_Name']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  roster_ncaa['Player'] = roster_ncaa['Player'].str.strip() # Str

## Add Current Season Stats to Roster

In [None]:
### Load the player_ytd stats table from the database

# connect to the database
conn = sqlite3.connect(recent_clean_db)
# Check the table name in db
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())


# Load the player_ytd stats table from the database
player_ytd = pd.read_sql_query("SELECT * FROM player_stats_ytd", conn)
player_ytd.rename(columns={'Clean_Player':'Player'}, inplace=True) # Rename 'Clean Player' to 'Player' for consistency
player_ytd['Player'] = player_ytd['Player'].str.strip() # Strip leading and trailing whitespace from the 'Player' column
# player_ytd.info() # Check to make sure it loaded correctly

# Close the connection
conn.close()

### Merge the player_ytd stats table with the roster_ncaa table on Player and Team columns
# roster_ncaa_ytd = pd.merge(roster_ncaa, player_ytd, on=['Player', 'Team'], how='left')
roster_ncaa_ytd = pd.merge(roster_ncaa, player_ytd, on='Player', how='left')

roster_ncaa_ytd.rename(columns={'Team_x':'Team'}, inplace=True)# Rename Team_x back to Team
roster_ncaa_ytd.rename(columns={'Team_y':'Team_from_db'}, inplace=True) # Rename Team_y to Team_from_db

# roster_ncaa_ytd.info() # Check to make sure it merged correctly
# roster_ncaa_ytd.head() # Head of the merged table

[('player_stats_ytd',), ('master_roster',), ('advanced_metrics',), ('game_details',), ('goalie_stats',), ('line_chart',), ('linescore',), ('penalty_summary',), ('player_stats',), ('scoring_summary',)]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 443 entries, 0 to 442
Data columns (total 33 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Team            443 non-null    object 
 1   Last_Name       443 non-null    object 
 2   First_Name      443 non-null    object 
 3   No              443 non-null    int64  
 4   Position        443 non-null    object 
 5   Yr              443 non-null    object 
 6   Ht              443 non-null    object 
 7   Wt              443 non-null    int64  
 8   DOB             443 non-null    object 
 9   Hometown        443 non-null    object 
 10  Height_Inches   443 non-null    int64  
 11  Draft_Year      115 non-null    float64
 12  NHL_Team        115 non-null    object 
 13  D_Round       

Unnamed: 0,Team,Last_Name,First_Name,No,Position,Yr,Ht,Wt,DOB,Hometown,...,Pts,plus_minus,Sh,TOI_sec,PIM,FOW,FOL,Games_Played,FO%,TOI
0,Connecticut,Carabin,Nick,27,Defensemen,Gr,5-10,195,4/30/2000,"Mahwah, N.J.",...,8.0,7.0,58.0,41423.0,16.0,0.0,0.0,37.0,,11:30:23
1,Connecticut,Fitzgerald,Kevin,3,Defensemen,Fr,5-10,150,10/11/2004,"Bath, Ont.",...,,,,,,,,,,
2,Connecticut,Gustafsson Nyberg,Viking,6,Defensemen,So,6-6,205,9/21/2003,"Stockholm, Sweden",...,10.0,5.0,35.0,40821.0,27.0,0.0,0.0,37.0,,11:20:21
3,Connecticut,Janviriya,Kai,11,Defensemen,Fr,5-8,170,4/2/2005,"Bloomfield, Mich.",...,14.0,7.0,44.0,43621.0,6.0,0.0,0.0,37.0,,12:07:01
4,Connecticut,Messineo,Tom,15,Defensemen,Jr,6-0,190,5/2/2002,"Westwood, Mass.",...,8.0,19.0,41.0,42881.0,20.0,0.0,0.0,37.0,,11:54:41


In [10]:
## How many players show no games played?
no_games = roster_ncaa_ytd[roster_ncaa_ytd['Games_Played'].isnull()]
print(len(no_games))
print(no_games)



30
                 Team   Last_Name First_Name  No     Position  Yr    Ht   Wt  \
1         Connecticut  Fitzgerald      Kevin   3   Defensemen  Fr  5-10  150   
9         Connecticut       Swift      Bauer   5   Defensemen  So   6-0  185   
30   Western Michigan    Gallatin      Grady   2   Defensemen  Fr   6-4  195   
37   Western Michigan       Brown     Connor  12     Forwards  Fr   5-9  190   
42   Western Michigan    Humphrey       Ryan  28     Forwards  Fr   6-1  185   
44   Western Michigan      Kusler       Ryan  21     Forwards  Fr   6-0  180   
55   Western Michigan     Laursen       Kirk   1  Goaltenders  Jr   6-4  175   
93              Maine       Morse      Brian   7   Defensemen  Fr   6-1  174   
112             Maine     Stewart       Gage  35  Goaltenders  Fr   6-1  190   
141           Bentley    Erickson       Jack   1  Goaltenders  Fr   6-1  174   
169        Ohio State      Herbst     Reilly  34  Goaltenders  Sr   6-0  185   
174     Massachusetts   Lieberman    