# Players by Location
- create a map using folium that plots the hometown location of every US (and maybe Canadian) player in D1 (2024-25 season)

In [319]:
## Dependencies

## System Libraries
import sys
import os
# Data handling
import pandas as pd
import geopandas as gpd
# Plotting and visualization
import matplotlib.pyplot as plt
from PIL import Image
## Map visualization
import folium
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap
from folium.features import CustomIcon

### ROSTERFILE
# Path to the roster file
roster_path = os.path.join('..', 'data', 'roster_2024_current_v3.csv')
roster_df = pd.read_csv(roster_path) # Load the roster file

### SCHOOL INFO TABLE FOR LOGO PATHS
school_info_path = os.path.join('..', 'data', 'arena_school_info.csv')
school_info_df = pd.read_csv(school_info_path) # Load school info

# Path to logo folder
logo_folder = os.path.join('..', 'images', 'logos')

### SHAPEFILES
# Path to .geojson file with State Boundries
geojson_path = os.path.join('..', 'data', 'vault', 'combined-us-canada.geojson')
# Load the states shapefile
gdf_states = gpd.read_file(geojson_path)

# Path to shapefile with all US counties
shapefile_path = os.path.join('..', 'data', 'vault', 'cb_2018_us_county_500k.shp')
gdf = gpd.read_file(shapefile_path)
# Set the initial CRS (assuming it's in EPSG:4326, but you may need to verify the original CRS)
gdf = gdf.set_crs(epsg=4326)

## CHECK SHAPEFILES FOR COMPATIBILITY
# Set the CRS for both dataframes if it's missing
if gdf.crs is None:
    gdf.set_crs(epsg=4326, inplace=True)  # Assuming coordinates are in WGS 84 (lat/lon)

if gdf_states.crs is None:
    gdf_states.set_crs(epsg=4326, inplace=True)  # Assuming coordinates are in WGS 84 (lat/lon)



# Check the first few rows of the DataFrames
# roster_df.head()
# gdf_states.head()
# gdf.head()
school_info_df.head()



Unnamed: 0,Team,Arena,Capacity,Sheet_width,Sheet_length,School,Latitude,Longitude,hex1,hex2,hex3,simp_color,logo_abv,Unnamed: 13
0,Air Force,Cadet Ice Arena,2470,200,85,Air Force,39.013739,-104.883727,3087,8a8d8f,,,afa,
1,Alaska,Carlson Center,4595,200,100,Alaska,64.842124,-147.763841,236192,ffcd00,,,akf,
2,Alaska Anchorage,Avis Alaska Sports Complex,800,200,85,Alaska-Anchorage,61.205536,-149.872737,00583d,ffc425,,,aka,aka
3,American Intl,MassMutual Center,6866,200,85,American Int'l,42.118003,-72.554326,0,ffb60f,,,aic,
4,Arizona State,Mullett Arena,5000,200,85,Arizona State,33.447156,-111.910867,8c1d40,ffc627,,,asu,


## Prep The Data

In [320]:
# Clean up and prepare the data for mapping

# Drop rows with missing geographic data
geo_columns = ['City', 'State_Province', 'Country']
roster_cleaned_df = roster_df.dropna(subset=geo_columns)

# Create a simplified dataframe with relevant location columns for easier mapping
roster_cleaned_df = roster_cleaned_df[['Current Team', 'First_Name', 'Last_Name', 'City', 'State_Province', 'Country']]

# Group by City, State_Province, and Country to count the number of players from each location
location_counts = roster_cleaned_df.groupby(['City', 'State_Province', 'Country']).size().reset_index(name='Player_Count')


## CHECK DATA TRANSFORMATION
# # Display the cleaned and grouped dataframe
# location_counts.head()
# # Sort the location counts in descending order
location_counts_sorted = location_counts.sort_values(by='Player_Count', ascending=False)
location_counts_sorted.head(10) # Display


Unnamed: 0,City,State_Province,Country,Player_Count
116,Calgary,Alberta,Canada,48
850,Toronto,Ontario,Canada,27
943,Winnipeg,Manitoba,Canada,16
587,North Vancouver,British Columbia,Canada,14
233,Edmonton,Alberta,Canada,13
811,Stockholm,Sweden,Sweden,13
246,Espoo,Finland,Finland,12
524,Mississauga,Ontario,Canada,12
231,Edina,Minnesota,USA,12
602,Oakville,Ontario,Canada,11


### Geocode Conversion
- 9-26-24 NOTE - Some issuse spoted with Geocoder. Canton, MI is being assigned a lat and long somewhere in Lansing.
    - MSU's Russian player comes from far Eastern Russia and does not appear on map at all
- takes the names of places and converts to lat long coordinates
- uses a rate limiter to avoid overloading service
- takes about 15 min to run - output is saved in the data folder - load from there 

In [321]:
## LCHECK FOR AND LOAD GEOCODED DATA BEFORE RUNNING - THIS TAKES 15+ MINUTES

# from geopy.geocoders import Nominatim
# from geopy.extra.rate_limiter import RateLimiter

# # Initialize geocoder
# geolocator = Nominatim(user_agent="college_hockey_map")

# # Create a rate-limited geocode function to avoid overloading the service
# geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# # Function to geocode a city and state combination
# def geocode_location(row):
#     try:
#         location_str = f"{row['City']}, {row['State_Province']}, {row['Country']}"
#         location = geocode(location_str)
#         if location:
#             return pd.Series([location.latitude, location.longitude])
#         else:
#             return pd.Series([None, None])
#     except Exception as e:
#         return pd.Series([None, None])

# # Apply the geocode function to the data
# location_counts[['Latitude', 'Longitude']] = location_counts.apply(geocode_location, axis=1)

# # Filter out rows with missing coordinates
# location_counts_cleaned = location_counts.dropna(subset=['Latitude', 'Longitude'])

# # Display cleaned data with coordinates
# location_counts_cleaned.head()


In [322]:
## Save Geocoded Data to CSV to avoid having to run geocoding repeatedly

# # Save the cleaned and geocoded data to a CSV file
# output_path = os.path.join('..', 'data', 'player_geocoded_location_counts.csv')
# location_counts_cleaned.to_csv(output_path, index=False)



#### Load the previously geocoded table from csv

In [323]:
## Path to the geocoded data
geocoded_data_path = os.path.join('..', 'data', 'player_geocoded_location_counts.csv')
location_counts_cleaned = pd.read_csv(geocoded_data_path)

# Check the first few rows of the geocoded data
# location_counts_cleaned.head()

### Add Player name, team, ect data into the location counts

#### Add 2023 stats to current roster
- Going to do this in a seperate notebook for ease

In [324]:
# path_to_stats = os.path.join('..', 'data', 'player_stats_2023_v1.csv')
# stats_df = pd.read_csv(path_to_stats) # Load the stats file

# # Remove . from the 'Clean_Player' column
# stats_df['Clean_Player'] = stats_df['Clean_Player'].str.replace('.', '')

# # Split into first and last name
# # If just 2 words, split into first and last name. if three words, first name is first word, last name is last two word

# def split_name(row):
#     name_parts = row['Clean_Player'].split()
#     if len(name_parts) == 2:
#         return pd.Series([name_parts[0], name_parts[1]])
#     elif len(name_parts) == 3:
#         return pd.Series([name_parts[0], name_parts[1] + ' ' + name_parts[2]])
#     else:
#         return pd.Series([None, None])

# stats_df[['First_Name', 'Last_Name']] = stats_df.apply(split_name, axis=1)
# # Remove any commas from the name columns
# stats_df['First_Name'] = stats_df['First_Name'].str.replace(',', '')
# roster_cleaned_df['First_Name'] = roster_cleaned_df['First_Name'].str.replace(',', '')
# stats_df['Last_Name'] = stats_df['Last_Name'].str.replace(',', '')
# # Remove . from the 'Last_Name' and 'First_Name' columns
# stats_df['Last_Name'] = stats_df['Last_Name'].str.replace('.', '')
# stats_df['First_Name'] = stats_df['First_Name'].str.replace('.', '')

# # Filter to two word last names
# stats_df = stats_df[stats_df['Last_Name'].str.split().str.len() == 2]

# # rename Current Team to 'Team'
# roster_cleaned_df.rename(columns={'Current Team': 'Team'}, inplace=True)

# stats_df.head(20) # Check table
# roster_cleaned_df.head(20) # Check table



In [325]:
# ## Merge 2023 Stats with 2024 Roster
# # Merge the roster data with the stats data on the 'First_Name', 'Last_Name' and 'Team' columns

# prelim_merge_df = pd.merge(roster_cleaned_df, stats_df, on=['First_Name', 'Last_Name', 'Team'], how='outer')

# # Print report on the merge
# print(f"Roster data shape: {roster_cleaned_df.shape}")
# print(f"Stats data shape: {stats_df.shape}")

# print(f"Merged data shape: {prelim_merge_df.shape}")

# # print the first few rows of the merged data
# prelim_merge_df.head()

In [326]:
# Merge roster_df with location_counts_cleaned
merged_df = pd.merge(roster_df, location_counts_cleaned, 
                     on=['City', 'State_Province', 'Country'], how='inner')

# Prepare the tooltip text with a header row for each location
merged_df['Tooltip'] = merged_df.apply(
    lambda row: f"Name - Position - Year - Team<br>{row['First_Name']} {row['Last_Name']} - {row['Position']} - {row['Yr']} - {row['Current Team']}", axis=1
)

# Display the merged dataframe to check if the tooltips are correctly generated
merged_df[['First_Name', 'Last_Name', 'Tooltip', 'Latitude', 'Longitude']].head()

# merged_df.head()

Unnamed: 0,First_Name,Last_Name,Tooltip,Latitude,Longitude
0,Adam,Barone,Name - Position - Year - Team<br> Adam Barone ...,46.52391,-84.320068
1,Jack,Blanchett,Name - Position - Year - Team<br> Jack Blanche...,41.915336,-83.513566
2,Mike,Brown,Name - Position - Year - Team<br> Mike Brown -...,42.39593,-71.178665
3,Evan,Bushy,Name - Position - Year - Team<br> Evan Bushy -...,44.163466,-93.999351
4,Jacob,Conrad,Name - Position - Year - Team<br> Jacob Conrad...,44.512638,-88.012579


## Start Mapping

#### Map with team logo as markers
- thurs 9-26-24 start

In [327]:
# roster_df.head()
merged_df.head()


Unnamed: 0,Current Team,Last_Name,First_Name,No,Position,Yr,Ht,Wt,DOB,Hometown,...,D_Round,Last Team,League,City,State_Province,Country,Player_Count,Latitude,Longitude,Tooltip
0,Lake Superior,Barone,Adam,6,Defensemen,Fr,1-Jun,174,5/6/2004,"Sault Ste. Marie, Ont.",...,,Trail,BCHL,Sault Ste. Marie,Ontario,Canada,2,46.52391,-84.320068,Name - Position - Year - Team<br> Adam Barone ...
1,Lake Superior,Blanchett,Jack,16,Defensemen,So,11-May,185,5/12/2003,"Monroe, Mich.",...,,Powell,BCHL,Monroe,Michigan,USA,1,41.915336,-83.513566,Name - Position - Year - Team<br> Jack Blanche...
2,Lake Superior,Brown,Mike,3,Defensemen,Jr,2-Jun,209,4/3/2001,"Belmont, Mass.",...,,Merrimack,,Belmont,Massachusetts,USA,1,42.39593,-71.178665,Name - Position - Year - Team<br> Mike Brown -...
3,Lake Superior,Bushy,Evan,5,Defensemen,So,1-Jun,195,3/26/2002,"Mankato, Minn.",...,,Trail,BCHL,Mankato,Minnesota,USA,1,44.163466,-93.999351,Name - Position - Year - Team<br> Evan Bushy -...
4,Lake Superior,Conrad,Jacob,4,Defensemen,Fr,11-May,180,5/18/2002,"Green Bay, Wis.",...,,Fairbanks,NAHL,Green Bay,Wisconsin,USA,3,44.512638,-88.012579,Name - Position - Year - Team<br> Jacob Conrad...


In [328]:
import math

# Function to apply a circular offset to markers with the same location
def add_circular_offset(lat, lon, count, index, radius=0.006):
    """
    Distributes markers in a circular pattern around a central point.
    The radius increases slightly with the number of markers to prevent overlap.
    """
    # Calculate angle in radians (360 degrees divided by number of markers)
    angle = (360 / count) * index
    radians = math.radians(angle)

    # Dynamic adjustment of the radius: the more markers, the larger the radius
    dynamic_radius = radius * (1 + (count / 5))  # Scale the radius based on the number of markers

    # Offset latitude and longitude using circular placement
    lat_offset = lat + (dynamic_radius * math.cos(radians))  # Offset based on cosine
    lon_offset = lon + (dynamic_radius * math.sin(radians))  # Offset based on sine

    return lat_offset, lon_offset


###### REFACTOR W 01 PREVIEW

In [350]:
# Assign unique index per player in each city group
merged_df['city_group_index'] = merged_df.groupby(['City', 'State_Province', 'Country']).cumcount()

# Assign 'Player_Count' per city directly to 'merged_df' using 'transform'
merged_df['Player_Count'] = merged_df.groupby(['City', 'State_Province', 'Country'])['First_Name'].transform('count')

# Set Logo Size (tuple of width and height in pixels)
logo_size = (55, 55)  # Adjust as needed

import math

def create_map_with_team_logos(merged_df, school_info_df, logo_folder, gdf_states, map_center=[45.0, -93.0], zoom_start=4):
    # Initialize the map
    folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='Cartodb dark_matter')

    # ---- ADD CHOROPLETH LAYER ----
    # Create 'state_counts_df' from 'merged_df'
    state_counts = merged_df['State_Province'].value_counts()
    state_counts_df = pd.DataFrame(state_counts).reset_index()
    state_counts_df.columns = ['State_Province', 'Player_Count']

    # Convert the GeoDataFrame to GeoJSON using __geo_interface__
    geojson_data = gdf_states.__geo_interface__

    # Add the Choropleth directly to the map with a name for LayerControl
    folium.Choropleth(
        geo_data=geojson_data,
        data=state_counts_df,
        columns=['State_Province', 'Player_Count'],
        key_on='feature.properties.name',  # Adjust this if necessary
        fill_color='YlGn',
        fill_opacity=0.5,
        line_opacity=0.2,
        legend_name='Number of Players by State/Province',
        name='Choropleth'  # Add name for LayerControl
    ).add_to(folium_map)


    ############## NEW CODE ##############
    # ---- ADD STATE LABELS LAYER ----
    # Merge 'state_counts_df' with 'gdf_states' to get centroids
    gdf_states_subset = gdf_states[['name', 'geometry']]  # Adjust 'name' if your GeoDataFrame has a different column name
    state_counts_gdf = gdf_states_subset.merge(state_counts_df, left_on='name', right_on='State_Province')

    # Calculate centroids
    state_counts_gdf['centroid'] = state_counts_gdf.geometry.centroid

    # Create a FeatureGroup for the labels
    labels_layer = folium.FeatureGroup(name='Players by State')

    # Add labels to the labels_layer
    for idx, row in state_counts_gdf.iterrows():
        # Get centroid coordinates
        lat = row['centroid'].y
        lon = row['centroid'].x
        # Get player count
        player_count = row['Player_Count']
        # Create a text label
        label = folium.Marker(
            location=[lat, lon],
            icon=folium.DivIcon(
                html=f'''
                    <div style="
                        font-size: 16px;
                        color: black;
                        text-align: center;
                        
                        padding: 2px;
                        
                    ">
                        {player_count}
                    </div>
                '''
            )
        )
        labels_layer.add_child(label)

    # Add the labels_layer to the map
    labels_layer.add_to(folium_map)

########### NOT GBIG FAN OF COLOR SCHEME
    # # ---- ADD HEATMAP LAYER ----
    # # Create heat_data from merged_df
    # heat_data = [[row['Latitude'], row['Longitude']] for idx, row in merged_df.iterrows()]

    # # Create a FeatureGroup for the heatmap layer
    # heatmap_layer = folium.FeatureGroup(name='Heatmap')

    # # Define a custom gradient for better color transitions
    # custom_gradient = {
    #     0.2: '#ADD8E6',  # Light Blue for low intensity
    #     0.4: '#00FF00',  # Green for mid-low intensity
    #     0.6: '#FFFF00',  # Yellow for mid-high intensity
    #     0.8: '#FFA500',  # Orange for high intensity
    #     1.0: '#FF0000'   # Red for maximum intensity
    # }

    # # Add the HeatMap to the FeatureGroup with adjusted parameters
    # HeatMap(
    #     heat_data, 
    #     radius=15,                # Increase radius for smoother heat blobs
    #     blur=15,                  # Slightly increase blur to smooth transitions
    #     max_intensity=100,         # Adjust max intensity for better scaling
    #     gradient=custom_gradient, # Use the custom gradient
    #     min_opacity=0.4           # Slight opacity for low intensity
    # ).add_to(heatmap_layer)

    # # Add the heatmap layer to the map
    # heatmap_layer.add_to(folium_map)
    
    ## ORIG CODE ###
    # ---- ADD HEATMAP LAYER ----
    # Create heat_data from merged_df
    heat_data = [[row['Latitude'], row['Longitude']] for idx, row in merged_df.iterrows()]

    # Create a FeatureGroup for the heatmap layer
    heatmap_layer = folium.FeatureGroup(name='Heatmap')

    # Add the HeatMap to the FeatureGroup
    HeatMap(heat_data, radius=15, blur=10, max_intensity=20).add_to(heatmap_layer)

    # Add the heatmap layer to the map
    heatmap_layer.add_to(folium_map)

    # ---- MARKER CLUSTER LAYER ----
    cluster_group = folium.FeatureGroup(name='Individual Players', control=True, show=False)
    marker_cluster = MarkerCluster(
        spiderfy_on_max_zoom=True,
        show_coverage_on_hover=False,
        max_cluster_radius=30,
        disableClusteringAtZoom=14,
        animateAddingMarkers=True,
        zoomToBoundsOnClick=True
    ).add_to(cluster_group)

    # Compute the mean latitude and longitude for centering the map
    Latitude = merged_df['Latitude'].mean()
    Longitude = merged_df['Longitude'].mean()

    # Create the map centered on the computed mean Latitude and Longitude
    map_instance = folium.Map(location=[Latitude, Longitude], zoom_start=12)

    # Add the cluster group to the map but initially hidden
    map_instance.add_child(cluster_group)

    # Define a custom script to toggle the visibility of the cluster group on zoom
    map_instance.get_root().html.add_child(folium.Element(f'''
        <script>
            var clusterLayer = {cluster_group.get_name()};
            var map = {map_instance.get_name()};
            map.on('zoomend', function() {{
                if (map.getZoom() >= 14) {{
                    if (!map.hasLayer(clusterLayer)) {{
                        map.addLayer(clusterLayer);
                    }}
                }} else {{
                    if (map.hasLayer(clusterLayer)) {{
                        map.removeLayer(clusterLayer);
                    }}
                }}
            }});
        </script>
    '''))


    # Loop through the merged_df to place markers
    for idx, row in merged_df.iterrows():
        # Retrieve team and logo information
        team_name = row['Current Team']
        logo_info = school_info_df[school_info_df['Team'] == team_name]['logo_abv'].values

        if len(logo_info) > 0:
            logo_abv = logo_info[0]
            logo_path = os.path.join(logo_folder, f"{logo_abv}.png")

            if os.path.exists(logo_path):
                logo_icon = CustomIcon(logo_path, icon_size=logo_size)

                player_count = row['Player_Count']
                current_index = row['city_group_index']

                # Apply circular offset for overlapping markers
                if player_count > 1:
                    lat_offset, lon_offset = add_circular_offset(
                        row['Latitude'], row['Longitude'], player_count, current_index
                    )
                else:
                    lat_offset, lon_offset = row['Latitude'], row['Longitude']  # No offset if only one player

                # Enhance the tooltip with player information, including hometown
                tooltip_html = f"""
                <div style="font-size: 12px; font-family: Arial;">
                    <strong>{row['First_Name']} {row['Last_Name']}</strong><br>
                    {row['Yr']} - {row['Position']}<br>
                    Team: {row['Current Team']}<br>
                    Hometown: {row['Hometown']}
                </div>
                """

                # Add player marker with the custom logo icon and enhanced tooltip
                folium.Marker(
                    location=[lat_offset, lon_offset],
                    tooltip=folium.Tooltip(tooltip_html),
                    icon=logo_icon
                ).add_to(marker_cluster)

    # Add the marker cluster layer to the map
    cluster_group.add_to(folium_map)

    # ---- ADD LAYER CONTROL ----
    folium.LayerControl().add_to(folium_map)

    # Return the map after processing all markers
    return folium_map

# Assuming 'gdf_states' is already defined in your code
enhanced_player_map = create_map_with_team_logos(merged_df, school_info_df, logo_folder, gdf_states)

# Save the map to an HTML file for visualization
enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_with_team_logos_v3.1.html')
enhanced_player_map.save(enhanced_map_file_path)



  state_counts_gdf['centroid'] = state_counts_gdf.geometry.centroid


In [330]:
# # Assign unique index per player in each city group
# merged_df['city_group_index'] = merged_df.groupby(['City', 'State_Province', 'Country']).cumcount()

# # Assign 'Player_Count' per city directly to 'merged_df' using 'transform'
# merged_df['Player_Count'] = merged_df.groupby(['City', 'State_Province', 'Country'])['First_Name'].transform('count')

# # Set Logo Size (tuple of width and height in pixels)
# logo_size = (40, 40)  # Adjust as needed

# import math

# # Function to apply a circular offset to markers with the same location
# def add_circular_offset(lat, lon, count, index, radius=0.005):
#     """
#     Distributes markers in a circular pattern around a central point.
#     The radius increases slightly with the number of markers to prevent overlap.
#     """
#     # Calculate angle in radians (360 degrees divided by number of markers)
#     angle = (360 / count) * index
#     radians = math.radians(angle)

#     # Dynamic adjustment of the radius: the more markers, the larger the radius
#     dynamic_radius = radius * (1 + (count / 5))  # Scale the radius based on the number of markers

#     # Offset latitude and longitude using circular placement
#     lat_offset = lat + (dynamic_radius * math.cos(radians))  # Offset based on cosine
#     lon_offset = lon + (dynamic_radius * math.sin(radians))  # Offset based on sine

#     return lat_offset, lon_offset

# # def create_map_with_team_logos(merged_df, school_info_df, logo_folder, map_center=[45.0, -93.0], zoom_start=4):
# def create_map_with_team_logos(merged_df, school_info_df, logo_folder, gdf_states, map_center=[45.0, -93.0], zoom_start=4):

#     # Initialize the map
#     folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')

#     # ---- ADD CHOROPLETH LAYER ----
#     # Create 'state_counts_df' from 'merged_df'
#     state_counts = merged_df['State_Province'].value_counts()
#     state_counts_df = pd.DataFrame(state_counts).reset_index()
#     state_counts_df.columns = ['State_Province', 'Player_Count']

#     # Convert the GeoDataFrame to GeoJSON using __geo_interface__
#     geojson_data = gdf_states.__geo_interface__

#     # Create a FeatureGroup for the choropleth layer
#     choropleth_layer = folium.FeatureGroup(name='Choropleth')

#     # Add the Choropleth to the FeatureGroup
#     folium.Choropleth(
#         geo_data=geojson_data,  # Use GeoJSON directly from GeoDataFrame
#         data=state_counts_df,
#         columns=['State_Province', 'Player_Count'],  # Columns from the player count dataframe
#         key_on='feature.properties.name',  # Match this to the column in the GeoDataFrame (likely 'name')
#         fill_color='YlGn',  # Color scale, can be adjusted
#         fill_opacity=0.7,
#         line_opacity=0.2,
#         legend_name='Number of Players by State/Province'
#     ).add_to(choropleth_layer)

#     # Add the choropleth layer to the map
#     choropleth_layer.add_to(folium_map)

#     # ---- ADD HEATMAP LAYER ----
#     # Create heat_data from merged_df
#     heat_data = [[row['Latitude'], row['Longitude']] for idx, row in merged_df.iterrows()]

#     # Create a FeatureGroup for the heatmap layer
#     heatmap_layer = folium.FeatureGroup(name='Heatmap')

#     # Add the HeatMap to the FeatureGroup
#     HeatMap(heat_data, radius=15, blur=10, max_intensity=20).add_to(heatmap_layer)

#     # Add the heatmap layer to the map
#     heatmap_layer.add_to(folium_map)



#     # Marker Cluster Layer
#     cluster_group = folium.FeatureGroup(name='Toggle Clusters')
#     marker_cluster = MarkerCluster(
#         spiderfy_on_max_zoom=True,
#         show_coverage_on_hover=False,
#         max_cluster_radius=30,
#         disableClusteringAtZoom=14,
#         animateAddingMarkers=True,
#     ).add_to(cluster_group)

#     # Loop through the merged_df to place markers
#     for idx, row in merged_df.iterrows():
#         # Retrieve team and logo information
#         team_name = row['Current Team']
#         logo_info = school_info_df[school_info_df['Team'] == team_name]['logo_abv'].values

#         if len(logo_info) > 0:
#             logo_abv = logo_info[0]
#             logo_path = os.path.join(logo_folder, f"{logo_abv}.png")

#             if os.path.exists(logo_path):
#                 logo_icon = CustomIcon(logo_path, icon_size=logo_size)

#                 player_count = row['Player_Count']
#                 current_index = row['city_group_index']

#                 # Apply circular offset for overlapping markers
#                 if player_count > 1:
#                     lat_offset, lon_offset = add_circular_offset(
#                         row['Latitude'], row['Longitude'], player_count, current_index
#                     )
#                 else:
#                     lat_offset, lon_offset = row['Latitude'], row['Longitude']  # No offset if only one player

#                 # Enhance the tooltip with player information, including hometown
#                 tooltip_html = f"""
#                 <div style="font-size: 12px; font-family: Arial;">
#                     <strong>{row['First_Name']} {row['Last_Name']}</strong><br>
#                     {row['Yr']} - {row['Position']}<br>
#                     Team: {row['Current Team']}<br>
#                     Hometown: {row['Hometown']}
#                 </div>
#                 """

#                 # Add player marker with the custom logo icon and enhanced tooltip
#                 folium.Marker(
#                     location=[lat_offset, lon_offset],  # Use circular offset location if needed
#                     tooltip=folium.Tooltip(tooltip_html),  # Use enhanced HTML tooltip
#                     icon=logo_icon  # Custom team logo icon
#                 ).add_to(marker_cluster)

#     # Add the marker cluster layer to the map
#     cluster_group.add_to(folium_map)

#     # ---- ADD LAYER CONTROL ----
#     folium.LayerControl().add_to(folium_map)


#     # Return the map after processing all markers
#     return folium_map


# # Assuming 'gdf_states' is already defined in your code
# enhanced_player_map = create_map_with_team_logos(merged_df, school_info_df, logo_folder, gdf_states)

# # Save the map to an HTML file for visualization
# enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_with_team_logos_v.3.1html')
# enhanced_player_map.save(enhanced_map_file_path)

# # # Example usage of the function
# # enhanced_player_map = create_map_with_team_logos(merged_df, school_info_df, logo_folder)

# # # Save the map to an HTML file for visualization
# # enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_with_team_logos_v2.4.html')
# # enhanced_player_map.save(enhanced_map_file_path)


In [331]:
######################### OLD SPAGHETTI CODE ############################
################ COMMENTED OUT FOR REFERENCE ############################
########## NOTE: THIS CODE IS FUNCTIONAL BUT NOT WORKING AS INTENDED ############################

# ## HOTFIX - ASSIGN UNIQUE INDEX TO EACH PLAYER IN CITY GROUP

# # Track how many players come from the same city
# grouped_df = merged_df.groupby(['City', 'State_Province', 'Country']).size().reset_index(name='Player_Count')

# # Assign unique index per player in each city group
# merged_df['city_group_index'] = merged_df.groupby(['City', 'State_Province', 'Country']).cumcount()

# # Grouped_df has 'Player_Count' per city
# grouped_df = merged_df.groupby(['City', 'State_Province', 'Country']).size().reset_index(name='Player_Count')

# # Merge Player_Count into merged_df
# merged_df = merged_df.merge(grouped_df, on=['City', 'State_Province', 'Country'], how='left')


# # # HOTFIX - Copy rosterr_df[Current Team] to a new column called 'Team'
# # roster_df['Team'] = roster_df['Current Team']

# ## Set Logo Size (tuple of width and height in pixels)
# logo_size = (40, 40)  # Adjust as needed

# import math

# # Function to apply a circular offset to markers with the same location
# def add_circular_offset(lat, lon, count, index, radius=0.005):
#     """
#     Distributes markers in a circular pattern around a central point.
#     The radius increases slightly with the number of markers to prevent overlap.
#     """
#     # Calculate angle in radians (360 degrees divided by number of markers)
#     angle = (360 / count) * index
#     radians = math.radians(angle)

#     # Dynamic adjustment of the radius: the more markers, the larger the radius
#     dynamic_radius = radius * (1 + (count / 5))  # Scale the radius based on the number of markers

#     # Offset latitude and longitude using circular placement
#     lat_offset = lat + (dynamic_radius * math.cos(radians))  # Offset based on cosine
#     lon_offset = lon + (dynamic_radius * math.sin(radians))  # Offset based on sine

#     return lat_offset, lon_offset

# def create_map_with_team_logos(merged_df, school_info_df, logo_folder, map_center=[45.0, -93.0], zoom_start=4):
#     # Initialize the map
#     folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')

#     # Marker Cluster Layer
#     cluster_group = folium.FeatureGroup(name='Toggle Clusters')
#     marker_cluster = MarkerCluster(
#         spiderfy_on_max_zoom=True,
#         show_coverage_on_hover=False,
#         max_cluster_radius=20,
#         disableClusteringAtZoom=14,
#         animateAddingMarkers=True,
#     ).add_to(cluster_group)

#     # Loop through the merged_df to place markers
#     for idx, row in merged_df.iterrows():
#         # Retrieve team and logo information
#         team_name = row['Current Team']
#         logo_info = school_info_df[school_info_df['Team'] == team_name]['logo_abv'].values

#         if len(logo_info) > 0:
#             logo_abv = logo_info[0]
#             logo_path = os.path.join(logo_folder, f"{logo_abv}.png")

#             if os.path.exists(logo_path):
#                 logo_icon = CustomIcon(logo_path, icon_size=logo_size)

#                 player_count = row['Player_Count']
#                 current_index = row['city_group_index']

#                 # Apply circular offset for overlapping markers
#                 if player_count > 1:
#                     lat_offset, lon_offset = add_circular_offset(
#                         row['Latitude'], row['Longitude'], player_count, current_index
#                     )
#                 else:
#                     lat_offset, lon_offset = row['Latitude'], row['Longitude']  # No offset if only one player

#                 # Enhance the tooltip with player information, including hometown
#                 tooltip_html = f"""
#                 <div style="font-size: 12px; font-family: Arial;">
#                     <strong>{row['First_Name']} {row['Last_Name']}</strong><br>
#                     {row['Yr']} - {row['Position']}<br>
#                     Team: {row['Current Team']}<br>
#                     Hometown: {row['Hometown']}
#                 </div>
#                 """

#                 # Add player marker with the custom logo icon and enhanced tooltip
#                 folium.Marker(
#                     location=[lat_offset, lon_offset],  # Use circular offset location if needed
#                     tooltip=folium.Tooltip(tooltip_html),  # Use enhanced HTML tooltip
#                     icon=logo_icon  # Custom team logo icon
#                 ).add_to(marker_cluster)

#     # Add the marker cluster layer to the map
#     cluster_group.add_to(folium_map)

#     # Return the map after processing all markers
#     return folium_map


# ### OLD CODE
# # def add_circular_offset(lat, lon, count, index, radius=0.025):
# #     # Calculate angle in radians (360 degrees divided by number of markers)
# #     angle = (360 / count) * index
# #     radians = math.radians(angle)

# #     # Offset latitude and longitude using circular placement
# #     lat_offset = lat + (radius * math.cos(radians))  # Offset based on cosine
# #     lon_offset = lon + (radius * math.sin(radians))  # Offset based on sine

# #     return lat_offset, lon_offset


# # def create_map_with_team_logos(merged_df, school_info_df, logo_folder, map_center=[45.0, -93.0], zoom_start=4):
# #     # Initialize the map
# #     folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')

# #     # 1. Marker Cluster Layer
# #     cluster_group = folium.FeatureGroup(name='Toggle Clusters')
# #     marker_cluster = MarkerCluster(spiderfy_on_max_zoom=True, 
# #                                 show_coverage_on_hover=False, 
# #                                 max_cluster_radius=20,
# #                                 disableClusteringAtZoom=14,
# #                                 animateAddingMarkers=True,
# #                                 ).add_to(cluster_group)

# #     # Track how many players come from the same city
# #     grouped_df = merged_df.groupby(['City', 'State_Province', 'Country']).size().reset_index(name='Player_Count')

# #     # Loop through the merged_df to place markers
# #     for city_idx, row in merged_df.iterrows():
# #         # Retrieve team and logo information
# #         team_name = row['Current Team']
# #         logo_info = school_info_df[school_info_df['Team'] == team_name]['logo_abv'].values
        
# #         if len(logo_info) > 0:
# #             logo_abv = logo_info[0]
# #             logo_path = os.path.join(logo_folder, f"{logo_abv}.png")

# #             if os.path.exists(logo_path):
# #                 logo_icon = CustomIcon(logo_path, icon_size=logo_size)

# #                 # Retrieve how many players come from this city
# #                 city_group = grouped_df[(grouped_df['City'] == row['City']) &
# #                                         (grouped_df['State_Province'] == row['State_Province']) &
# #                                         (grouped_df['Country'] == row['Country'])]

# #                 player_count = city_group['Player_Count'].values[0]

# #                 # Use consistent index for offset (based on city_idx modulo player_count)
# #                 current_index = city_idx % player_count  # Ensure a unique index per player

# #                 # Apply circular offset for overlapping markers
# #                 if player_count > 1:
# #                     lat_offset, lon_offset = add_circular_offset(row['Latitude'], row['Longitude'], player_count, current_index)
# #                 else:
# #                     lat_offset, lon_offset = row['Latitude'], row['Longitude']  # No offset if only one player

# #                 # Enhance the tooltip with player information, including hometown
# #                 tooltip_html = f"""
# #                 <div style="font-size: 12px; font-family: Arial;">
# #                     <strong>{row['First_Name']} {row['Last_Name']}</strong><br>
# #                     {row['Yr']} - {row['Position']}<br>
# #                     Team: {row['Current Team']}<br>
# #                     Hometown: {row['Hometown']}
# #                 </div>
# #                 """

# #                 # Add player marker with the custom logo icon and enhanced tooltip
# #                 folium.Marker(
# #                     location=[lat_offset, lon_offset],  # Use circular offset location if needed
# #                     tooltip=folium.Tooltip(tooltip_html),  # Use enhanced HTML tooltip
# #                     icon=logo_icon  # Custom team logo icon
# #                 ).add_to(marker_cluster)



# #     # Add the marker cluster layer to the map
# #     cluster_group.add_to(folium_map)

# #     # Return the map after processing all markers
# #     return folium_map


# # Example usage of the function
# enhanced_player_map = create_map_with_team_logos(merged_df, school_info_df, logo_folder)

# # Save the map to an HTML file for visualization
# enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_with_team_logos_v.1.html')
# enhanced_player_map.save(enhanced_map_file_path)

#### Very Simple V0.1 Map

In [332]:

# # Function to create the player origin map with tooltips
# def create_player_origin_map_with_tooltip(data, map_center=[45.0, -93.0], zoom_start=4):
#     # Map settings block for customization
#     folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')
    
#     # Create MarkerCluster
#     marker_cluster = MarkerCluster(disableClusteringAtZoom=10).add_to(folium_map)

#     # Add player markers with tooltips to the MarkerCluster
#     for _, row in data.iterrows():
#         folium.Marker(
#             location=[row['Latitude'], row['Longitude']],
#             tooltip=row['Tooltip']
#         ).add_to(marker_cluster)
    
#     return folium_map

# # Create the player origins map with tooltips
# player_map = create_player_origin_map_with_tooltip(merged_df)

# # # Save the map to an HTML file for visualization
# # map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_v1.html')
# # player_map.save(map_file_path)

# # map_file_path


### Map with Custom Java for Cluster behavior
- not behaving well - probably not worth the time to get smooth

In [333]:
# # Function to create the player origin map with custom cluster zoom levels
# def create_player_origin_map_with_tooltip(data, map_center=[45.0, -93.0], zoom_start=4):
#     # Map settings block for customization
#     folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')
    
#     # Create MarkerCluster without adding it to the map right away
#     marker_cluster = MarkerCluster().add_to(folium.FeatureGroup(name="Clusters"))

#     # Add player markers with tooltips to the MarkerCluster
#     for _, row in data.iterrows():
#         folium.Marker(
#             location=[row['Latitude'], row['Longitude']],
#             tooltip=row['Tooltip']
#         ).add_to(marker_cluster)

#     # Custom JavaScript to toggle marker clusters based on zoom level
#     toggle_cluster_js = """
#     function toggleClusters(map) {
#         var clusterGroup = map._layers[Object.keys(map._layers).find(key => map._layers[key].options && map._layers[key].options.spiderfyOnMaxZoom !== undefined)];
        
#         map.on('zoomend', function () {
#             var currentZoom = map.getZoom();
            
#             // Define the zoom range where clusters should be shown
#             var minZoom = 5;  // Set the zoom level when clusters appear
#             var maxZoom = 7;  // Set the zoom level when clusters disappear again
            
#             if (currentZoom < minZoom || currentZoom > maxZoom) {
#                 map.removeLayer(clusterGroup);
#             } else {
#                 map.addLayer(clusterGroup);
#             }
#         });

#         // Hide the clusters initially
#         map.removeLayer(clusterGroup);
#     }
#     """

#     # Add the JavaScript to the map
#     folium_map.get_root().html.add_child(folium.Element(f'<script>{toggle_cluster_js}</script>'))

#     # Call the function that runs the zoom toggle functionality
#     folium_map.add_child(folium.Element(f'<script>toggleClusters({{map_name}});</script>'.format(map_name=folium_map.get_name())))

#     # Add the marker cluster group to the map
#     marker_cluster.add_to(folium_map)

#     return folium_map


### Aggrigate data on state by state basis 
- will use to color specific states - regions

In [334]:

state_counts = location_counts_cleaned['State_Province'].value_counts()
# Create df of the state counts
state_counts_df = pd.DataFrame(state_counts).reset_index()
state_counts_df.columns = ['State_Province', 'Player_Count']
state_counts_df.head()

# location_counts_cleaned.head()



Unnamed: 0,State_Province,Player_Count
0,Minnesota,93
1,Ontario,83
2,Massachusetts,75
3,New York,69
4,Michigan,65


### Code below is the example of next steps from GPI
#### To-DO
- get geodata (state-province boundries, possibly also another layer - county, ect)
- GEOJSON format

In [335]:
## Open geoJSON file with US states to check
gdf_states.head()

Unnamed: 0,name,id,CNTRY,TYPE,geometry
0,Alberta,CA-AB,Canada,Province,"POLYGON ((-109.99940 48.99310, -111.17410 48.9..."
1,British Columbia,CA-BC,Canada,Province,"MULTIPOLYGON (((-124.15370 49.53120, -124.3623..."
2,Manitoba,CA-MB,Canada,Province,"POLYGON ((-88.94850 56.85120, -89.98060 56.216..."
3,New Brunswick,CA-NB,Canada,Province,"POLYGON ((-64.03100 46.01270, -64.15950 45.964..."
4,Newfoundland and Labrador,CA-NL,Canada,Province,"MULTIPOLYGON (((-54.09370 49.74440, -53.98060 ..."


In [336]:
import folium
from folium.plugins import HeatMap, MarkerCluster
import os

# Function to create the map with a choropleth and heatmap as separate layers
def create_choropleth_heatmap_map(data, state_counts_df, gdf_states, map_center=[45.0, -93.0], zoom_start=4, base_map="cartodb positron"):
    # Initialize the map with a selectable base map
    folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles=base_map)
    

    # 2. Choropleth Layer (State/Province Shading)
    # Convert the GeoDataFrame to GeoJSON using __geo_interface__
    geojson_data = gdf_states.__geo_interface__

    folium.Choropleth(
        geo_data=geojson_data,  # Use GeoJSON directly from GeoDataFrame
        data=state_counts_df,
        columns=['State_Province', 'Player_Count'],  # Columns from the player count dataframe
        key_on='feature.properties.name',  # Match this to the column in the GeoDataFrame (likely 'name')
        fill_color='YlGn',  # Color scale, can be adjusted
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Number of Players by State/Province'
    ).add_to(folium_map)  # Add directly to folium_map, not a FeatureGroup
    
    # 1. Heatmap Layer
    heat_data = [[row['Latitude'], row['Longitude']] for _, row in data.iterrows()]
    heatmap_layer = folium.FeatureGroup(name='Heatmap')
    HeatMap(heat_data, radius=15, blur=10, max_intensity=20).add_to(heatmap_layer)
    heatmap_layer.add_to(folium_map)

    # 3. Marker Cluster Layer
    cluster_group = folium.FeatureGroup(name='Toggle Clusters')
    marker_cluster = MarkerCluster().add_to(cluster_group)

    # Add player markers with tooltips to the MarkerCluster
    for _, row in data.iterrows():
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            tooltip=row['Tooltip']
        ).add_to(marker_cluster)
    
    cluster_group.add_to(folium_map)

    # 4. Add Layer Control to allow toggling the heatmap, choropleth, and clusters
    folium.LayerControl().add_to(folium_map)

    return folium_map


# Example usage of the function
# Assuming gdf_states is your loaded GeoDataFrame and merged_df contains the player data
enhanced_player_map = create_choropleth_heatmap_map(merged_df, state_counts_df, gdf_states, base_map="cartodbpositron")

# Save the map to an HTML file for visualization
enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_v1.0.html')
enhanced_player_map.save(enhanced_map_file_path)


In [337]:
# import folium
# from folium.plugins import HeatMap, MarkerCluster
# import os

# Function to create the map with a choropleth and heatmap as separate layers
def create_choropleth_heatmap_map(data, state_counts_df, gdf_states, map_center=[45.0, -93.0], zoom_start=4):
    # Initialize the map
    folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')

    # 1. Heatmap Layer
    heat_data = [[row['Latitude'], row['Longitude']] for _, row in data.iterrows()]
    heatmap_layer = folium.FeatureGroup(name='Heatmap')
    HeatMap(heat_data, radius=5, blur=5, max_intensity=12).add_to(heatmap_layer)
    heatmap_layer.add_to(folium_map)

    # 2. Choropleth Layer (State/Province Shading)
    # Convert the GeoDataFrame to GeoJSON using __geo_interface__
    geojson_data = gdf_states.__geo_interface__

    choropleth_layer = folium.FeatureGroup(name='Choropleth')
    folium.Choropleth(
        geo_data=geojson_data,  # Use GeoJSON directly from GeoDataFrame
        data=state_counts_df,
        columns=['State_Province', 'Player_Count'],  # Columns from the player count dataframe
        key_on='feature.properties.name',  # Match this to the column in the GeoDataFrame (likely 'name')
        fill_color='YlGnBu',  # Color scale, can be adjusted
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Number of Players by State/Province'
    ).add_to(choropleth_layer)
    
    choropleth_layer.add_to(folium_map)

    # 3. Marker Cluster Layer
    cluster_group = folium.FeatureGroup(name='Toggle Clusters')
    marker_cluster = MarkerCluster().add_to(cluster_group)

    # Add player markers with tooltips to the MarkerCluster
    for _, row in data.iterrows():
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            tooltip=row['Tooltip']
        ).add_to(marker_cluster)
    
    cluster_group.add_to(folium_map)

    # 4. Add Layer Control to allow toggling the heatmap, choropleth, and clusters
    folium.LayerControl().add_to(folium_map)

    return folium_map


# Example usage of the function
# Assuming gdf_states is your loaded GeoDataFrame and merged_df contains the player data
enhanced_player_map = create_choropleth_heatmap_map(merged_df, state_counts_df, gdf_states)

# Save the map to an HTML file for visualization
enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_v8.1.html')
enhanced_player_map.save(enhanced_map_file_path)


AssertionError: Choropleth must be added to a Map object.

In [None]:
############ EAMPLE CODE

import json
import geopandas as gpd
from folium.plugins import HeatMap

# Load a world GeoJSON file (assuming we're focusing on Canada/USA for now) to apply fill colors to states/provinces
# Unfortunately, I cannot access a remote GeoJSON file, but you can use one locally, for example:
# geojson_path = 'path_to_geojson_file'

# For demonstration, we'll simulate this step

# Now, let's create a map with choropleth and heatmap

def create_choropleth_heatmap_map(data, map_center=[45.0, -93.0], zoom_start=4):
    # Initialize the map
    folium_map = folium.Map(location=map_center, zoom_start=zoom_start, tiles='cartodb positron')
    
    # Heatmap layer
    heat_data = [[row['Latitude'], row['Longitude']] for _, row in data.iterrows()]
    HeatMap(heat_data, radius=5, blur=5, max_intensity=12).add_to(folium_map)
    
    # ## Assuming we have a GeoJSON file with state/province boundaries, we would use it here
    # folium.Choropleth(
    #     geo_data=geojson_path,  # Provide the path to a geojson file with state/province boundaries
    #     data=state_counts_df,
    #     columns=['State_Province', 'Player_Count'],
    #     key_on='feature.properties.NAME',  # Adjust this to match the key in the GeoJSON file
    #     fill_color='YlGnBu',
    #     fill_opacity=0.7,
    #     line_opacity=0.2,
    #     legend_name='Number of Players by State/Province'
    # ).add_to(folium_map)

    # Create a FeatureGroup to hold the clusters, and add MarkerCluster to it
    cluster_group = folium.FeatureGroup(name='Toggle Clusters')
    marker_cluster = MarkerCluster().add_to(cluster_group)

    # Add player markers with tooltips to the MarkerCluster
    for _, row in data.iterrows():
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            tooltip=row['Tooltip']
        ).add_to(marker_cluster)
    
    # Add the FeatureGroup (with clusters) to the map
    cluster_group.add_to(folium_map)

    # Add LayerControl to allow toggling the clusters on and off
    folium.LayerControl().add_to(folium_map)

    return folium_map



# Create the enhanced map with choropleth and heatmap
enhanced_player_map = create_choropleth_heatmap_map(merged_df)

# Save the map to an HTML file for visualization
# enhanced_map_file_path = '/mnt/data/college_hockey_player_origin_map_enhanced.html'


enhanced_map_file_path = os.path.join('..', 'TEMP', 'player_origin_map_v1.html')

enhanced_player_map.save(enhanced_map_file_path)


