# Notebook to investigate snow types

# Step 0 Up Notebook 

In [17]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
from snowML.datapipe.utils import data_utils as du 
from snowML.datapipe.utils import snow_types as st


Note: These hucs had errors ['17010101', '17010104', '17010105', '17010106', '17010205', '17010206', '17020001', '17020002', '17020006', '17020007', '17080001', '17100101', '17100106', '17110002']

# Step 1 - Load Static Variables  

In [2]:
f = "Static_All_Region_17.csv"
b = "snowml-silver"
df_static = du.s3_to_df(f, b)
print(df_static.shape)
df_static.head(2)

(8922, 6)


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover
0,170101010101,Canada,POLYGON ((-115.11965812821985 50.0205547483679...,,2189.891357,
1,170101010102,Canada,POLYGON ((-115.02112744989009 50.1144774042793...,,2100.375244,


In [3]:
# drop nas
df_static_slim = df_static.dropna(subset=["Predominant_Snow"])
print(df_static_slim.shape)
df_static_slim.head(2)

(7857, 6)


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover
107,170101020101,Upper Pleasant Valley Fisher River,POLYGON ((-114.76310422074341 48.1921956349155...,Montane Forest,1237.956177,38.905585
108,170101020102,Island Creek,POLYGON ((-114.83065259758746 48.2522771362874...,Montane Forest,1240.726685,48.555556


In [4]:
# drop nas all
df_static_slim2 = df_static.dropna()
print(df_static_slim2.shape)
df_static_slim2.head(2)

(7322, 6)


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover
107,170101020101,Upper Pleasant Valley Fisher River,POLYGON ((-114.76310422074341 48.1921956349155...,Montane Forest,1237.956177,38.905585
108,170101020102,Island Creek,POLYGON ((-114.83065259758746 48.2522771362874...,Montane Forest,1240.726685,48.555556


# Step 2 - Group by Huc 8 Column 

In [5]:
# chhose which df to analyze and plot
df = df_static_slim2

In [6]:
df["huc8"] = df["huc_id"].astype(str).str[:8]
df.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["huc8"] = df["huc_id"].astype(str).str[:8]


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover,huc8
107,170101020101,Upper Pleasant Valley Fisher River,POLYGON ((-114.76310422074341 48.1921956349155...,Montane Forest,1237.956177,38.905585,17010102
108,170101020102,Island Creek,POLYGON ((-114.83065259758746 48.2522771362874...,Montane Forest,1240.726685,48.555556,17010102


In [7]:
snow_counts = df.groupby("huc8")["Predominant_Snow"].value_counts().unstack(fill_value=0)
print(snow_counts.shape)
snow_counts.head(2)

(194, 7)


Predominant_Snow,Boreal Forest,Ephemeral,Maritime,Montane Forest,Ocean,Prairie,Tundra
huc8,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
17010102,0,0,0,22,0,0,0
17010103,0,0,0,18,0,0,0


In [8]:
summary_row = pd.DataFrame(
    snow_counts.sum(numeric_only=True).to_dict(),
    index=["Total"]
)
summary_row

Unnamed: 0,Boreal Forest,Ephemeral,Maritime,Montane Forest,Ocean,Prairie,Tundra
Total,70,3611,417,1982,21,1155,66


# Step 3 - Calculate Percent Ephemeral By Huc08

In [9]:
snow_counts["percent_ephemeral"] = (
    snow_counts["Ephemeral"] / snow_counts.sum(axis=1)
) * 100


In [10]:
snow_counts_sorted = snow_counts.sort_values(by="percent_ephemeral", ascending=False)
print(snow_counts_sorted.shape)
snow_counts_sorted.head(2)

(194, 8)


Predominant_Snow,Boreal Forest,Ephemeral,Maritime,Montane Forest,Ocean,Prairie,Tundra,percent_ephemeral
huc8,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
17070101,0,69,0,0,0,0,0,100.0
17090003,0,55,0,0,0,0,0,100.0


In [11]:
snow_counts_sorted.iloc[73:76]

Predominant_Snow,Boreal Forest,Ephemeral,Maritime,Montane Forest,Ocean,Prairie,Tundra,percent_ephemeral
huc8,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
17070106,0,25,9,0,0,0,0,73.529412
17110007,0,8,3,0,0,0,0,72.727273
17060306,0,43,0,13,0,4,0,71.666667


# Step 4 - Add Color Legend 

In [12]:
def add_color_column(df, color_map_snow):
    df["color_snow_type"] = df["Predominant_Snow"].apply(lambda x: color_map_snow.get(x, "yellow"))
    return df

In [13]:
color_map_snow = st.color_map_standard()
color_map_snow

{'Montane Forest': 'darkgreen',
 'Maritime': 'blue',
 'Ephemeral': '#E6E6FA',
 'Prairie': 'lightgreen',
 'Tundra': 'gray',
 'Boreal Forest': 'red'}

In [14]:
df_snow = add_color_column(df, color_map_snow)
df_snow.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["color_snow_type"] = df["Predominant_Snow"].apply(lambda x: color_map_snow.get(x, "yellow"))


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover,huc8,color_snow_type
107,170101020101,Upper Pleasant Valley Fisher River,POLYGON ((-114.76310422074341 48.1921956349155...,Montane Forest,1237.956177,38.905585,17010102,darkgreen
108,170101020102,Island Creek,POLYGON ((-114.83065259758746 48.2522771362874...,Montane Forest,1240.726685,48.555556,17010102,darkgreen


# Step 5 - Map 

In [15]:
# Convert the 'geometry' column from WKT strings to shapely geometries
df_snow['geometry'] = df_snow['geometry'].apply(wkt.loads)

# Now create the GeoDataFrame
gdf_snow = gpd.GeoDataFrame(df_snow, geometry='geometry')

# Optionally set CRS
gdf_snow.set_crs("EPSG:4326", inplace=True)  # or your correct CRS


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_snow['geometry'] = df_snow['geometry'].apply(wkt.loads)


Unnamed: 0,huc_id,huc_name,geometry,Predominant_Snow,Mean Elevation,Mean_Forest_Cover,huc8,color_snow_type
107,170101020101,Upper Pleasant Valley Fisher River,"POLYGON ((-114.7631 48.1922, -114.76311 48.192...",Montane Forest,1237.956177,38.905585,17010102,darkgreen
108,170101020102,Island Creek,"POLYGON ((-114.83065 48.25228, -114.8322 48.25...",Montane Forest,1240.726685,48.555556,17010102,darkgreen
109,170101020103,Pleasant Valley Creek,"POLYGON ((-114.83106 48.22599, -114.83134 48.2...",Montane Forest,1214.646973,31.068212,17010102,darkgreen
110,170101020104,Pleasant Valley Fisher River-Pearsons Reservoir,"POLYGON ((-114.86144 48.13665, -114.86246 48.1...",Montane Forest,1262.054932,45.190845,17010102,darkgreen
111,170101020105,Pleasant Valley Fisher River-Barnum Creek,"POLYGON ((-115.07777 48.2157, -115.07834 48.21...",Montane Forest,1359.265869,50.988594,17010102,darkgreen
...,...,...,...,...,...,...,...,...
8917,171200090706,Stonehouse Canyon,"POLYGON ((-118.28659 42.93105, -118.28816 42.9...",Ephemeral,1394.948364,1.166363,17120009,#E6E6FA
8918,171200090801,Heath Lake,"POLYGON ((-118.15559 42.94945, -118.15592 42.9...",Ephemeral,1512.737549,0.002574,17120009,#E6E6FA
8919,171200090802,Sunrise Valley,"POLYGON ((-118.09958 43.05696, -118.09963 43.0...",Ephemeral,1328.579956,0.000743,17120009,#E6E6FA
8920,171200090803,Sheepshead Canyon Creek-Frontal Folly Farm Flat,"POLYGON ((-118.14632 42.9846, -118.14703 42.98...",Ephemeral,1420.760498,0.015180,17120009,#E6E6FA


In [18]:
gdf_snow_slim = gdf_snow[gdf_snow["huc8"].isin(['17100102', '17110018', '17110020'])]

In [20]:

gdf_snow_slim.explore(color=gdf_snow["color_snow_type"], legend=True)