# User Input

In [None]:
# Glob patterns (not plain directories) matching the zonal_stats text files
# run from `submit_zonal_stats.sh`; the `*` stands for the per-dataset part of
# each file name, and every match is read when the tables are built below.
directory_canadaAlaska = "/home/khb47/ABoVE/scripts/zonal_stats_CanadaAlaska_*.txt"
directory_epa2 = "/home/khb47/ABoVE/scripts/zonal_stats_EPA2_*.txt"

# Region whose stats should be visualized; it is compared against these exact
# strings later in the notebook to pick the table, shapefile and join column.
visualize = 'CanadaAlaska' # Must be 'CanadaAlaska' or 'EPA2'

# Set up
Import the libraries, load the shapefiles, and build the tables of zonal statistics.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import colors
import glob
import os

In [None]:
# Locations of the two boundary shapefiles used for the maps
epa_shapefile = "/projects/arctic/share/ABoVE_Biomass/OtherSpatialDatasets/EPA_ecoregion_lvl2_clipped_102001.shp"
canada_shapefile = "/projects/arctic/share/ABoVE_Biomass/OtherSpatialDatasets/CanadaAlaska_Boundaries_102001.shp"

# Read each shapefile into its own GeoDataFrame (EPA first, then Canada/Alaska)
epa_gdf, canada_gdf = (
    gpd.read_file(shp_path) for shp_path in (epa_shapefile, canada_shapefile)
)

In [None]:
# Make the Canada Alaska zonal stats data
# Each matching file is read into its own frame and everything is concatenated
# once at the end: calling pd.concat inside the loop re-copies all earlier rows
# on every pass, and seeding it with an empty DataFrame draws a FutureWarning
# in recent pandas releases.
canada_alaska_frames = []
# glob.glob returns paths in arbitrary order; sort so the row order is reproducible
for file in sorted(glob.glob(directory_canadaAlaska)):
    base_name = os.path.basename(file)
    # Drop the fixed prefix and the ".txt" suffix, leaving the dataset part of the name
    dataset_name = base_name[len("zonal_stats_CanadaAlaska_"):-len(".txt")]
    # The two-character separator ", " is only supported by the python engine
    dataset_canada_alaska = pd.read_csv(file, delimiter=', ', engine='python')
    # Label the rows with the text before the first underscore of the dataset name
    dataset_canada_alaska['dataset'] = dataset_name.split('_')[0]
    canada_alaska_frames.append(dataset_canada_alaska)

# pd.concat raises on an empty list, so keep an empty table when no file matched
canada_alaska = (
    pd.concat(canada_alaska_frames, ignore_index=True)
    if canada_alaska_frames
    else pd.DataFrame()
)
canada_alaska.head()

In [None]:
# Make the EPA2 zonal stats data
# Each matching file is read into its own frame and everything is concatenated
# once at the end: calling pd.concat inside the loop re-copies all earlier rows
# on every pass, and seeding it with an empty DataFrame draws a FutureWarning
# in recent pandas releases.
epa2_frames = []
# glob.glob returns paths in arbitrary order; sort so the row order is reproducible
for file in sorted(glob.glob(directory_epa2)):
    base_name = os.path.basename(file)
    # Drop the fixed prefix and the ".txt" suffix, leaving the dataset part of the name
    dataset_name = base_name[len("zonal_stats_EPA2_"):-len(".txt")]
    # The two-character separator ", " is only supported by the python engine
    dataset_epa2 = pd.read_csv(file, delimiter=', ', engine='python')
    # Label the rows with the text before the first underscore of the dataset name
    dataset_epa2['dataset'] = dataset_name.split('_')[0]
    epa2_frames.append(dataset_epa2)

# pd.concat raises on an empty list, so keep an empty table when no file matched
epa2 = pd.concat(epa2_frames, ignore_index=True) if epa2_frames else pd.DataFrame()
epa2.head()

# Visualization of Zonal Stats
The cell below selects the table, shapefile, and join column for the region set in `visualize` (either `CanadaAlaska` or `EPA2`) in the User Input section.

In [None]:
# Select the stats table, the shapefile and the shapefile column that is
# matched against the 'Zone' column, according to the region in `visualize`.
# Copies are taken so later cells can modify them without touching the originals.
if visualize == 'CanadaAlaska':
    df = canada_alaska.copy()
    shapefile = canada_gdf.copy()
    key = 'postal'
elif visualize == 'EPA2':
    df = epa2.copy()
    shapefile = epa_gdf.copy()
    key = 'NA_L2KEY'
else:
    # Fail here rather than leaving df/shapefile/key undefined (or stale from a
    # previous run of this cell) for the plotting cells that follow
    raise ValueError(
        f"visualize must be 'CanadaAlaska' or 'EPA2', got {visualize!r}"
    )

## Bar chart of mean values across a dataset

In [None]:
# Force 'Mean' to a numeric dtype; entries that cannot be parsed become NaN
df['Mean'] = pd.to_numeric(df['Mean'], errors='coerce')

# Average 'Mean' per (Zone, dataset) pair, then move the datasets into columns
# so each zone gets one bar per dataset
zone_dataset_mean = df.groupby(['Zone', 'dataset'])['Mean'].mean()
grouped_mean = zone_dataset_mean.unstack()

bar_ax = grouped_mean.plot.bar(figsize=(12, 6))
bar_ax.set_title('Mean Values Across Datasets by Zone')
bar_ax.set_xlabel('Zone')
bar_ax.set_ylabel('Mean Value')
# Anchor the legend outside the axes so it does not cover the bars
bar_ax.legend(title='Dataset', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

## Boxplot of Distribution by dataset

In [None]:
# One box per dataset showing the spread of 'Mean'; hue repeats the x variable
# so every box is coloured separately from the 'mako' palette
box_ax = sns.boxplot(data=df, x='dataset', y='Mean', hue='dataset', palette='mako')
box_ax.set_title("Distribution of Mean Values by Dataset")
box_ax.set_ylabel("Mean Value")
# Tilt the dataset names so long labels do not overlap
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## Choropleth Maps

In [None]:
# Merge DataFrame with GeoDataFrame: shapefile rows are matched to stats rows
# where the shapefile's `key` column equals the stats 'Zone' column
df_geo = shapefile.merge(df, left_on=key, right_on='Zone')

# Coerce 'Mean' here as well, so this cell does not depend on the bar-chart
# cell having been run first (a no-op when the column is already numeric)
df_geo['Mean'] = pd.to_numeric(df_geo['Mean'], errors='coerce')
stat_columns = ['Mean', 'Median', 'Sum', 'Std']
df_geo[stat_columns] = df_geo[stat_columns].round(2)

# Calculate bounds for scaling, taken over all datasets so every map shares
# one colour scale and the maps can be compared with each other
global_min = df_geo['Mean'].min()
global_max = df_geo['Mean'].max()

# One map per dataset
for dataset in df['dataset'].unique():
    df_subset = df_geo[df_geo['dataset'] == dataset]
    ax = df_subset.plot(
        column='Mean',
        cmap='viridis',
        legend=True,
        figsize=(12, 8),
        # Ensures the same color scale
        vmin=global_min,
        vmax=global_max,
    )
    ax.set_title(f'Choropleth Map of Mean Values ({dataset})', fontsize=14)
    ax.set_axis_off()

    # Add labels of the mean value at each zone's centroid.
    # Open question from the author: should a +/- standard deviation be shown too?
    # Centroids are computed once per map; 'Mean' was already rounded above.
    centroids = df_subset.geometry.centroid
    for x, y, mean in zip(centroids.x, centroids.y, df_subset['Mean']):
        # Zones without a mean value get no label
        if pd.notnull(mean):
            ax.annotate(
                text=f'{mean}',
                xy=(x, y),
                ha='center',
                fontsize=8,
                color='black',
                bbox=dict(boxstyle="round,pad=0.3", edgecolor='none', facecolor='white', alpha=0.6)
            )
    plt.show()

## Summary Table using a Heatmap

In [None]:
# Aggregate per (Zone, dataset) pair: 'Sum' is totalled, the other statistics
# are averaged
summary_table = df.groupby(['Zone', 'dataset']).agg(
    {'Mean': 'mean', 'Median': 'mean', 'Sum': 'sum', 'Std': 'mean'}
)

# Could print out summary table
#print(summary_table)

# Visualize using a heatmap: spread the averaged 'Mean' into a Zone x dataset
# grid and annotate every cell with its value
summary_pivot = summary_table['Mean'].unstack('dataset')
sns.heatmap(
    summary_pivot,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    cbar_kws={'label': 'Mean Value'},
)
plt.title('Heatmap of Mean Values by Zone and Dataset')
plt.show()
