# Post-Processing & Visualization 

gl<br>
09.29.20<br>

The purpose of this notebook is to showcase how data should be processed following `make stilt_output_conversion`. The big picture idea is that all simulations points over 0 have been collected and added to a single dataframe. The latitude and longitude should remain consistent for points as the grid should be constant over multiple simulations. Ideally (and to reduce reducancy) each of these points should be unique on a day to day basis with each chemical release as a seperate column entry. 

In [3]:
#Load Libraries

#Basic py: 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#Geo
import geopandas as gpd
import fiona
from shapely.geometry import Point
import descartes
import contextily as ctx #Basemaps 

## Load Data from CSV into a Geopandas Dataframe: 092120_hysplit_v_stilt.csv

Let's examine a basic table of information 

In [4]:
def csv_to_geodataframe(path,epsg):
    
    #load the data
    sim_df = pd.read_csv(path)

    #Adjust the datetime
    sim_df['ss_date'] = pd.to_datetime(sim_df['ss_date'])

    #convert to a geodataframe
    sim_gdf = gpd.GeoDataFrame(sim_df, geometry=gpd.points_from_xy(sim_df.lon, sim_df.lat)).set_crs(epsg=4326)
    return sim_gdf.to_crs(epsg=epsg)

In [22]:
#Load the data:
hysplit_v_stilt_gdf = csv_to_geodataframe('/home/boogie2/Hanson_Lab/TRI_STILT/data/processed/stilt_output/shapefile/092120_hysplit_v_stilt.csv',3857)

#We expect four simulations to be present let's check
len(hysplit_v_stilt_gdf['id'].drop_duplicates()) == 4

#Was there any simulation overlap (added chemicals which were irrelevant?)
hysplit_v_stilt_gdf.drop_duplicates(subset=['zagl','Chemical'])

Unnamed: 0,lat,lon,foot,lbsperday,id,TRI_source_lati,TRI_source_long,zagl,Chemical,Release (lbs/year),YEAR,ss_name,ss_path,ss_date,geometry
0,41.095,-112.025,0.260448,1.26228,7,41.093219,-112.028221,0.0,STYRENE,1769.0,1990,199001010000_-112.028221_41.093219_0_foot,data/processed/stilt_output/netcdf/092120_hysp...,1970-01-01 00:03:19.001010,POINT (-12470565.956 5026364.257)
1,41.095,-112.025,0.260448,1.26228,7,41.093219,-112.028221,0.0,DICHLOROMETHANE,36000.0,1990,199001010000_-112.028221_41.093219_0_foot,data/processed/stilt_output/netcdf/092120_hysp...,1970-01-01 00:03:19.001010,POINT (-12470565.956 5026364.257)
30916,40.725,-112.015,2e-06,2.535514e-08,59,40.72557,-112.00081,0.0,"SULFURIC ACID (1994 AND AFTER ACID AEROSOLS"" O...",5.0,1990,199001010000_-112.00081_40.72557_0_foot,data/processed/stilt_output/netcdf/092120_hysp...,1970-01-01 00:03:19.001010,POINT (-12469452.761 4971863.456)
60723,41.665,-112.435,0.218712,5.992096,69,41.663,-112.432,0.0,"1,4-DIOXANE",10000.0,1990,199001010000_-112.432_41.663_0_foot,data/processed/stilt_output/netcdf/092120_hysp...,1970-01-01 00:03:19.001010,POINT (-12516206.947 5110929.414)


# Visualize the Simulation

In [45]:
#Functions to help with plotting
def geoplot(geo_df, ax, title, colormap_column, legend_boolean=True,vmin=0,vmax=10,alpha=0.7, markersize=5, cmap='YlOrRd'):

    """A plotting function for geographic areas
    
    ===
    Inputs:
    geo_df: A geodataframe with geometry column for plotting. For added background map, please ensure EPSG is set to 3857
    ax: The axis for plotting
    title: the title of the plot
    
    Returns:
    none
    ===
    """    
    #Plot
    geo_df.plot(cmap=cmap,
                ax=ax,
                column=colormap_column,
                vmin=vmin,
                vmax=vmax,
                legend=legend_boolean,
                alpha = alpha,
                markersize=markersize,
                legend_kwds={'label': "Mean Pollutant Concentration (lbs/km^2)",'orientation': "vertical"})

    #Add title
    ax.set_title(title)

    #Plot a background map if the EPSG is configured correctly
    if geo_df.crs == 3857:
        ctx.add_basemap(ax)

    #Let's remove the axis ticks too
    ax.set_axis_off()
    

### Base Visualization

In [62]:
#Let's visualize all simulations without any filtering
fig,ax = plt.subplots(1,1,figsize=(20,20)) 
geoplot(hysplit_v_stilt_gdf[hysplit_v_stilt_gdf.lbsperday>0.001], ax, 'STILT simulations: 01.01.1990', 'lbsperday')

#Removing axis also removed the background color let's add that back
fig.patch.set_facecolor('xkcd:white')
#make a directory if it doesn't exist
try:
    os.mkdir('/home/boogie2/Hanson_Lab/TRI_STILT/figures/hysplit_v_stilt')
except:
    print('directory exists')

#save the figure and close the plot
plt.savefig('/home/boogie2/Hanson_Lab/TRI_STILT/figures/hysplit_v_stilt/all_simulations.png')
plt.close()

directory exists


### Adding the TRI origin sources

In [55]:
#Let's add the original source points to the plot
#Gather the data
origin_tri = hysplit_v_stilt_gdf[['TRI_source_lati','TRI_source_long']].drop_duplicates()
origin_tri = gpd.GeoDataFrame(origin_tri, geometry=gpd.points_from_xy(origin_tri.TRI_source_long, origin_tri.TRI_source_lati)).set_crs(epsg=4326)
origin_tri = origin_tri.to_crs(epsg=3857)

In [69]:
from matplotlib.lines import Line2D
fig,ax = plt.subplots(1,1,figsize=(20,20)) 
geoplot(hysplit_v_stilt_gdf[hysplit_v_stilt_gdf.lbsperday>0.001], ax, 'STILT simulations: 01.01.1990', 'lbsperday')

#Add the original source locations
origin_tri.plot(ax = ax,color='purple',marker='^',markersize=60) #Arbritrary setting based upon the figsize

#Removing axis also removed the background color let's add that back
fig.patch.set_facecolor('xkcd:white')

#Add Legend
legend_elements = [Line2D([], [], marker='^', color='w', label='TRI Emission Site',markerfacecolor='purple', markersize=15)]
ax.legend(handles=legend_elements, loc='lower right')

#make a directory if it doesn't exist
try:
    os.mkdir('/home/boogie2/Hanson_Lab/TRI_STILT/figures/hysplit_v_stilt')
except:
    print('directory exists')

#save the figure and close the plot
plt.savefig('/home/boogie2/Hanson_Lab/TRI_STILT/figures/hysplit_v_stilt/all_simulations_with_source.png')
plt.close()

directory exists
