In [32]:
import pandas as pd  # provides interface for interacting with tabular data
import geopandas as gpd  # combines the capabilities of pandas and shapely for geospatial operations
from shapely.geometry import Point, Polygon, MultiPolygon  # for manipulating text data into geospatial shapes
from shapely import wkt  # stands for "well known text," allows for interchange across GIS programs
import rtree  # supports geospatial join
import os
import fnmatch
import numpy as np
import matplotlib.pyplot as plt
import descartes
import sys
import sklearn
import pickle
from shapely.ops import nearest_points
from datetime import datetime as dt, date
sys.path.append('/Users/saraprice/Documents/NYU/Fall_2020/DS_GA_1001/final_project/wildfires-1001/code/functions/')
from gis_processing import *

In [5]:
data_dir = '/Users/saraprice/Documents/NYU/Fall_2020/DS_GA_1001/final_project/wildfires-1001/data'

In [6]:
weather = pd.read_pickle(os.path.join(data_dir, 'clean_data/ERA_weather-data/ERA5_CAgrid_gdf.pkl'))

In [76]:
weather_1980_90 = pd.read_pickle(os.path.join(data_dir, 'clean_data/ERA_weather-data/ERA5_CAgrid1980_gdf.pkl'))

In [54]:
weather['month'] = weather['date'].apply(lambda x:x.month)
weather['year'] = weather['date'].apply(lambda x:x.year)

In [86]:
weather_1980_90['month'] = weather_1980_90['date'].apply(lambda x:x.month)
weather_1980_90['year'] = weather_1980_90['date'].apply(lambda x:x.year)
weather_1980_90['month_id'] = weather_1980_90['year'].astype(str) + "_" + weather_1980_90['month'].astype(str)

In [37]:
rename_weather = {'10 metre U wind component': 'U_wind_10m',
                  '10 metre V wind component': 'V_wind_10m',
                  '2 metre dewpoint temperature': '2m_dewpoint_tmp',
                  '2 metre temperature': '2m_tmp',
                  'Leaf area index, high vegetation':'leaf_high_veg',
                  'Leaf area index, low vegetation':'leaf_low_veg',
                  'Surface pressure' : 'surface_pressure',
                  'Total precipitation' : 'tot_prcp'}
orig_keys = list(rename_weather.keys())
for key in orig_keys:
    for i in [0, 6, 12, 18]:
        rename_weather[key + f" hrs:{i}"] = rename_weather[key]+f"_{i}hrs"
for key in orig_keys:
    del rename_weather[key]

In [79]:
try:
    weather.rename(columns= rename_weather, inplace = True)
except:
    pass
try:
    weather_1980_90.rename(columns= rename_weather, inplace = True)
except:
    pass

In [87]:
for i in weather.columns:
    if i not in weather_1980_90.columns:
        print(i)

In [99]:
weather_full = weather.append(weather_1980_90)

In [38]:
with open(os.path.join(data_dir, 'clean_data/ERA_weather-data/ERA_rename_dictionary.pkl'), 'wb') as handle:
    pickle.dump(rename_weather, handle)

In [41]:
weather.rename(columns= rename_weather, inplace = True)


In [137]:
w_cols = list(rename_weather.values())
print(w_cols)

['U_wind_10m_0hrs', 'U_wind_10m_6hrs', 'U_wind_10m_12hrs', 'U_wind_10m_18hrs', 'V_wind_10m_0hrs', 'V_wind_10m_6hrs', 'V_wind_10m_12hrs', 'V_wind_10m_18hrs', '2m_dewpoint_tmp_0hrs', '2m_dewpoint_tmp_6hrs', '2m_dewpoint_tmp_12hrs', '2m_dewpoint_tmp_18hrs', '2m_tmp_0hrs', '2m_tmp_6hrs', '2m_tmp_12hrs', '2m_tmp_18hrs', 'leaf_high_veg_0hrs', 'leaf_high_veg_6hrs', 'leaf_high_veg_12hrs', 'leaf_high_veg_18hrs', 'leaf_low_veg_0hrs', 'leaf_low_veg_6hrs', 'leaf_low_veg_12hrs', 'leaf_low_veg_18hrs', 'surface_pressure_0hrs', 'surface_pressure_6hrs', 'surface_pressure_12hrs', 'surface_pressure_18hrs', 'tot_prcp_0hrs', 'tot_prcp_6hrs', 'tot_prcp_12hrs', 'tot_prcp_18hrs']


In [160]:
def hist_weather_data(df, w_cols, n_years, min_year, max_year):
    w_cols_new = [f"hist_{col}_{n_years}y" for col in w_cols]
    output_df = pd.DataFrame()
    for g in df.GRID_ID.unique():
        df_sub = df[df['GRID_ID']==g]
        for m in range(1, 13):
            df_sub = df_sub[df_sub['month']==m]
            for y in np.arange(min_year, max_year +1):
                time_sub = df_sub[(df_sub['year']>=y-n_years) & (df_sub['year']<y)]
                hist_means= time_sub[w_cols].mean(axis = 0)
                hist_means = hist_means.to_dict()
                hist_means['GRID_ID'] = time_sub.GRID_ID
                hist_means['year'] = time_sub.year
                hist_means['month'] = time_sub.month
                new_df = pd.DataFrame(hist_means, columns = list(hist_means.keys()))
                output_df = output_df.append(new_df)
    return output_df

In [None]:
hist_weather_1yr = hist_weather_data(weather_full, w_cols, 1, 1990, 2019)

In [146]:
hist_weather_5yr = hist_weather_data(weather_full, w_cols, 5, 1990, 2019)

In [147]:
hist_weather_10yr = hist_weather_data(weather_full, w_cols, 5, 1990, 2019)

In [153]:
hist_weather_1yr.shape

(4020, 34)

In [157]:
hist_weather_1yr['year'].value_counts()

2017    134
2015    134
1992    134
1994    134
1996    134
1998    134
2000    134
2002    134
2004    134
2006    134
2008    134
2010    134
2012    134
2014    134
2016    134
2018    134
1989    134
1991    134
1993    134
1995    134
1997    134
1999    134
2001    134
2003    134
2005    134
2007    134
2009    134
2011    134
2013    134
1990    134
Name: year, dtype: int64