In [1]:
import pandas as pd  # provides interface for interacting with tabular data
import geopandas as gpd  # combines the capabilities of pandas and shapely for geospatial operations
from shapely.geometry import Point, Polygon, MultiPolygon  # for manipulating text data into geospatial shapes
from shapely import wkt  # stands for "well known text," allows for interchange across GIS programs
import rtree  # supports geospatial join
import os
import fnmatch
import numpy as np
import matplotlib.pyplot as plt
import descartes
import sys
import sklearn
import pandasql as ps
from datetime import datetime as dt, timedelta, date
sys.path.append('C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/code/functions/')
from gis_processing import *

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
#Import the mapping df
gitdir = 'C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/'

subdir_from = 'data/clean_data/mapping_tables/'
filename_from = 'grid_neighbor_map.csv'

grid_neighbor_map = pd.read_csv(gitdir + subdir_from + filename_from)

In [4]:
# Import main df
data_dir = 'C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/data'
df_import = {}
target_full = pd.DataFrame()
for i in np.arange(1, 5):
    df_import[i] = pd.read_pickle(os.path.join(data_dir, f'clean_data/target_full_{i}.pkl')) 
    target_full = target_full.append(df_import[i])

In [154]:
# Remove unneeded columns and convert to a regular df
target_full = pd.DataFrame(target_full[['date', 'month_id', 'GRID_ID', 'FIRE_KEY', 'YEAR', 'end_date']])

## Create count of nearby grid fires

In [155]:
# Drop rows without fires
target_clean = target_full.drop(target_full[target_full['Y_bin'] == 0].index, axis=0)

KeyError: 'Y_bin'

In [None]:
# Select distinct month/grids
q1 = """SELECT DISTINCT month_id, GRID_ID FROM target_clean """

df_unique = ps.sqldf(q1, locals())

In [None]:
# Select distinct month/grids/fires
q2 = """SELECT DISTINCT month_id, GRID_ID, FIRE_KEY FROM target_clean """

df_unique_fires = ps.sqldf(q2, locals())

In [None]:
#Create a new month key column that is the create month + 1 for joining purposes
conditions = [
    (df_unique.month_id.map(lambda x: x[5:]).astype(int) == 12),
    (df_unique.month_id.map(lambda x: x[5:]).astype(int) != 12)
    ]

# Create a list of the values we want to assign for each condition
values = [(df_unique.month_id.map(lambda x: x[:4]).astype(int)+1).astype(str) + '_1', 
          df_unique.month_id.map(lambda x: x[:4]) + '_' + (df_unique.month_id.map(lambda x: x[5:]).astype(int)+1).astype(str)]

# Create a new column and use np.select to assign values to it using our lists as arguments
df_unique['month_id_key'] = np.select(conditions, values)

In [None]:
#Create a new month key column that is the create month + 1 for joining purposes
conditions = [
    (df_unique_fires.month_id.map(lambda x: x[5:]).astype(int) == 12),
    (df_unique_fires.month_id.map(lambda x: x[5:]).astype(int) != 12)
    ]

# Create a list of the values we want to assign for each condition
values = [(df_unique_fires.month_id.map(lambda x: x[:4]).astype(int)+1).astype(str) + '_1', 
          df_unique_fires.month_id.map(lambda x: x[:4]) + '_' + (df_unique_fires.month_id.map(lambda x: x[5:]).astype(int)+1).astype(str)]

# Create a new column and use np.select to assign values to it using our lists as arguments
df_unique_fires['month_id_key'] = np.select(conditions, values)

In [None]:
# Join on grid adj mapping
df_merge = df_unique.merge(grid_neighbor_map, how='inner', left_on='GRID_ID', right_on='GRID_ID')

#Join back to the fire count df joining grids with the counts from its neighbors
df_merge = df_merge.merge(df_unique_fires, how='inner', left_on=('GRID_ID_adj', 'month_id'), right_on=('GRID_ID', 'month_id_key'))

#Join back to the fire count df joining grids with the counts from its neighbors
df_merge = df_merge.merge(df_unique_fires, how='left', left_on=('GRID_ID_x', 'month_id_x', 'FIRE_KEY'), right_on=('GRID_ID', 'month_id', 'FIRE_KEY'))

In [None]:
# Group the adjacent fires by central grid and month
# Sum the multiplier column rather than doing a count to adjust for central grids on the edge
df_features = df_merge[['month_id_x', 'GRID_ID_x', 'multiplier']].groupby(['month_id_x', 'GRID_ID_x']).sum().reset_index()

# Create a column for the binary flag
df_features['adj_fire_bcount'] = 1

# Clean up the df
df_features = df_features.rename(columns={'month_id_x': 'month_id', 'GRID_ID_x': 'GRID_ID', 'multiplier': 'adj_fire_count'})

## Rejoin New Features to the Main DF

In [162]:
# Select distinct month/grids
q3 = """SELECT DISTINCT month_id, GRID_ID FROM target_full """

df_final = ps.sqldf(q3, locals())

In [None]:
# Do a left join of the features onto the main DF
df_final = df_final.merge(df_features, how='left', on=('GRID_ID', 'month_id'))

# Fill any NAs (places without fires) with 0s
df_final = df_final['adj_fire_count'].fillna(0)
df_final = df_final['adj_fire_bcount'].fillna(0)

## Export

In [None]:
#Save the clean df
subdir_to = 'data/clean_data/engineered_features/'
filename_to = 'adj_fire_final.csv'
df_final.to_csv(gitdir + subdir_to + filename_to, index=False)