In [33]:
import pandas as pd  # provides interface for interacting with tabular data
import geopandas as gpd  # combines the capabilities of pandas and shapely for geospatial operations
from shapely.geometry import Point, Polygon, MultiPolygon  # for manipulating text data into geospatial shapes
from shapely import wkt  # stands for "well known text," allows for interchange across GIS programs
import rtree  # supports geospatial join
import os
import fnmatch
import numpy as np
import matplotlib.pyplot as plt
import descartes
import sys
import sklearn
from datetime import datetime as dt, timedelta, date
sys.path.append('C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/code/functions/')
from gis_processing import *

# Raise the pandas display limits so wide/long frames are shown without truncation
# when inspected in the notebook.
for _option, _value in {
    'display.max_rows': 1000,
    'display.max_columns': 500,
    'display.width': 1000,
}.items():
    pd.set_option(_option, _value)

In [10]:
# Load the grid -> neighboring-grid mapping table.
# Later cells join on its GRID_ID and GRID_ID_adj columns and read its multiplier column.
# NOTE(review): gitdir is an absolute, machine-specific path; anyone else running this
# notebook has to edit it by hand.
gitdir = 'C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/'
subdir_from = 'data/clean_data/mapping_tables/'
filename_from = 'grid_neighbor_map.csv'

# Each piece already ends with its own '/' separator, so plain concatenation is enough.
grid_neighbor_map = pd.read_csv(''.join([gitdir, subdir_from, filename_from]))

In [23]:
# Load the fires/target table; the first CSV column becomes the row index.
data_dir = 'C:/Users/jades/1001 Intro to Data Science Notebooks/Project/wildfires-1001/data/clean_data/'
target_path = data_dir + 'target_sub_adj.csv'
target_full = pd.read_csv(target_path, index_col=0)

# Disabled alternative sources, kept for reference.
# NOTE(review): data_dir already ends in 'clean_data/', so these would resolve to
# '.../clean_data/clean_data/...' if re-enabled as written.
#target_full_b = pd.read_pickle(data_dir + 'clean_data/target_full_1.pkl')
#target_full = pd.read_pickle(data_dir + 'clean_data/target_full_2.pkl')

## Create binary count of nearby grid fires

In [24]:
# Collapse target_full (loaded from target_sub_adj.csv) to one row per
# (month_id, GRID_ID), keeping the largest Y_bin found in each group.
# NOTE(review): presumably Y_bin is a 0/1 fire flag, which would make this
# "did this grid have any fire that month" - confirm against the target table.
fire_bcount = (
    target_full
    .groupby(['month_id', 'GRID_ID'])['Y_bin']
    .max()
    .reset_index()
)

In [25]:
# Build month_id_key: the label of the month that FOLLOWS month_id, in the same
# '<year>_<month>' text form. Joining another frame's month_id against this key
# lines a row up with data from one month earlier.
# month_id is sliced positionally: characters 0-3 are the year and everything
# from index 5 onward is the month number.
month_label = fire_bcount['month_id']
year_text = month_label.str[:4]
month_number = month_label.str[5:].astype(int)

# December rolls over to month 1 of the next year; any other month just advances by one.
rollover_key = (year_text.astype(int) + 1).astype(str) + '_1'
same_year_key = year_text + '_' + (month_number + 1).astype(str)

# NOTE(review): the month part of the key is written without zero padding
# ('2015_2', never '2015_02'), so it only matches month_id values stored the
# same way - confirm the month_id format in the source table.
fire_bcount['month_id_key'] = np.where(month_number == 12, rollover_key, same_year_key)

In [26]:
# Fan every (month_id, GRID_ID) row out to one row per neighboring grid listed in the mapping.
fire_bcount_merge = fire_bcount.merge(
    grid_neighbor_map,
    how='left',
    left_on='GRID_ID',
    right_on='GRID_ID',
)

# Pair each neighbor (GRID_ID_adj) with that neighbor's own row whose month_id_key equals
# the current month_id, i.e. the neighbor's value from the preceding month. Columns present
# on both sides come back suffixed _x (current grid) and _y (neighbor).
# Because the join is inner, grid-months with no matching previous-month neighbor row
# are absent from the result.
fire_bcount_final = (
    fire_bcount_merge
    .merge(
        fire_bcount,
        how='inner',
        left_on=['GRID_ID_adj', 'month_id'],
        right_on=['GRID_ID', 'month_id_key'],
    )
    # Scale each neighbor's Y_bin by the mapping's multiplier (normalizes edge grids).
    .assign(adj_fire_bcount=lambda pairs: pairs['Y_bin_y'] * pairs['multiplier'])
    # Total the weighted neighbor values for every current (month, grid).
    .groupby(['month_id_x', 'GRID_ID_x'])['adj_fire_bcount']
    .sum()
    .reset_index()
    .rename(columns={'month_id_x': 'month_id', 'GRID_ID_x': 'GRID_ID'})
)

## Create count of nearby grid fires

In [27]:
# Collapse target_full (loaded from target_sub_adj.csv) to one row per
# (month_id, GRID_ID), keeping the largest Y_fire_count found in each group.
# NOTE(review): this takes max, not sum - if target_full can hold several rows
# for the same (month_id, GRID_ID), only the largest count survives. Confirm
# that is the intended aggregation.
fire_count = (
    target_full
    .groupby(['month_id', 'GRID_ID'])['Y_fire_count']
    .max()
    .reset_index()
)

In [28]:
# Build month_id_key: the label of the month that FOLLOWS month_id, in the same
# '<year>_<month>' text form. Joining another frame's month_id against this key
# lines a row up with data from one month earlier.
# month_id is sliced positionally: characters 0-3 are the year and everything
# from index 5 onward is the month number.
month_label = fire_count['month_id']
year_text = month_label.str[:4]
month_number = month_label.str[5:].astype(int)

# December rolls over to month 1 of the next year; any other month just advances by one.
rollover_key = (year_text.astype(int) + 1).astype(str) + '_1'
same_year_key = year_text + '_' + (month_number + 1).astype(str)

# NOTE(review): the month part of the key is written without zero padding
# ('2015_2', never '2015_02'), so it only matches month_id values stored the
# same way - confirm the month_id format in the source table.
fire_count['month_id_key'] = np.where(month_number == 12, rollover_key, same_year_key)

In [29]:
# Fan every (month_id, GRID_ID) row out to one row per neighboring grid listed in the mapping.
fire_count_merge = fire_count.merge(
    grid_neighbor_map,
    how='left',
    left_on='GRID_ID',
    right_on='GRID_ID',
)

# Pair each neighbor (GRID_ID_adj) with that neighbor's own row whose month_id_key equals
# the current month_id, i.e. the neighbor's count from the preceding month. Columns present
# on both sides come back suffixed _x (current grid) and _y (neighbor).
# Because the join is inner, grid-months with no matching previous-month neighbor row
# are absent from the result.
fire_count_final = (
    fire_count_merge
    .merge(
        fire_count,
        how='inner',
        left_on=['GRID_ID_adj', 'month_id'],
        right_on=['GRID_ID', 'month_id_key'],
    )
    # Scale each neighbor's Y_fire_count by the mapping's multiplier (normalizes edge grids).
    .assign(adj_fire_count=lambda pairs: pairs['Y_fire_count_y'] * pairs['multiplier'])
    # Total the weighted neighbor counts for every current (month, grid).
    .groupby(['month_id_x', 'GRID_ID_x'])['adj_fire_count']
    .sum()
    .reset_index()
    .rename(columns={'month_id_x': 'month_id', 'GRID_ID_x': 'GRID_ID'})
)

## Merge and Export

In [30]:
# Put the two neighbor features (adj_fire_count and adj_fire_bcount) side by side,
# keyed by (month_id, GRID_ID). The inner join keeps only keys present in both frames.
adj_fire_final = pd.merge(
    fire_count_final,
    fire_bcount_final,
    how='inner',
    on=['month_id', 'GRID_ID'],
)

In [36]:
# Write the combined neighbor-fire features to CSV under the repo's engineered_features
# folder. index=False leaves the row index out of the file.
subdir_to = 'data/clean_data/engineered_features/'
filename_to = 'adj_fire_final.csv'
output_path = ''.join([gitdir, subdir_to, filename_to])
adj_fire_final.to_csv(output_path, index=False)