# Preprocessing the Wildfires File

In [11]:
import sqlite3
import pandas as pd
import numpy as np
import pickle

In [12]:
cnx = sqlite3.connect('../us_wildfire_dataset/FPA_FOD_20170508.sqlite')
df = pd.read_sql_query("SELECT DISCOVERY_DATE, LATITUDE, LONGITUDE, FIRE_SIZE, STATE FROM fires", cnx)

In [13]:
df.shape

(1880465, 5)

In [14]:
# filter california fires
df = df[df["STATE"] == "CA"]

In [15]:
df

Unnamed: 0,DISCOVERY_DATE,LATITUDE,LONGITUDE,FIRE_SIZE,STATE
0,2453403.5,40.036944,-121.005833,0.10,CA
1,2453137.5,38.933056,-120.404444,0.25,CA
2,2453156.5,38.984167,-120.735556,0.10,CA
3,2453184.5,38.559167,-119.913333,0.10,CA
4,2453184.5,38.559167,-119.933056,0.10,CA
5,2453186.5,38.635278,-120.103611,0.10,CA
6,2453187.5,38.688333,-120.153333,0.10,CA
7,2453437.5,40.968056,-122.433889,0.80,CA
8,2453444.5,41.233611,-122.283333,1.00,CA
9,2453187.5,38.548333,-120.149167,0.10,CA


In [23]:
# california exists between 32 degrees North and 42 degrees north
# and 126 degrees west and 114 degrees west

# the below code will generate a grid of points that will break cali down

import numpy as np

resolution = 5 # resolution defines the number of grid squares horizontally and vertically
longitude_range = np.linspace(-116, -118, resolution + 1)
latitude_range = np.linspace(34, 36, resolution + 1)

datapoints = {}
for row in range(resolution):
    for col in range(resolution):
        rel = df[df["LATITUDE"] >= latitude_range[row]]
        rel = rel[rel["LATITUDE"] < latitude_range[row + 1]]
        rel = rel[rel["LONGITUDE"] <= longitude_range[col]]
        rel = rel[rel["LONGITUDE"] > longitude_range[col + 1]]
        datapoints[(row, col)] = rel
        
print(datapoints[(0, 2)])


         DISCOVERY_DATE   LATITUDE   LONGITUDE  FIRE_SIZE STATE
7198          2453779.5  34.238056 -117.186389       0.10    CA
7276          2453779.5  34.143889 -116.990000       0.20    CA
10714         2453923.5  34.331111 -116.811667       0.10    CA
11225         2453891.5  34.238056 -117.077500       0.01    CA
11329         2453944.5  34.246944 -116.977778       0.10    CA
11330         2453938.5  34.217500 -116.951111       0.10    CA
11670         2453758.5  34.200000 -117.135000     485.00    CA
11792         2453936.5  34.301944 -116.917778       0.10    CA
11855         2453934.5  34.208333 -117.129444       0.01    CA
11875         2453932.5  34.286667 -117.085278      10.00    CA
11884         2453941.5  34.316389 -117.041389       1.00    CA
11890         2453938.5  34.320278 -117.008611       0.10    CA
12271         2453938.5  34.223056 -117.098611      10.00    CA
12282         2453936.5  34.215278 -117.075833       0.25    CA
12285         2453936.5  34.200556 -117.

In [None]:
with open("../us_wildfire_dataset/ca_fires_raw.pkl", "wb") as f:
    pickle.dump(df, f, protocol=pickle.HIGHEST_PROTOCOL)