In [1]:
# SQLAlchemy
from sqlalchemy import create_engine
from sqlalchemy import inspect

# Pandas
import pandas as pd
from datetime import datetime

# Path to sqlite
# Relative location of the SQLite database file; the engine URL is built from it.
database_path = "Resources/FPA_FOD_20170508.sqlite"

import os
from urllib.parse import quote

import psycopg2

from config import db_password

In [2]:
# Open a SQLAlchemy engine on the local SQLite file
sqlite_url = "sqlite:///{}".format(database_path)
engine = create_engine(sqlite_url)

In [3]:
# Inspect the database and list the tables it contains
inspector = inspect(engine)
table_names = inspector.get_table_names()
table_names

['ElementaryGeometries',
 'Fires',
 'KNN',
 'NWCG_UnitIDActive_20170109',
 'SpatialIndex',
 'geometry_columns',
 'geometry_columns_auth',
 'geometry_columns_field_infos',
 'geometry_columns_statistics',
 'geometry_columns_time',
 'idx_Fires_Shape',
 'idx_Fires_Shape_node',
 'idx_Fires_Shape_parent',
 'idx_Fires_Shape_rowid',
 'spatial_ref_sys',
 'spatial_ref_sys_aux',
 'spatialite_history',
 'sql_statements_log',
 'sqlite_sequence',
 'views_geometry_columns',
 'views_geometry_columns_auth',
 'views_geometry_columns_field_infos',
 'views_geometry_columns_statistics',
 'virts_geometry_columns',
 'virts_geometry_columns_auth',
 'virts_geometry_columns_field_infos',
 'virts_geometry_columns_statistics']

In [4]:
# List the column names of the 'Fires' table, one per line
columns = inspector.get_columns('Fires')
fires_column_names = [column_info["name"] for column_info in columns]
for fires_column_name in fires_column_names:
    print(fires_column_name)


OBJECTID
FOD_ID
FPA_ID
SOURCE_SYSTEM_TYPE
SOURCE_SYSTEM
NWCG_REPORTING_AGENCY
NWCG_REPORTING_UNIT_ID
NWCG_REPORTING_UNIT_NAME
SOURCE_REPORTING_UNIT
SOURCE_REPORTING_UNIT_NAME
LOCAL_FIRE_REPORT_ID
LOCAL_INCIDENT_ID
FIRE_CODE
FIRE_NAME
ICS_209_INCIDENT_NUMBER
ICS_209_NAME
MTBS_ID
MTBS_FIRE_NAME
COMPLEX_NAME
FIRE_YEAR
DISCOVERY_DATE
DISCOVERY_DOY
DISCOVERY_TIME
STAT_CAUSE_CODE
STAT_CAUSE_DESCR
CONT_DATE
CONT_DOY
CONT_TIME
FIRE_SIZE
FIRE_SIZE_CLASS
LATITUDE
LONGITUDE
OWNER_CODE
OWNER_DESCR
STATE
COUNTY
FIPS_CODE
FIPS_NAME
Shape


In [5]:
# Make a connection to the SQL database
# `engine` at this point is the SQLite engine; `conn` is passed to pd.read_sql in the next cell.
# NOTE(review): no cell in the visible notebook closes this connection — confirm whether it should be.
conn = engine.connect()

In [6]:
# Load every column of the Fires table, keeping only the rows whose state is Oregon
oregon_query = "SELECT * FROM Fires WHERE State = 'OR'"
fire_data_df = pd.read_sql(sql=oregon_query, con=conn)
fire_data_df

Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape
0,32,32,FS-1418907,FED,FS-FIRESTAT,FS,USORDEF,Deschutes National Forest,0601,Deschutes National Forest,...,A,43.995556,-121.414167,13.0,STATE OR PRIVATE,OR,17,017,Deschutes,b'\x00\x01\xad\x10\x00\x00\x90\xaf\xeb\xb4\x81...
1,36,36,FS-1418916,FED,FS-FIRESTAT,FS,USORDEF,Deschutes National Forest,0601,Deschutes National Forest,...,A,44.043333,-121.386111,5.0,USFS,OR,17,017,Deschutes,b'\x00\x01\xad\x10\x00\x00\x9c\x84_\x0b\xb6X^\...
2,44,44,FS-1418924,FED,FS-FIRESTAT,FS,USORDEF,Deschutes National Forest,0601,Deschutes National Forest,...,A,43.955556,-121.352222,5.0,USFS,OR,17,017,Deschutes,b'\x00\x01\xad\x10\x00\x00\xc4\xf4\x10\xcf\x8a...
3,47,47,FS-1418935,FED,FS-FIRESTAT,FS,USORUMF,Umatilla National Forest,0614,Umatilla National Forest,...,B,44.911111,-119.696111,5.0,USFS,OR,69,069,Wheeler,b'\x00\x01\xad\x10\x00\x00@\xf5\x9c\x15\x8d\xe...
4,49,49,FS-1418943,FED,FS-FIRESTAT,FS,USORDEF,Deschutes National Forest,0601,Deschutes National Forest,...,A,43.725278,-121.574167,5.0,USFS,OR,17,017,Deschutes,b'\x00\x01\xad\x10\x00\x00\x98\x86\x8f%\xbfd^\...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61083,1847484,300273935,SFO-2015ORODF1578108416,NONFED,ST-NASF,ST/C&L,USORORS,Oregon Department of Forestry State Headquarters,ORODF,Oregon Department of Forestry,...,A,43.971670,-124.100280,8.0,PRIVATE,OR,Lane,039,Lane,b'\x00\x01\xad\x10\x00\x00T\x1c\xce\xfcj\x06_\...
61084,1847532,300274008,SFO-2015ORODF1572111916,NONFED,ST-NASF,ST/C&L,USORORS,Oregon Department of Forestry State Headquarters,ORODF,Oregon Department of Forestry,...,A,43.343330,-124.197500,8.0,PRIVATE,OR,Coos,011,Coos,b'\x00\x01\xad\x10\x00\x00p=\n\xd7\xa3\x0c_\xc...
61085,1849328,300276174,SFO-2015ORODF1595505916,NONFED,ST-NASF,ST/C&L,USORORS,Oregon Department of Forestry State Headquarters,ORODF,Oregon Department of Forestry,...,A,44.003330,-121.321670,8.0,PRIVATE,OR,Deschutes,017,Deschutes,b'\x00\x01\xad\x10\x00\x00\xac\x86\xc4=\x96T^\...
61086,1859995,300293828,ICS209_2015_2813572,INTERAGCY,IA-ICS209,BLM,USORPRD,Prineville District,ORPRD,Prineville District,...,E,44.526700,-121.389700,14.0,MISSING/NOT SPECIFIED,OR,Jefferson County,031,Jefferson,b'\x00\x01\xad\x10\x00\x00\x10\xd0D\xd8\xf0X^\...


In [7]:
# What are the data types?
# Shows the dtype pandas assigned to each column of the Oregon fire data.
fire_data_df.dtypes

OBJECTID                        int64
FOD_ID                          int64
FPA_ID                         object
SOURCE_SYSTEM_TYPE             object
SOURCE_SYSTEM                  object
NWCG_REPORTING_AGENCY          object
NWCG_REPORTING_UNIT_ID         object
NWCG_REPORTING_UNIT_NAME       object
SOURCE_REPORTING_UNIT          object
SOURCE_REPORTING_UNIT_NAME     object
LOCAL_FIRE_REPORT_ID           object
LOCAL_INCIDENT_ID              object
FIRE_CODE                      object
FIRE_NAME                      object
ICS_209_INCIDENT_NUMBER        object
ICS_209_NAME                   object
MTBS_ID                        object
MTBS_FIRE_NAME                 object
COMPLEX_NAME                   object
FIRE_YEAR                       int64
DISCOVERY_DATE                float64
DISCOVERY_DOY                   int64
DISCOVERY_TIME                 object
STAT_CAUSE_CODE               float64
STAT_CAUSE_DESCR               object
CONT_DATE                     float64
CONT_DOY    

In [8]:
# Keep only the columns that are useful for this analysis
analysis_columns = [
    "FOD_ID", "FIRE_NAME", "FIRE_SIZE", "FIRE_SIZE_CLASS",
    "LATITUDE", "LONGITUDE",
    "FIRE_YEAR", "DISCOVERY_DATE", "DISCOVERY_DOY", "CONT_DATE", "CONT_DOY",
    "STAT_CAUSE_CODE", "STAT_CAUSE_DESCR",
    "FIPS_CODE", "FIPS_NAME",
]
oregon_fires_df = fire_data_df[analysis_columns]
oregon_fires_df.head(20)

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,CONT_DATE,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME
0,32,0028,0.1,A,43.995556,-121.414167,2005,2453406.5,36,2453407.5,37.0,4.0,Campfire,17,Deschutes
1,36,0044,0.1,A,44.043333,-121.386111,2005,2453434.5,64,2453434.5,64.0,4.0,Campfire,17,Deschutes
2,44,0088,0.1,A,43.955556,-121.352222,2005,2453465.5,95,2453465.5,95.0,4.0,Campfire,17,Deschutes
3,47,EARLY,5.0,B,44.911111,-119.696111,2005,2453440.5,70,2453440.5,70.0,9.0,Miscellaneous,69,Wheeler
4,49,0032,0.1,A,43.725278,-121.574167,2005,2453417.5,47,2453417.5,47.0,4.0,Campfire,17,Deschutes
5,50,0041,0.1,A,44.41,-121.315556,2005,2453431.5,61,2453431.5,61.0,5.0,Debris Burning,17,Deschutes
6,51,HALFWAY 108,0.1,A,42.133889,-121.234167,2005,2453518.5,148,2453518.5,148.0,1.0,Lightning,37,Lake
7,52,PARADISE 104,0.1,A,42.311389,-120.902778,2005,2453507.5,137,2453507.5,137.0,9.0,Miscellaneous,37,Lake
8,53,STUMP 106,0.1,A,42.950833,-120.836111,2005,2453517.5,147,2453517.5,147.0,7.0,Arson,37,Lake
9,56,COTTONWOOD 128,0.1,A,42.386944,-120.808611,2005,2453546.5,176,2453547.5,177.0,1.0,Lightning,37,Lake


In [9]:
# See what data is missing
# count() reports the number of non-null values per column, so a lower total means gaps in that column.
oregon_fires_df.count()

FOD_ID              61088
FIRE_NAME           56970
FIRE_SIZE           61088
FIRE_SIZE_CLASS     61088
LATITUDE            61088
LONGITUDE           61088
FIRE_YEAR           61088
DISCOVERY_DATE      61088
DISCOVERY_DOY       61088
CONT_DATE           60751
CONT_DOY            60751
STAT_CAUSE_CODE     61088
STAT_CAUSE_DESCR    61088
FIPS_CODE           40178
FIPS_NAME           40178
dtype: int64

In [10]:
# Drop fires that don't have a containment date.
# how="any" removes a row when either containment field is missing, so every
# remaining row has a CONT_DATE that can be converted to a datetime
# (how='all' would keep a row that has CONT_DOY but no CONT_DATE).
# .copy() makes the result an independent frame, so adding columns to it
# afterwards does not raise pandas' SettingWithCopyWarning.
oregon_fires_df = oregon_fires_df.dropna(subset=["CONT_DATE", "CONT_DOY"], how="any").copy()
oregon_fires_df.count()

FOD_ID              60751
FIRE_NAME           56633
FIRE_SIZE           60751
FIRE_SIZE_CLASS     60751
LATITUDE            60751
LONGITUDE           60751
FIRE_YEAR           60751
DISCOVERY_DATE      60751
DISCOVERY_DOY       60751
CONT_DATE           60751
CONT_DOY            60751
STAT_CAUSE_CODE     60751
STAT_CAUSE_DESCR    60751
FIPS_CODE           39916
FIPS_NAME           39916
dtype: int64

In [11]:
# Preview the last rows of the frame after the containment-date filter
oregon_fires_df.tail()

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,CONT_DATE,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME
61081,300273801,OLD PORTLAND/KASTER#3,0.01,A,45.85047,-122.81353,2015,2457292.5,270,2457292.5,270.0,9.0,Miscellaneous,9,Columbia
61082,300273869,WINHURST LANE,0.1,A,43.09,-124.39028,2015,2457293.5,271,2457294.5,272.0,9.0,Miscellaneous,11,Coos
61083,300273935,MUNSEL CREEK,0.01,A,43.97167,-124.10028,2015,2457297.5,275,2457297.5,275.0,9.0,Miscellaneous,39,Lane
61084,300274008,MP 240 HWY 101,0.01,A,43.34333,-124.1975,2015,2457299.5,277,2457300.5,278.0,9.0,Miscellaneous,11,Coos
61085,300276174,CHINA HAT 698,0.1,A,44.00333,-121.32167,2015,2457357.5,335,2457360.5,338.0,9.0,Miscellaneous,17,Deschutes


In [12]:
# Explore causes of fires
# Number of fires per cause description.
oregon_fires_df["STAT_CAUSE_DESCR"].value_counts()

Lightning            31329
Campfire              7418
Debris Burning        5067
Miscellaneous         4907
Equipment Use         4720
Smoking               2295
Arson                 1822
Children              1175
Missing/Undefined      808
Fireworks              600
Railroad               314
Powerline              215
Structure               81
Name: STAT_CAUSE_DESCR, dtype: int64

In [13]:
# Explore counties where fires are reported
# dropna=False keeps fires with no county name in the tally (reported under NaN).
oregon_fires_df["FIPS_NAME"].value_counts(dropna=False)

NaN           20835
Jackson        3650
Klamath        3541
Douglas        3532
Deschutes      3251
Lane           2944
Grant          2588
Josephine      2530
Lake           1689
Clackamas      1438
Harney         1188
Crook          1184
Wallowa         948
Umatilla        947
Baker           919
Coos            912
Linn            815
Wasco           761
Union           722
Wheeler         678
Malheur         672
Marion          505
Hood River      496
Jefferson       442
Clatsop         414
Morrow          411
Tillamook       409
Curry           395
Columbia        372
Lincoln         339
Yamhill         305
Benton          277
Washington      247
Polk            215
Multnomah        99
Sherman          43
Gilliam          38
Name: FIPS_NAME, dtype: int64

In [14]:
# Convert the Julian day numbers into calendar dates.
# DISCOVERY_DATE and CONT_DATE hold Julian day numbers (e.g. 2453406.5);
# pd.to_datetime decodes them with unit='D' and origin='julian'.
# The converted discovery column is named DISCOVER_DATE because DISCOVERY_DATE
# still holds the raw Julian values at this point.
# assign() returns a new frame instead of writing columns into a frame that was
# produced by slicing, which avoids pandas' SettingWithCopyWarning.
oregon_fires_df = oregon_fires_df.assign(
    DISCOVER_DATE=pd.to_datetime(oregon_fires_df["DISCOVERY_DATE"], unit='D', origin='julian'),
    CONTAINMENT_DATE=pd.to_datetime(oregon_fires_df["CONT_DATE"], unit='D', origin='julian'),
)
oregon_fires_df.head()

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DATE,DISCOVERY_DOY,CONT_DATE,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME,DISCOVER_DATE,CONTAINMENT_DATE
0,32,0028,0.1,A,43.995556,-121.414167,2005,2453406.5,36,2453407.5,37.0,4.0,Campfire,17,Deschutes,2005-02-05,2005-02-06
1,36,0044,0.1,A,44.043333,-121.386111,2005,2453434.5,64,2453434.5,64.0,4.0,Campfire,17,Deschutes,2005-03-05,2005-03-05
2,44,0088,0.1,A,43.955556,-121.352222,2005,2453465.5,95,2453465.5,95.0,4.0,Campfire,17,Deschutes,2005-04-05,2005-04-05
3,47,EARLY,5.0,B,44.911111,-119.696111,2005,2453440.5,70,2453440.5,70.0,9.0,Miscellaneous,69,Wheeler,2005-03-11,2005-03-11
4,49,0032,0.1,A,43.725278,-121.574167,2005,2453417.5,47,2453417.5,47.0,4.0,Campfire,17,Deschutes,2005-02-16,2005-02-16


In [15]:
# Remove the original columns that held the Julian day numbers
julian_columns = ["DISCOVERY_DATE", "CONT_DATE"]
oregon_fires_df = oregon_fires_df.drop(columns=julian_columns)
oregon_fires_df.head()

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DOY,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME,DISCOVER_DATE,CONTAINMENT_DATE
0,32,0028,0.1,A,43.995556,-121.414167,2005,36,37.0,4.0,Campfire,17,Deschutes,2005-02-05,2005-02-06
1,36,0044,0.1,A,44.043333,-121.386111,2005,64,64.0,4.0,Campfire,17,Deschutes,2005-03-05,2005-03-05
2,44,0088,0.1,A,43.955556,-121.352222,2005,95,95.0,4.0,Campfire,17,Deschutes,2005-04-05,2005-04-05
3,47,EARLY,5.0,B,44.911111,-119.696111,2005,70,70.0,9.0,Miscellaneous,69,Wheeler,2005-03-11,2005-03-11
4,49,0032,0.1,A,43.725278,-121.574167,2005,47,47.0,4.0,Campfire,17,Deschutes,2005-02-16,2005-02-16


In [16]:
# Number of whole days between discovery and containment of each fire
burn_time = oregon_fires_df["CONTAINMENT_DATE"].sub(oregon_fires_df["DISCOVER_DATE"])
oregon_fires_df["DURATION"] = burn_time.dt.days
oregon_fires_df["DURATION"].value_counts()

0       42330
1       11198
2        2458
3        1449
4         706
        ...  
262         1
274         1
118         1
207         1
2557        1
Name: DURATION, Length: 132, dtype: int64

In [17]:
# Extract the calendar month number of the discovery and containment dates.
# .dt.month reads the month directly as a number, instead of formatting each
# date as text with strftime('%m') and parsing that text back into an integer.
oregon_fires_df["discovery_month"] = oregon_fires_df["DISCOVER_DATE"].dt.month
oregon_fires_df["containment_month"] = oregon_fires_df["CONTAINMENT_DATE"].dt.month
oregon_fires_df

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DOY,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME,DISCOVER_DATE,CONTAINMENT_DATE,DURATION,discovery_month,containment_month
0,32,0028,0.10,A,43.995556,-121.414167,2005,36,37.0,4.0,Campfire,017,Deschutes,2005-02-05,2005-02-06,1,2,2
1,36,0044,0.10,A,44.043333,-121.386111,2005,64,64.0,4.0,Campfire,017,Deschutes,2005-03-05,2005-03-05,0,3,3
2,44,0088,0.10,A,43.955556,-121.352222,2005,95,95.0,4.0,Campfire,017,Deschutes,2005-04-05,2005-04-05,0,4,4
3,47,EARLY,5.00,B,44.911111,-119.696111,2005,70,70.0,9.0,Miscellaneous,069,Wheeler,2005-03-11,2005-03-11,0,3,3
4,49,0032,0.10,A,43.725278,-121.574167,2005,47,47.0,4.0,Campfire,017,Deschutes,2005-02-16,2005-02-16,0,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61081,300273801,OLD PORTLAND/KASTER#3,0.01,A,45.850470,-122.813530,2015,270,270.0,9.0,Miscellaneous,009,Columbia,2015-09-27,2015-09-27,0,9,9
61082,300273869,WINHURST LANE,0.10,A,43.090000,-124.390280,2015,271,272.0,9.0,Miscellaneous,011,Coos,2015-09-28,2015-09-29,1,9,9
61083,300273935,MUNSEL CREEK,0.01,A,43.971670,-124.100280,2015,275,275.0,9.0,Miscellaneous,039,Lane,2015-10-02,2015-10-02,0,10,10
61084,300274008,MP 240 HWY 101,0.01,A,43.343330,-124.197500,2015,277,278.0,9.0,Miscellaneous,011,Coos,2015-10-04,2015-10-05,1,10,10


In [18]:
# Check
# Preview the first rows of the frame, including the duration and month columns.
oregon_fires_df.head()

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DOY,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,FIPS_CODE,FIPS_NAME,DISCOVER_DATE,CONTAINMENT_DATE,DURATION,discovery_month,containment_month
0,32,0028,0.1,A,43.995556,-121.414167,2005,36,37.0,4.0,Campfire,17,Deschutes,2005-02-05,2005-02-06,1,2,2
1,36,0044,0.1,A,44.043333,-121.386111,2005,64,64.0,4.0,Campfire,17,Deschutes,2005-03-05,2005-03-05,0,3,3
2,44,0088,0.1,A,43.955556,-121.352222,2005,95,95.0,4.0,Campfire,17,Deschutes,2005-04-05,2005-04-05,0,4,4
3,47,EARLY,5.0,B,44.911111,-119.696111,2005,70,70.0,9.0,Miscellaneous,69,Wheeler,2005-03-11,2005-03-11,0,3,3
4,49,0032,0.1,A,43.725278,-121.574167,2005,47,47.0,4.0,Campfire,17,Deschutes,2005-02-16,2005-02-16,0,2,2


In [19]:
# Show the dtype of every column in the working frame
oregon_fires_df.dtypes

FOD_ID                        int64
FIRE_NAME                    object
FIRE_SIZE                   float64
FIRE_SIZE_CLASS              object
LATITUDE                    float64
LONGITUDE                   float64
FIRE_YEAR                     int64
DISCOVERY_DOY                 int64
CONT_DOY                    float64
STAT_CAUSE_CODE             float64
STAT_CAUSE_DESCR             object
FIPS_CODE                    object
FIPS_NAME                    object
DISCOVER_DATE        datetime64[ns]
CONTAINMENT_DATE     datetime64[ns]
DURATION                      int64
discovery_month               int64
containment_month             int64
dtype: object

In [20]:
# Rename the FIPS columns so they read as county code / county name
county_column_names = {"FIPS_CODE": "COUNTY_CODE", "FIPS_NAME": "COUNTY_NAME"}
oregon_fires_df = oregon_fires_df.rename(columns=county_column_names)
oregon_fires_df.head()

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,FIRE_YEAR,DISCOVERY_DOY,CONT_DOY,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,COUNTY_CODE,COUNTY_NAME,DISCOVER_DATE,CONTAINMENT_DATE,DURATION,discovery_month,containment_month
0,32,0028,0.1,A,43.995556,-121.414167,2005,36,37.0,4.0,Campfire,17,Deschutes,2005-02-05,2005-02-06,1,2,2
1,36,0044,0.1,A,44.043333,-121.386111,2005,64,64.0,4.0,Campfire,17,Deschutes,2005-03-05,2005-03-05,0,3,3
2,44,0088,0.1,A,43.955556,-121.352222,2005,95,95.0,4.0,Campfire,17,Deschutes,2005-04-05,2005-04-05,0,4,4
3,47,EARLY,5.0,B,44.911111,-119.696111,2005,70,70.0,9.0,Miscellaneous,69,Wheeler,2005-03-11,2005-03-11,0,3,3
4,49,0032,0.1,A,43.725278,-121.574167,2005,47,47.0,4.0,Campfire,17,Deschutes,2005-02-16,2005-02-16,0,2,2


In [21]:
# Rename DISCOVER_DATE to DISCOVERY_DATE, then convert every column name to lower case
oregon_fires_df = (
    oregon_fires_df
    .rename(columns={"DISCOVER_DATE": "DISCOVERY_DATE"})
    .rename(columns=str.lower)
)
oregon_fires_df

Unnamed: 0,fod_id,fire_name,fire_size,fire_size_class,latitude,longitude,fire_year,discovery_doy,cont_doy,stat_cause_code,stat_cause_descr,county_code,county_name,discovery_date,containment_date,duration,discovery_month,containment_month
0,32,0028,0.10,A,43.995556,-121.414167,2005,36,37.0,4.0,Campfire,017,Deschutes,2005-02-05,2005-02-06,1,2,2
1,36,0044,0.10,A,44.043333,-121.386111,2005,64,64.0,4.0,Campfire,017,Deschutes,2005-03-05,2005-03-05,0,3,3
2,44,0088,0.10,A,43.955556,-121.352222,2005,95,95.0,4.0,Campfire,017,Deschutes,2005-04-05,2005-04-05,0,4,4
3,47,EARLY,5.00,B,44.911111,-119.696111,2005,70,70.0,9.0,Miscellaneous,069,Wheeler,2005-03-11,2005-03-11,0,3,3
4,49,0032,0.10,A,43.725278,-121.574167,2005,47,47.0,4.0,Campfire,017,Deschutes,2005-02-16,2005-02-16,0,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61081,300273801,OLD PORTLAND/KASTER#3,0.01,A,45.850470,-122.813530,2015,270,270.0,9.0,Miscellaneous,009,Columbia,2015-09-27,2015-09-27,0,9,9
61082,300273869,WINHURST LANE,0.10,A,43.090000,-124.390280,2015,271,272.0,9.0,Miscellaneous,011,Coos,2015-09-28,2015-09-29,1,9,9
61083,300273935,MUNSEL CREEK,0.01,A,43.971670,-124.100280,2015,275,275.0,9.0,Miscellaneous,039,Lane,2015-10-02,2015-10-02,0,10,10
61084,300274008,MP 240 HWY 101,0.01,A,43.343330,-124.197500,2015,277,278.0,9.0,Miscellaneous,011,Coos,2015-10-04,2015-10-05,1,10,10


In [22]:
# Write the dataframe to a csv file in the Resources folder
# makedirs(..., exist_ok=True) creates the folder when missing and does nothing when it already exists.
# index=False leaves the dataframe's row index out of the file.
os.makedirs('Resources', exist_ok=True)
oregon_fires_df.to_csv('Resources/oregon_fires.csv',index=False)

In [23]:
# Fire causes lookup: one row per distinct (code, description) pair,
# ordered by and indexed on stat_cause_code
Fire_Causes = (
    oregon_fires_df[["stat_cause_code", "stat_cause_descr"]]
    .drop_duplicates()
    .sort_values(by=["stat_cause_code"])
    .set_index("stat_cause_code")
)
Fire_Causes

Unnamed: 0_level_0,stat_cause_descr
stat_cause_code,Unnamed: 1_level_1
1.0,Lightning
2.0,Equipment Use
3.0,Smoking
4.0,Campfire
5.0,Debris Burning
6.0,Railroad
7.0,Arson
8.0,Children
9.0,Miscellaneous
10.0,Fireworks


In [24]:
# Create a dataFrame of Counties: one row per distinct (county_code, county_name)
# pair, sorted by and indexed on county_code.
# Fires with no county recorded have NaN in both columns; without the dropna()
# they would collapse into a single all-NaN row and appear in the lookup as a
# county with a missing key.
Counties = (
    oregon_fires_df[["county_code", "county_name"]]
    .dropna(subset=["county_code"])
    .drop_duplicates()
    .sort_values(by=["county_code"])
    .set_index("county_code")
)
Counties

Unnamed: 0_level_0,county_name
county_code,Unnamed: 1_level_1
1.0,Baker
3.0,Benton
5.0,Clackamas
7.0,Clatsop
9.0,Columbia
11.0,Coos
13.0,Crook
15.0,Curry
17.0,Deschutes
19.0,Douglas


In [25]:
# Fire names lookup: fire_name indexed by fod_id
Fire_Names = oregon_fires_df[["fod_id", "fire_name"]].set_index("fod_id")

In [26]:
# Display the Fire_Names dataframe
Fire_Names

Unnamed: 0_level_0,fire_name
fod_id,Unnamed: 1_level_1
32,0028
36,0044
44,0088
47,EARLY
49,0032
...,...
300273801,OLD PORTLAND/KASTER#3
300273869,WINHURST LANE
300273935,MUNSEL CREEK
300274008,MP 240 HWY 101


In [27]:
# All remaining per-fire information, indexed by fod_id, used to populate the Fires table
fires_table_columns = [
    "fod_id",
    "fire_size", "fire_size_class",
    "latitude", "longitude",
    "fire_year",
    "discovery_date", "containment_date",
    "discovery_month", "containment_month",
    "duration",
    "county_code", "stat_cause_code",
]
Fires = oregon_fires_df[fires_table_columns].set_index("fod_id")
Fires.head()

Unnamed: 0_level_0,fire_size,fire_size_class,latitude,longitude,fire_year,discovery_date,containment_date,discovery_month,containment_month,duration,county_code,stat_cause_code
fod_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32,0.1,A,43.995556,-121.414167,2005,2005-02-05,2005-02-06,2,2,1,17,4.0
36,0.1,A,44.043333,-121.386111,2005,2005-03-05,2005-03-05,3,3,0,17,4.0
44,0.1,A,43.955556,-121.352222,2005,2005-04-05,2005-04-05,4,4,0,17,4.0
47,5.0,B,44.911111,-119.696111,2005,2005-03-11,2005-03-11,3,3,0,69,9.0
49,0.1,A,43.725278,-121.574167,2005,2005-02-16,2005-02-16,2,2,0,17,4.0


In [28]:
# Connect to the RDS postgres db so the dataframes can be added to postgres.
# The password is percent-encoded before it goes into the URL: characters such
# as '@', ':' or '/' in a raw password would otherwise be read as URL
# delimiters and the connection string would be parsed incorrectly.
# safe="" makes quote() encode '/' as well.
# NOTE: this rebinds `engine`, which up to here referred to the SQLite engine.
db_string = f"postgresql://postgres:{quote(db_password, safe='')}@fires.crlyg1rjxxj2.us-west-2.rds.amazonaws.com:5432/postgres"
engine = create_engine(db_string)

In [29]:
# Write Fire_Causes to the "fire_causes" table; if_exists="replace" overwrites an existing table of that name
Fire_Causes.to_sql(name="fire_causes", if_exists="replace", con=engine)

In [30]:
# Write Counties to the "counties" table; if_exists="replace" overwrites an existing table of that name
Counties.to_sql(name="counties", if_exists="replace", con=engine)

In [31]:
# Write Fire_Names to the "fire_names" table; if_exists="replace" overwrites an existing table of that name
Fire_Names.to_sql(name="fire_names", if_exists="replace", con=engine)

In [32]:
# Write Fires to the "fires" table; if_exists="replace" overwrites an existing table of that name
Fires.to_sql(name="fires", if_exists="replace", con=engine)