In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Locations of the two input CSVs (population estimates and fire occurrences).
pop_csv_path = 'Data/Oregon_Population_Estimates_By_v002.csv'
fire_csv_path = 'Data/fire-occurence.csv'

# Load both files into DataFrames.
pop_data = pd.read_csv(pop_csv_path)
fire_data = pd.read_csv(fire_csv_path)

# Show the fire-data column labels so the columns worth keeping can be picked out.
fire_data.columns

Index(['Serial', 'FireCategory', 'FireYear', 'Area', 'DistrictName',
       'UnitName', 'FullFireNumber', 'FireName', 'Size_class', 'EstTotalAcres',
       'Protected_Acres', 'HumanOrLightning', 'CauseBy', 'GeneralCause',
       'SpecificCause', 'Cause_Comments', 'Lat_DD', 'Long_DD', 'LatLongDD',
       'FO_LandOwnType', 'Twn', 'Rng', 'Sec', 'Subdiv', 'LandmarkLocation',
       'County', 'RegUseZone', 'RegUseRestriction', 'Industrial_Restriction',
       'Ign_DateTime', 'ReportDateTime', 'Discover_DateTime',
       'Control_DateTime', 'CreationDate', 'ModifiedDate', 'DistrictCode',
       'UnitCode', 'DistFireNumber'],
      dtype='object')

In [3]:
# Reduce the fire data to the columns used in the analysis and normalise the
# merge keys so they line up with the population data:
#   * 'FireYear' is renamed to 'Year' to match the population column name.
#   * 'County' is upper-cased and stripped of spaces (mismatched spellings
#     caused problems when merging).
#
# Columns are selected by name rather than dropped by position, and the result
# is built in a single chain instead of mutating in place. This makes the cell
# safe to re-run: rename() ignores a missing 'FireYear', and the string
# clean-up is idempotent. The .str accessor also passes missing County values
# through as NaN instead of raising.
fire_data = (
    fire_data
    .rename(columns={'FireYear': 'Year'})
    .loc[:, ['Year', 'DistrictName', 'EstTotalAcres', 'GeneralCause', 'County']]
    .assign(
        County=lambda df: df['County'].str.upper().str.replace(' ', '', regex=False)
    )
)

fire_data

Unnamed: 0,Year,DistrictName,EstTotalAcres,GeneralCause,County
0,2000,Central Oregon,0.75,Lightning,GRANT
1,2000,Northeast Oregon,80.00,Lightning,UNION
2,2001,Southwest Oregon,0.10,Smoking,JACKSON
3,2002,West Oregon,0.01,Recreation,BENTON
4,2003,West Oregon,0.01,Lightning,POLK
...,...,...,...,...,...
23485,2021,Walker Range - WRFPA,0.75,Equipment Use,KLAMATH
23486,2022,Western Lane,0.01,Recreation,LANE
23487,2021,Northeast Oregon,67.43,Lightning,UMATILLA
23488,2022,Northeast Oregon,0.10,Equipment Use,UMATILLA


In [4]:
# The population data is noted as covering only 2010-2020, so restrict the fire
# records to that same (inclusive) range of years before combining the two.
first_year, last_year = 2010, 2020
in_population_years = (fire_data['Year'] >= first_year) & (fire_data['Year'] <= last_year)

Fire_Data_Subset = fire_data[in_population_years]

Fire_Data_Subset

Unnamed: 0,Year,DistrictName,EstTotalAcres,GeneralCause,County
19,2010,Forest Grove,0.01,Recreation,COLUMBIA
20,2010,West Oregon,0.14,Recreation,BENTON
21,2010,Southwest Oregon,0.10,Lightning,JACKSON
22,2010,Central Oregon,0.05,Lightning,DESCHUTES
23,2010,Walker Range - WRFPA,0.25,Lightning,KLAMATH
...,...,...,...,...,...
22366,2011,Klamath-Lake,0.01,Equipment Use,KLAMATH
22367,2018,Southwest Oregon,0.01,Debris Burning,JOSEPHINE
22368,2018,Southwest Oregon,0.50,Equipment Use,JOSEPHINE
22369,2013,Northeast Oregon,0.01,Miscellaneous,WALLOWA


In [5]:
# Keep only the population columns needed for the merge and normalise 'County'
# the same way as the fire data (upper-case, no spaces) so the keys match.
#
# Columns are selected by name rather than dropped by position, and the result
# is built in a single chain instead of mutating in place, so re-running this
# cell gives the same frame instead of raising on out-of-range positions.
pop_data = (
    pop_data
    .loc[:, ['Year', 'County', 'Population_Estimate']]
    .assign(
        County=lambda df: df['County'].str.upper().str.replace(' ', '', regex=False)
    )
)

pop_data

Unnamed: 0,Year,County,Population_Estimate
0,2010,BAKER,16116
1,2011,BAKER,16069
2,2012,BAKER,16001
3,2013,BAKER,16027
4,2014,BAKER,16030
...,...,...,...
391,2016,YAMHILL,104143
392,2017,YAMHILL,105313
393,2018,YAMHILL,106390
394,2019,YAMHILL,106927


In [6]:
# Attach the county population for the matching year to every fire record.
# The join is an inner join (pandas' default, spelled out here): fire rows
# with no matching (Year, County) population row are dropped from the result.
Combined_Data = pd.merge(Fire_Data_Subset, pop_data, on=['Year', 'County'], how='inner')

# Save alongside the input CSVs. The inputs are read from 'Data/', so that
# directory is known to exist; a lowercase 'data/' path only resolves to it on
# case-insensitive filesystems.
Combined_Data.to_csv('Data/Fire_Data_With_Pop_Clean.csv')

# Display the merged rows ordered by county for a quick visual check.
Combined_Data.sort_values('County')

Unnamed: 0,Year,DistrictName,EstTotalAcres,GeneralCause,County,Population_Estimate
7589,2014,Northeast Oregon,0.01,Lightning,BAKER,16030
7575,2014,Northeast Oregon,0.10,Lightning,BAKER,16030
7576,2014,Northeast Oregon,13.20,Lightning,BAKER,16030
7577,2014,Northeast Oregon,3.30,Lightning,BAKER,16030
7578,2014,Northeast Oregon,1516.00,Lightning,BAKER,16030
...,...,...,...,...,...,...
10034,2014,Forest Grove,5.03,Equipment Use,YAMHILL,100850
10033,2014,West Oregon,0.20,Juveniles,YAMHILL,100850
10032,2014,West Oregon,0.10,Equipment Use,YAMHILL,100850
10039,2014,Forest Grove,3.00,Equipment Use,YAMHILL,100850
