In [3]:
import sqlite3
import pandas as pd

### Connect to National Fires Database
Data Source: https://www.fs.usda.gov/rds/archive/catalog/RDS-2013-0009.4  
USDA Data Description: https://www.fs.fed.us/rm/pubs_other/rmrs_2014_short_k001.pdf

In [4]:
firedata1 = 'RDS-2013-0009.4_SQLITE/Data/FPA_FOD_20170508.sqlite'

In [5]:
conn = sqlite3.connect(firedata1)

### Print All Table Names

In [7]:
cursor = conn.cursor()

In [8]:
sql = '''SELECT 
    name
FROM 
    sqlite_master 
WHERE 
    type ='table'
    '''

In [9]:
cursor.execute(sql)

<sqlite3.Cursor at 0x14d82151490>

In [10]:
cursor.fetchall()

[('spatial_ref_sys',),
 ('spatialite_history',),
 ('sqlite_sequence',),
 ('geometry_columns',),
 ('spatial_ref_sys_aux',),
 ('views_geometry_columns',),
 ('virts_geometry_columns',),
 ('geometry_columns_statistics',),
 ('views_geometry_columns_statistics',),
 ('virts_geometry_columns_statistics',),
 ('geometry_columns_field_infos',),
 ('views_geometry_columns_field_infos',),
 ('virts_geometry_columns_field_infos',),
 ('geometry_columns_time',),
 ('geometry_columns_auth',),
 ('views_geometry_columns_auth',),
 ('virts_geometry_columns_auth',),
 ('sql_statements_log',),
 ('SpatialIndex',),
 ('ElementaryGeometries',),
 ('KNN',),
 ('Fires',),
 ('idx_Fires_Shape',),
 ('idx_Fires_Shape_node',),
 ('idx_Fires_Shape_rowid',),
 ('idx_Fires_Shape_parent',),
 ('NWCG_UnitIDActive_20170109',)]

### Read as Data Frame

In [11]:
sql = "select * from fires"
df = pd.read_sql(sql, conn)
df.head()

Unnamed: 0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,Shape
0,1,1,FS-1418826,FED,FS-FIRESTAT,FS,USCAPNF,Plumas National Forest,511,Plumas National Forest,...,A,40.036944,-121.005833,5.0,USFS,CA,63,63,Plumas,b'\x00\x01\xad\x10\x00\x00\xe8d\xc2\x92_@^\xc0...
1,2,2,FS-1418827,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.933056,-120.404444,5.0,USFS,CA,61,61,Placer,b'\x00\x01\xad\x10\x00\x00T\xb6\xeej\xe2\x19^\...
2,3,3,FS-1418835,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.984167,-120.735556,13.0,STATE OR PRIVATE,CA,17,17,El Dorado,b'\x00\x01\xad\x10\x00\x00\xd0\xa5\xa0W\x13/^\...
3,4,4,FS-1418845,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.559167,-119.913333,5.0,USFS,CA,3,3,Alpine,b'\x00\x01\xad\x10\x00\x00\x94\xac\xa3\rt\xfa]...
4,5,5,FS-1418847,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,Eldorado National Forest,...,A,38.559167,-119.933056,5.0,USFS,CA,3,3,Alpine,b'\x00\x01\xad\x10\x00\x00@\xe3\xaa.\xb7\xfb]\...


In [12]:
# df.to_csv('fires_db.csv')

In [14]:
# Subsetting to the state of California within fires_db

ca = df.loc[df['STATE'] == 'CA']

In [18]:
# A tuple that represents the fires_db dimensions:

ca.shape

(189550, 39)

In [19]:
# To see all of the column labels of the fires_db:

ca.columns

Index(['OBJECTID', 'FOD_ID', 'FPA_ID', 'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM',
       'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID',
       'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT',
       'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID',
       'LOCAL_INCIDENT_ID', 'FIRE_CODE', 'FIRE_NAME',
       'ICS_209_INCIDENT_NUMBER', 'ICS_209_NAME', 'MTBS_ID', 'MTBS_FIRE_NAME',
       'COMPLEX_NAME', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY',
       'DISCOVERY_TIME', 'STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR', 'CONT_DATE',
       'CONT_DOY', 'CONT_TIME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE',
       'LONGITUDE', 'OWNER_CODE', 'OWNER_DESCR', 'STATE', 'COUNTY',
       'FIPS_CODE', 'FIPS_NAME', 'Shape'],
      dtype='object')

In [20]:
# To see the column labels that were not shown in previous table:

ca[['FIRE_NAME', 'STAT_CAUSE_DESCR', 'FIRE_YEAR', 'DISCOVERY_DATE', 'FIRE_SIZE', 'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID', 'LOCAL_INCIDENT_ID', 'FIRE_CODE', 'ICS_209_INCIDENT_NUMBER', 'ICS_209_NAME', 'MTBS_ID', 'MTBS_FIRE_NAME', 'COMPLEX_NAME']].head()

Unnamed: 0,FIRE_NAME,STAT_CAUSE_DESCR,FIRE_YEAR,DISCOVERY_DATE,FIRE_SIZE,SOURCE_REPORTING_UNIT_NAME,LOCAL_FIRE_REPORT_ID,LOCAL_INCIDENT_ID,FIRE_CODE,ICS_209_INCIDENT_NUMBER,ICS_209_NAME,MTBS_ID,MTBS_FIRE_NAME,COMPLEX_NAME
0,FOUNTAIN,Miscellaneous,2005,2453403.5,0.1,Plumas National Forest,1,PNF-47,BJ8K,,,,,
1,PIGEON,Lightning,2004,2453137.5,0.25,Eldorado National Forest,13,13,AAC0,,,,,
2,SLACK,Debris Burning,2004,2453156.5,0.1,Eldorado National Forest,27,021,A32W,,,,,
3,DEER,Lightning,2004,2453184.5,0.1,Eldorado National Forest,43,6,,,,,,
4,STEVENOT,Lightning,2004,2453184.5,0.1,Eldorado National Forest,44,7,,,,,,


In [21]:
# To see the data frame summary of columns and their data types:

ca.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 189550 entries, 0 to 1880464
Data columns (total 39 columns):
OBJECTID                      189550 non-null int64
FOD_ID                        189550 non-null int64
FPA_ID                        189550 non-null object
SOURCE_SYSTEM_TYPE            189550 non-null object
SOURCE_SYSTEM                 189550 non-null object
NWCG_REPORTING_AGENCY         189550 non-null object
NWCG_REPORTING_UNIT_ID        189550 non-null object
NWCG_REPORTING_UNIT_NAME      189550 non-null object
SOURCE_REPORTING_UNIT         189550 non-null object
SOURCE_REPORTING_UNIT_NAME    189550 non-null object
LOCAL_FIRE_REPORT_ID          61933 non-null object
LOCAL_INCIDENT_ID             127983 non-null object
FIRE_CODE                     55533 non-null object
FIRE_NAME                     174557 non-null object
ICS_209_INCIDENT_NUMBER       2838 non-null object
ICS_209_NAME                  2838 non-null object
MTBS_ID                       1137 non-null obje

In [22]:
# Confirming data in the forest fires_db from 1992 to 2015:

ca['FIRE_YEAR'].unique()

array([2005, 2004, 2006, 2002, 2007, 2008, 2009, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2003, 2010, 2011, 2012, 2013,
       2014, 2015], dtype=int64)

In [23]:
# Checking the number of fire occurances in the fires_db (unique days of fire):

len(ca['DISCOVERY_DATE'].unique())

8324

In [24]:
# Looking for the total number of occurences per stated cause of fire:

print(ca.groupby('STAT_CAUSE_DESCR').size())

STAT_CAUSE_DESCR
Arson                19635
Campfire              9516
Children              6930
Debris Burning       14318
Equipment Use        39407
Fireworks              219
Lightning            27000
Miscellaneous        51943
Missing/Undefined    12605
Powerline             1198
Railroad               720
Smoking               5936
Structure              123
dtype: int64


In [25]:
# Looking for the total number of occurences per year per stated cause of fire:

ca.groupby(['STAT_CAUSE_DESCR','FIRE_YEAR']).size()

STAT_CAUSE_DESCR  FIRE_YEAR
Arson             1992         1155
                  1993         1077
                  1994          768
                  1995          997
                  1996          948
                               ... 
Structure         2011            6
                  2012           10
                  2013            8
                  2014           20
                  2015            3
Length: 309, dtype: int64

In [27]:
# The number of fire occurences per year per stated cause of fire for the state of CA:

ca.groupby(['STAT_CAUSE_DESCR','FIRE_YEAR']).size()

STAT_CAUSE_DESCR  FIRE_YEAR
Arson             1992         1155
                  1993         1077
                  1994          768
                  1995          997
                  1996          948
                               ... 
Structure         2011            6
                  2012           10
                  2013            8
                  2014           20
                  2015            3
Length: 309, dtype: int64

In [28]:
ca.groupby('STAT_CAUSE_DESCR').max()

Unnamed: 0_level_0,OBJECTID,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,SOURCE_REPORTING_UNIT_NAME,...,CONT_DATE,CONT_DOY,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_CODE,OWNER_DESCR,STATE,Shape
STAT_CAUSE_DESCR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arson,1880458,300348354,W-99825,NONFED,ST-NASF,ST/C&L,USNVHTF,Yosemite National Park,USCASJR,Yuma Field Office,...,2457387.5,366.0,160371.0,G,42.0325,-114.441,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Campfire,1804572,300218047,W-9999,NONFED,ST-NASF,ST/C&L,USORRSF,Yosemite National Park,USCASRR,Yuma Field Office,...,2457384.5,365.0,280059.0,G,42.051667,-114.1258,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Children,1880454,300348297,W-9912,NONFED,ST-NASF,ST/C&L,USNVHTF,Yosemite National Park,NVCCD,Yuma Field Office,...,2457362.5,366.0,38356.0,G,41.996111,-114.1958,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Debris Burning,1880453,300348293,W-99978,NONFED,ST-NASF,ST/C&L,USNVHTF,Yosemite National Park,USCATKR,Yuma Field Office,...,2457379.5,366.0,162702.0,G,42.001944,-114.3019,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Equipment Use,1880452,300348291,W-99821,NONFED,ST-NASF,ST/C&L,USORRSF,Yosemite National Park,USCASJR,Yosemite National Park,...,2457373.5,365.0,240207.0,G,42.03809,-114.481111,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Fireworks,1797737,300209770,W-9917,FED,FS-FIRESTAT,NPS,USNVHTF,Whiskeytown National Recreation Area,CAWNP,Yuma Field Office,...,2457305.5,350.0,2400.0,F,41.8606,-114.2008,14.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfce\xf7\xe4a\xa5\\...
Lightning,1880459,300348361,W-9959,NONFED,ST-NASF,ST/C&L,USORRSF,Yosemite National Park,USCACLR,Yuma Field Office,...,2457356.5,365.0,315578.8,G,42.003056,-114.1402,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Miscellaneous,1880465,300348399,W-9966,NONFED,ST-NASF,ST/C&L,USORRSF,Yosemite National Park,USCATNR,Yuma Field Office,...,2457386.5,366.0,255858.0,G,42.0119,-114.1555,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Missing/Undefined,1880464,300348377,W-91741,NONFED,ST-NASF,ST/C&L,USNVCCD,Yosemite National Park,USCATNR,Yosemite National Park,...,2457385.5,365.0,108204.0,G,42.0078,-114.428056,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...
Powerline,1880225,300346883,W-9848,NONFED,ST-NASF,ST/C&L,USNVHTF,Yosemite National Park,CAYNP,Yosemite National Park,...,2457299.5,365.0,47000.0,G,41.983992,-114.2194,15.0,USFS,CA,b'\x00\x01\xad\x10\x00\x00\xfc\xff\xff\xff\xff...


In [29]:
groups = ca.groupby('STAT_CAUSE_DESCR')
for key, group in groups:
    print(key)
    print(group.head())

Arson
      OBJECTID  FOD_ID      FPA_ID SOURCE_SYSTEM_TYPE SOURCE_SYSTEM  \
96          97      98  FS-1419090                FED   FS-FIRESTAT   
407        408     415  FS-1419763                FED   FS-FIRESTAT   
558        559     566  FS-1420114                FED   FS-FIRESTAT   
1344      1345    1359  FS-1421854                FED   FS-FIRESTAT   
1458      1459    1473  FS-1422081                FED   FS-FIRESTAT   

     NWCG_REPORTING_AGENCY NWCG_REPORTING_UNIT_ID  \
96                      FS                USNVHTF   
407                     FS                USCAANF   
558                     FS                USCAPNF   
1344                    FS                USCACNF   
1458                    FS                USCACNF   

              NWCG_REPORTING_UNIT_NAME SOURCE_REPORTING_UNIT  \
96    Humboldt-Toiyabe National Forest                  0417   
407            Angeles National Forest                  0501   
558             Plumas National Forest                  

[5 rows x 39 columns]
Smoking
      OBJECTID  FOD_ID      FPA_ID SOURCE_SYSTEM_TYPE SOURCE_SYSTEM  \
181        182     185  FS-1419306                FED   FS-FIRESTAT   
182        183     186  FS-1419307                FED   FS-FIRESTAT   
231        232     236  FS-1419393                FED   FS-FIRESTAT   
396        397     404  FS-1419729                FED   FS-FIRESTAT   
1894      1895    1910  FS-1422971                FED   FS-FIRESTAT   

     NWCG_REPORTING_AGENCY NWCG_REPORTING_UNIT_ID    NWCG_REPORTING_UNIT_NAME  \
181                     FS                USCALPF  Los Padres National Forest   
182                     FS                USCALPF  Los Padres National Forest   
231                     FS                USCASQF     Sequoia National Forest   
396                     FS                USCAANF     Angeles National Forest   
1894                    FS                USCAENF    Eldorado National Forest   

     SOURCE_REPORTING_UNIT  SOURCE_REPORTING_UNIT_NAME  

In [30]:
groups = ca.groupby('STAT_CAUSE_DESCR')
for key, group in groups:
    print(key)
    print(group.max())

Arson
OBJECTID                                                                1880458
FOD_ID                                                                300348354
FPA_ID                                                                  W-99825
SOURCE_SYSTEM_TYPE                                                       NONFED
SOURCE_SYSTEM                                                           ST-NASF
NWCG_REPORTING_AGENCY                                                    ST/C&L
NWCG_REPORTING_UNIT_ID                                                  USNVHTF
NWCG_REPORTING_UNIT_NAME                                 Yosemite National Park
SOURCE_REPORTING_UNIT                                                   USCASJR
SOURCE_REPORTING_UNIT_NAME                                    Yuma Field Office
FIRE_YEAR                                                                  2015
DISCOVERY_DATE                                                      2.45739e+06
DISCOVERY_DOY                     

OBJECTID                                                                1880459
FOD_ID                                                                300348361
FPA_ID                                                                   W-9959
SOURCE_SYSTEM_TYPE                                                       NONFED
SOURCE_SYSTEM                                                           ST-NASF
NWCG_REPORTING_AGENCY                                                    ST/C&L
NWCG_REPORTING_UNIT_ID                                                  USORRSF
NWCG_REPORTING_UNIT_NAME                                 Yosemite National Park
SOURCE_REPORTING_UNIT                                                   USCACLR
SOURCE_REPORTING_UNIT_NAME                                    Yuma Field Office
FIRE_YEAR                                                                  2015
DISCOVERY_DATE                                                      2.45735e+06
DISCOVERY_DOY                           

OBJECTID                                                                1880402
FOD_ID                                                                300348051
FPA_ID                                                                   W-9922
SOURCE_SYSTEM_TYPE                                                       NONFED
SOURCE_SYSTEM                                                           ST-NASF
NWCG_REPORTING_AGENCY                                                    ST/C&L
NWCG_REPORTING_UNIT_ID                                                  USNVHTF
NWCG_REPORTING_UNIT_NAME                                 Yosemite National Park
SOURCE_REPORTING_UNIT                                                     NVCCD
SOURCE_REPORTING_UNIT_NAME                                    Yuma Field Office
COMPLEX_NAME                                                                NaN
FIRE_YEAR                                                                  2015
DISCOVERY_DATE                          