# Preparing the Dataset

In [1]:
import numpy as np
import pandas as pd
import os
import pymongo
from dtime import is_leap_year, ymd
import datetime

In [2]:
npy_file = os.path.join("data","Fire_Data.npy")

In [3]:
pickle = np.load(npy_file, allow_pickle=True)

In [4]:
fires_df = pd.DataFrame(pickle, columns=['FPA_ID','FIRE_NAME','FIRE_YEAR','DISCOVERY_DOY','DISCOVERY_TIME','CONT_DOY','CONT_TIME','FIRE_SIZE','FIRE_SIZE_CLASS','LATITUDE','LONGITUDE','STATE','COUNTY','FIPS_CODE','FIPS_NAME','STAT_CAUSE_DESCR','OWNER_CODE','OWNER_DESCR','DAYS_TO_CONT'])

In [5]:
fires_df.head()

Unnamed: 0,FPA_ID,FIRE_NAME,FIRE_YEAR,DISCOVERY_DOY,DISCOVERY_TIME,CONT_DOY,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,COUNTY,FIPS_CODE,FIPS_NAME,STAT_CAUSE_DESCR,OWNER_CODE,OWNER_DESCR,DAYS_TO_CONT
0,FS-1418826,FOUNTAIN,2005,33,1300,33,1730,0.1,A,40.0369,-121.006,CA,63,63,Plumas,Miscellaneous,5,USFS,0
1,FS-1418827,PIGEON,2004,133,845,133,1530,0.25,A,38.9331,-120.404,CA,61,61,Placer,Lightning,5,USFS,0
2,FS-1418835,SLACK,2004,152,1921,152,2024,0.1,A,38.9842,-120.736,CA,17,17,El Dorado,Debris Burning,13,STATE OR PRIVATE,0
3,FS-1418845,DEER,2004,180,1600,185,1400,0.1,A,38.5592,-119.913,CA,3,3,Alpine,Lightning,5,USFS,5
4,FS-1418847,STEVENOT,2004,180,1600,185,1200,0.1,A,38.5592,-119.933,CA,3,3,Alpine,Lightning,5,USFS,5


In [6]:
fires_df["DAYS_TO_CONT"] = fires_df["DAYS_TO_CONT"].transform(lambda x: x if x > -1 else x + 365)

In [7]:
fires_df['DISCOVERY_TIME'] = fires_df['DISCOVERY_TIME'].apply(lambda x: x.replace("None",'0') if x == "None" else x)

In [8]:
fires_df['DISCOVERY_TIME'] = fires_df['DISCOVERY_TIME'].astype('str')

In [9]:
fires_df['CONT_TIME'] = fires_df['CONT_TIME'].apply(lambda x: x.replace("None",'0') if x == "None" else x)

In [10]:
fires_df['CONT_TIME'] = fires_df['CONT_TIME'].apply(lambda x: x.replace("",'0') if x == "" else x)

In [11]:
fires_df['CONT_TIME'] = fires_df['CONT_TIME'].astype('str')

In [12]:
fires_df['DISCOVERY_HOUR'] = fires_df['DISCOVERY_TIME'].apply(lambda x: x[:2])

In [14]:
fires_df['DISCOVERY_MINUTE'] = fires_df['DISCOVERY_TIME'].apply(lambda x: x[2:])

In [15]:
fires_df['CONT_HOUR'] = fires_df['CONT_TIME'].apply(lambda x: x[:2])

In [16]:
fires_df['CONT_MINUTE'] = fires_df['CONT_TIME'].apply(lambda x: x[2:])

In [17]:
# fires_df['DISCOVERY_DATE_PD'] = pd.to_datetime(fires_df['FIRE_YEAR'] * 1000 + fires_df['DISCOVERY_DOY'], format='%Y%j') 


In [18]:
# fires_df['DISCOVERY_DATE_PD'] = pd.to_datetime(fires_df['FIRE_YEAR'] * 31,556,952 + fires_df['DISCOVERY_DOY'], units='s', format='%Y%j%X') 


In [19]:
fires_df.head()

Unnamed: 0,FPA_ID,FIRE_NAME,FIRE_YEAR,DISCOVERY_DOY,DISCOVERY_TIME,CONT_DOY,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,...,FIPS_CODE,FIPS_NAME,STAT_CAUSE_DESCR,OWNER_CODE,OWNER_DESCR,DAYS_TO_CONT,DISCOVERY_HOUR,DISCOVERY_MINUTE,CONT_HOUR,CONT_MINUTE
0,FS-1418826,FOUNTAIN,2005,33,1300,33,1730,0.1,A,40.0369,...,63,Plumas,Miscellaneous,5,USFS,0.0,13,0,17,30
1,FS-1418827,PIGEON,2004,133,845,133,1530,0.25,A,38.9331,...,61,Placer,Lightning,5,USFS,0.0,8,45,15,30
2,FS-1418835,SLACK,2004,152,1921,152,2024,0.1,A,38.9842,...,17,El Dorado,Debris Burning,13,STATE OR PRIVATE,0.0,19,21,20,24
3,FS-1418845,DEER,2004,180,1600,185,1400,0.1,A,38.5592,...,3,Alpine,Lightning,5,USFS,5.0,16,0,14,0
4,FS-1418847,STEVENOT,2004,180,1600,185,1200,0.1,A,38.5592,...,3,Alpine,Lightning,5,USFS,5.0,16,0,12,0


In [20]:
# fires_df['DISCOVERY_TIME_NS'] = pd.to_datetime(fires_df['DISCOVERY_HOUR'].replace('',0).astype('int') * 3.6e12 + fires_df['DISCOVERY_MINUTE'].replace('',0).astype('int') * 6.0e10, format='ns')

In [20]:
ctr = 0
for col in fires_df.columns:
    ctr += 1
    print(ctr, col)


1 FPA_ID
2 FIRE_NAME
3 FIRE_YEAR
4 DISCOVERY_DOY
5 DISCOVERY_TIME
6 CONT_DOY
7 CONT_TIME
8 FIRE_SIZE
9 FIRE_SIZE_CLASS
10 LATITUDE
11 LONGITUDE
12 STATE
13 COUNTY
14 FIPS_CODE
15 FIPS_NAME
16 STAT_CAUSE_DESCR
17 OWNER_CODE
18 OWNER_DESCR
19 DAYS_TO_CONT
20 DISCOVERY_HOUR
21 DISCOVERY_MINUTE
22 CONT_MINUTE
23 CONT_HOUR


In [21]:
fires_df.STAT_CAUSE_DESCR.value_counts()

Debris Burning       429028
Miscellaneous        323805
Arson                281455
Lightning            278468
Missing/Undefined    166723
Equipment Use        147612
Campfire              76139
Children              61167
Smoking               52869
Railroad              33455
Powerline             14448
Fireworks             11500
Structure              3796
Name: STAT_CAUSE_DESCR, dtype: int64

In [21]:
# build datetime for CONT and DISCOVERY Dates 
# if cont doy less than discovery doy add 1 to year
# make new column for CONT and DISCOVERY
disc_dates = []
cont_dates = []
for row in fires_df.itertuples():
    Dyr = int(row[3])
    Ddy = int(row[4])
    Dhr = int(row[20])
    Cdy = int(row[6])
    Chr = int(row[22])
    if Cdy < Ddy:
        Cyr = int(row[3]) + 1 
    else:
        Cyr = int(row[3])
    try:
        Dmn = int(row[21])
    except ValueError as De:
        Dmn = 00
    try:
        Cmn = int(row[23])
    except ValueError as Ce:
        Cmn = 00
    DYr,DMo,DDa = ymd(Dyr,Ddy)
    CYr,CMo,CDa = ymd(Cyr,Cdy)
    disc_dates.append(datetime.datetime(DYr,DMo,DDa,Dhr,Dmn))
    cont_dates.append(datetime.datetime(CYr,CMo,CDa,Chr,Cmn))

In [22]:
print(len(disc_dates),len(cont_dates))

1880465 1880465


In [23]:
fires_df['DISCOVERY_DATE'] = disc_dates

In [24]:
fires_df['CONT_DATE'] = cont_dates

In [25]:
fires_df.head()

Unnamed: 0,FPA_ID,FIRE_NAME,FIRE_YEAR,DISCOVERY_DOY,DISCOVERY_TIME,CONT_DOY,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,...,STAT_CAUSE_DESCR,OWNER_CODE,OWNER_DESCR,DAYS_TO_CONT,DISCOVERY_HOUR,DISCOVERY_MINUTE,CONT_HOUR,CONT_MINUTE,DISCOVERY_DATE,CONT_DATE
0,FS-1418826,FOUNTAIN,2005,33,1300,33,1730,0.1,A,40.0369,...,Miscellaneous,5,USFS,0.0,13,0,17,30,2005-02-02 13:00:00,2005-02-02 17:30:00
1,FS-1418827,PIGEON,2004,133,845,133,1530,0.25,A,38.9331,...,Lightning,5,USFS,0.0,8,45,15,30,2004-05-12 08:45:00,2004-05-12 15:30:00
2,FS-1418835,SLACK,2004,152,1921,152,2024,0.1,A,38.9842,...,Debris Burning,13,STATE OR PRIVATE,0.0,19,21,20,24,2004-05-31 19:21:00,2004-05-31 20:24:00
3,FS-1418845,DEER,2004,180,1600,185,1400,0.1,A,38.5592,...,Lightning,5,USFS,5.0,16,0,14,0,2004-06-28 16:00:00,2004-07-03 14:00:00
4,FS-1418847,STEVENOT,2004,180,1600,185,1200,0.1,A,38.5592,...,Lightning,5,USFS,5.0,16,0,12,0,2004-06-28 16:00:00,2004-07-03 12:00:00


In [26]:
Viz_df = fires_df[['FPA_ID','FIRE_NAME','FIRE_YEAR','FIRE_SIZE','FIRE_SIZE_CLASS','LATITUDE','LONGITUDE','STATE','FIPS_CODE','FIPS_NAME','STAT_CAUSE_DESCR','OWNER_CODE','OWNER_DESCR','DAYS_TO_CONT','DISCOVERY_DATE','CONT_DATE']]

In [27]:
np.save("Fire_Data_clean.npy",Viz_df)

# Load saved clean Df

In [2]:
Viz_df = pd.DataFrame(np.load(os.path.join("data","Fire_Data_clean.npy"), allow_pickle=True) , columns=['FPA_ID','FIRE_NAME','FIRE_YEAR','FIRE_SIZE','FIRE_SIZE_CLASS','LATITUDE','LONGITUDE','STATE','FIPS_CODE','FIPS_NAME','STAT_CAUSE_DESCR','OWNER_CODE','OWNER_DESCR','DAYS_TO_CONT','DISCOVERY_DATE','CONT_DATE'])

In [3]:
Viz_df.head(10)

Unnamed: 0,FPA_ID,FIRE_NAME,FIRE_YEAR,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,FIPS_CODE,FIPS_NAME,STAT_CAUSE_DESCR,OWNER_CODE,OWNER_DESCR,DAYS_TO_CONT,DISCOVERY_DATE,CONT_DATE
0,FS-1418826,FOUNTAIN,2005,0.1,A,40.0369,-121.006,CA,63,Plumas,Miscellaneous,5,USFS,0,2005-02-02 13:00:00,2005-02-02 17:30:00
1,FS-1418827,PIGEON,2004,0.25,A,38.9331,-120.404,CA,61,Placer,Lightning,5,USFS,0,2004-05-12 08:45:00,2004-05-12 15:30:00
2,FS-1418835,SLACK,2004,0.1,A,38.9842,-120.736,CA,17,El Dorado,Debris Burning,13,STATE OR PRIVATE,0,2004-05-31 19:21:00,2004-05-31 20:24:00
3,FS-1418845,DEER,2004,0.1,A,38.5592,-119.913,CA,3,Alpine,Lightning,5,USFS,5,2004-06-28 16:00:00,2004-07-03 14:00:00
4,FS-1418847,STEVENOT,2004,0.1,A,38.5592,-119.933,CA,3,Alpine,Lightning,5,USFS,5,2004-06-28 16:00:00,2004-07-03 12:00:00
5,FS-1418849,HIDDEN,2004,0.1,A,38.6353,-120.104,CA,5,Amador,Lightning,5,USFS,1,2004-06-30 18:00:00,2004-07-01 16:00:00
6,FS-1418851,FORK,2004,0.1,A,38.6883,-120.153,CA,17,El Dorado,Lightning,5,USFS,1,2004-07-01 18:00:00,2004-07-02 14:00:00
7,FS-1418854,SLATE,2005,0.8,B,40.9681,-122.434,CA,67,,Debris Burning,13,STATE OR PRIVATE,0,2005-03-08 13:00:00,2005-03-08 16:00:00
8,FS-1418856,SHASTA,2005,1.0,B,41.2336,-122.283,CA,67,,Debris Burning,13,STATE OR PRIVATE,0,2005-03-15 12:00:00,2005-03-15 17:00:00
9,FS-1418859,TANGLEFOOT,2004,0.1,A,38.5483,-120.149,CA,5,Amador,Lightning,5,USFS,1,2004-07-01 18:00:00,2004-07-02 18:00:00


In [9]:
cont_df = pd.DataFrame(Viz_df.groupby(['STAT_CAUSE_DESCR','FIRE_YEAR']).DAYS_TO_CONT.sum())

In [24]:
for I,row in cont_df.iterrows():
    print(I[0],I[1])
    print(row.DAYS_TO_CONT)
    print('='*50)

24607.0
Children 1994
251189.0
Children 1995
275816.0
Children 1996
231417.0
Children 1997
250782.0
Children 1998
321867.0
Children 1999
351209.0
Children 2000
289858.0
Children 2001
338519.0
Children 2002
244286.0
Children 2003
157613.0
Children 2004
183608.0
Children 2005
244053.0
Children 2006
300723.0
Children 2007
251239.0
Children 2008
218345.0
Children 2009
173054.0
Children 2010
167748.0
Children 2011
123826.0
Children 2012
62277.0
Children 2013
23307.0
Children 2014
17312.0
Children 2015
28400.0
Debris Burning 1992
766244.0
Debris Burning 1993
685584.0
Debris Burning 1994
943589.0
Debris Burning 1995
1152194.0
Debris Burning 1996
883947.0
Debris Burning 1997
1124251.0
Debris Burning 1998
1998733.0
Debris Burning 1999
2845667.0
Debris Burning 2000
2173526.0
Debris Burning 2001
1958708.0
Debris Burning 2002
1498202.0
Debris Burning 2003
1340424.0
Debris Burning 2004
1677094.0
Debris Burning 2005
2329539.0
Debris Burning 2006
3059877.0
Debris Burning 2007
1891035.0
Debris Burning

In [87]:
arson = 0
for row in Viz_df[['FIRE_YEAR','','STAT_CAUSE_DESCR']].head(200).iterrows():
    if row[1].STAT_CAUSE_DESCR == 'Arson':
        arson += row[1].FIRE_SIZE
arson        

50.71

In [4]:
df0 = pd.DataFrame(Viz_df.groupby(['STAT_CAUSE_DESCR']).FIRE_SIZE.sum())

In [5]:
df0

Unnamed: 0_level_0,FIRE_SIZE
STAT_CAUSE_DESCR,Unnamed: 1_level_1
Arson,9487274.0
Campfire,3429061.0
Children,469830.3
Debris Burning,5975793.0
Equipment Use,6799046.0
Fireworks,318207.3
Lightning,87033500.0
Miscellaneous,14394200.0
Missing/Undefined,8751725.0
Powerline,1609443.0


In [6]:
df1 = pd.DataFrame(Viz_df.STAT_CAUSE_DESCR.value_counts())

In [7]:
df1

Unnamed: 0,STAT_CAUSE_DESCR
Debris Burning,429028
Miscellaneous,323805
Arson,281455
Lightning,278468
Missing/Undefined,166723
Equipment Use,147612
Campfire,76139
Children,61167
Smoking,52869
Railroad,33455


In [8]:
df2 = df0.merge(df1,left_index=True,right_index=True)

In [9]:
df2

Unnamed: 0,FIRE_SIZE,STAT_CAUSE_DESCR
Arson,9487274.0,281455
Campfire,3429061.0,76139
Children,469830.3,61167
Debris Burning,5975793.0,429028
Equipment Use,6799046.0,147612
Fireworks,318207.3,11500
Lightning,87033500.0,278468
Miscellaneous,14394200.0,323805
Missing/Undefined,8751725.0,166723
Powerline,1609443.0,14448


In [20]:
vpie_data = []
for index,data in df2.iterrows():
    each_slice = {
        "name": index,
        "y": round(data.FIRE_SIZE,2),
        "z": int(data.STAT_CAUSE_DESCR)
    }
    vpie_data.append(each_slice)

In [21]:
vpie_data

[{'name': 'Arson', 'y': 9487274.16, 'z': 281455},
 {'name': 'Campfire', 'y': 3429061.46, 'z': 76139},
 {'name': 'Children', 'y': 469830.29, 'z': 61167},
 {'name': 'Debris Burning', 'y': 5975792.71, 'z': 429028},
 {'name': 'Equipment Use', 'y': 6799046.37, 'z': 147612},
 {'name': 'Fireworks', 'y': 318207.31, 'z': 11500},
 {'name': 'Lightning', 'y': 87033501.02, 'z': 278468},
 {'name': 'Miscellaneous', 'y': 14394204.2, 'z': 323805},
 {'name': 'Missing/Undefined', 'y': 8751725.49, 'z': 166723},
 {'name': 'Powerline', 'y': 1609442.86, 'z': 14448},
 {'name': 'Railroad', 'y': 849613.77, 'z': 33455},
 {'name': 'Smoking', 'y': 842660.52, 'z': 52869},
 {'name': 'Structure', 'y': 172189.39, 'z': 3796}]

In [31]:
cont_pivot = pd.pivot_table(Viz_df, values='DAYS_TO_CONT', columns=['FIRE_YEAR'],
                    index=['STAT_CAUSE_DESCR'], aggfunc=np.sum, fill_value=0)
cont_pivot

FIRE_YEAR,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
STAT_CAUSE_DESCR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arson,618391,606053,478178,630855,413389,971202,1665608,2273776,2374672,1839656,...,1785206,943502,803800,553930,868114,268422,174803,133296,164384,86334
Campfire,64031,99188,233714,324954,230477,104897,128210,182645,134881,147059,...,162571,195159,170812,92912,205517,42351,65298,35710,59382,49344
Children,249234,224607,251189,275816,231417,250782,321867,351209,289858,338519,...,300723,251239,218345,173054,167748,123826,62277,23307,17312,28400
Debris Burning,766244,685584,943589,1152194,883947,1124251,1998733,2845667,2173526,1958708,...,3059877,1891035,2299278,1848126,2212653,917859,800797,959236,877622,763921
Equipment Use,638550,657162,582778,629934,641879,691549,855974,1065114,886861,834809,...,1284400,889697,1023375,691843,630758,645881,273307,226291,208988,298000
Fireworks,995,557,866,432,1996,10686,11043,7724,12574,12438,...,5961,10635,4097,27991,62286,984,6477,3467,3906,2790
Lightning,342395,398192,400560,263065,285342,350842,577775,669506,875076,474570,...,983411,766472,467709,308491,459421,291250,249099,163735,142954,418284
Miscellaneous,684238,703063,642822,669628,692001,1149380,1279111,1632390,2527167,1186769,...,2109032,1586797,2153216,1352111,2049261,1377294,946553,732791,1030376,801732
Missing/Undefined,308577,290425,689439,279952,343469,166564,188363,156176,120098,225397,...,1473040,2328143,1465276,1116764,1074052,1095515,466785,238613,53848,108823
Powerline,117,245,30,45,3165,7965,17767,13915,11768,13309,...,4423,14949,2432,117837,161586,288456,95048,24467,99931,111981


In [59]:
cont_list = []
for index,group in cont_pivot.iterrows():
    print(index)
    years = [index_ for index_,item in group.iteritems()]
    nums = [item for index_,item in group.iteritems()]
    print(years)
    print(nums)
    each_cause ={
        "cause": index,
        "data": {
            "years": years,
            "data": nums}
            }
    cont_list.append(each_cause)

Arson
[1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]
[618391, 606053, 478178, 630855, 413389, 971202, 1665608, 2273776, 2374672, 1839656, 1154331, 775350, 951382, 1197313, 1785206, 943502, 803800, 553930, 868114, 268422, 174803, 133296, 164384, 86334]
Campfire
[1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]
[64031, 99188, 233714, 324954, 230477, 104897, 128210, 182645, 134881, 147059, 128326, 119696, 113549, 361914, 162571, 195159, 170812, 92912, 205517, 42351, 65298, 35710, 59382, 49344]
Children
[1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015]
[249234, 224607, 251189, 275816, 231417, 250782, 321867, 351209, 289858, 338519, 244286, 157613, 183608, 244053, 300723, 251239, 218345, 173054, 167748, 1238

In [60]:
cont_list[0]

{'cause': 'Arson',
 'data': {'years': [1992,
   1993,
   1994,
   1995,
   1996,
   1997,
   1998,
   1999,
   2000,
   2001,
   2002,
   2003,
   2004,
   2005,
   2006,
   2007,
   2008,
   2009,
   2010,
   2011,
   2012,
   2013,
   2014,
   2015],
  'data': [618391,
   606053,
   478178,
   630855,
   413389,
   971202,
   1665608,
   2273776,
   2374672,
   1839656,
   1154331,
   775350,
   951382,
   1197313,
   1785206,
   943502,
   803800,
   553930,
   868114,
   268422,
   174803,
   133296,
   164384,
   86334]}}

In [36]:
[str(col) for col in cont_pivot.columns]

['1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

In [48]:
cause_size_pivot = pd.pivot_table(Viz_df, values='FIRE_SIZE', index=['FIRE_YEAR'],
                    columns=['STAT_CAUSE_DESCR'], aggfunc=np.sum, fill_value=0)
cause_size_pivot                    

STAT_CAUSE_DESCR,Arson,Campfire,Children,Debris Burning,Equipment Use,Fireworks,Lightning,Miscellaneous,Missing/Undefined,Powerline,Railroad,Smoking,Structure
FIRE_YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1992,267327.3,26931.74,50900.503,154201.142,136179.239,2619.0,1007120.0,302305.2,157951.7,3181.1,38062.04,52704.858,474.1
1993,193621.8,30402.979,13744.149,109894.683,122668.21,6908.8,1112361.0,217835.1,249443.7,34542.0,43204.848,57012.01,56.8
1994,477147.6,69247.4,14479.984,182475.81,137054.61,16026.7,2393715.0,186375.5,487495.6,17243.4,41323.07,58814.553,35276.5
1995,290068.9,68101.023,16675.848,191292.641,116224.279,3678.8,831346.9,183589.9,243791.4,28085.2,23561.54,51976.18,1192.5
1996,514986.0,154556.55,18374.694,269211.21,520321.42,48124.9,3249387.0,414805.2,641121.7,66759.8,64130.68,43223.89,152.4
1997,177105.2,17847.75,7013.03,166325.85,117987.81,3163.55,2218295.0,213296.5,180773.5,63834.82,27992.5,20826.09,173.46
1998,302851.0,13626.52,13890.35,138022.14,210612.29,10098.36,836085.4,174401.5,217474.8,7557.33,70407.92,15861.73,213.12
1999,707899.9,51843.41,14746.892,337270.0,376550.7291,27166.65,3661816.0,462308.3,211279.8,104211.86,96900.74,27964.32,241.13
2000,659805.8,143085.45,23974.11,378198.58,456780.774,11626.01,4895256.0,653654.6,135960.7,19901.631,161571.57,92561.48,7121.46
2001,470140.8,76039.02,14353.09,295063.094,210882.161,27267.74,1954476.0,396253.7,119507.0,5924.64,89416.21,63641.31,1107.06


In [58]:
cause_size_pivot.sum()

STAT_CAUSE_DESCR
Arson                9.487274e+06
Campfire             3.429061e+06
Children             4.698303e+05
Debris Burning       5.975793e+06
Equipment Use        6.799046e+06
Fireworks            3.182073e+05
Lightning            8.703350e+07
Miscellaneous        1.439420e+07
Missing/Undefined    8.751725e+06
Powerline            1.609443e+06
Railroad             8.496138e+05
Smoking              8.426605e+05
Structure            1.721894e+05
dtype: float64

In [56]:
streamgraph_data = []
for each in cause_size_pivot.iteritems():
    each_cause = {
        "name": each[0],
        "data": [v for k,v in cause_size_pivot[each[0]].items()]
    }
    streamgraph_data.append(each_cause)


In [57]:
streamgraph_data

[{'name': 'Arson',
  'data': [267327.29000000743,
   193621.7730000055,
   477147.63699998736,
   290068.8800000042,
   514985.9799999912,
   177105.17000000508,
   302850.9800100079,
   707899.8609999632,
   659805.8109999703,
   470140.7999999718,
   1003302.1926799705,
   440481.2141899857,
   277178.73098999605,
   352592.8540099907,
   557676.1949599689,
   437086.8611799823,
   290579.52074999426,
   439684.2774999785,
   202693.8560000052,
   387772.28999998735,
   414064.7679999777,
   291194.95900000294,
   142213.08870000494,
   189799.17000000706]},
 {'name': 'Campfire',
  'data': [26931.739999999416,
   30402.97899999859,
   69247.39999999979,
   68101.02299999856,
   154556.5500000053,
   17847.75000000003,
   13626.520000000119,
   51843.40999999991,
   143085.4500000011,
   76039.0199999997,
   311659.5110000022,
   402416.39499998785,
   57698.729999999254,
   129945.05700000239,
   201249.75600000596,
   289151.18651000375,
   156385.5599900088,
   58829.04499999813,
 

In [28]:
[v for k,v in cause_size_pivot.Lightning.items()]

[1007120.0899998662,
 1112360.9300000295,
 2393714.7700002114,
 831346.9299999787,
 3249387.3100001407,
 2218295.280000119,
 836085.3899999753,
 3661816.040000111,
 4895256.451999845,
 1954476.4100001354,
 4000895.3248592056,
 2429578.645190223,
 7180591.23017886,
 7604853.915019143,
 5556431.731009671,
 5973669.811039369,
 2236002.2400001762,
 3938008.2116201483,
 2030729.7285000947,
 3764986.2400000975,
 6883806.685999363,
 3002130.350000161,
 2073452.8912500767,
 8198504.409999249]

In [18]:
[col for col in cause_size_pivot.columns if col != 'Lightning']

['Arson',
 'Campfire',
 'Children',
 'Debris Burning',
 'Equipment Use',
 'Fireworks',
 'Miscellaneous',
 'Missing/Undefined',
 'Powerline',
 'Railroad',
 'Smoking',
 'Structure']

In [61]:
[[ k, v ] for k,v in cause_size_pivot[[col for col in cause_size_pivot.columns if col != 'Lightning']].agg(sum).items()]

[['Arson', 9487274.159969768],
 ['Campfire', 3429061.461000004],
 ['Children', 469830.28806999704],
 ['Debris Burning', 5975792.711603071],
 ['Equipment Use', 6799046.366469939],
 ['Fireworks', 318207.3129999991],
 ['Miscellaneous', 14394204.201985849],
 ['Missing/Undefined', 8751725.491536878],
 ['Powerline', 1609442.8589999983],
 ['Railroad', 849613.7701800035],
 ['Smoking', 842660.5221999962],
 ['Structure', 172189.39100000024]]

In [26]:
manmade_list = [[ k, v ] for k,v in Viz_df.STAT_CAUSE_DESCR.value_counts().items() if k != 'Lightning']
manmade_list

[['Debris Burning', 429028],
 ['Miscellaneous', 323805],
 ['Arson', 281455],
 ['Missing/Undefined', 166723],
 ['Equipment Use', 147612],
 ['Campfire', 76139],
 ['Children', 61167],
 ['Smoking', 52869],
 ['Railroad', 33455],
 ['Powerline', 14448],
 ['Fireworks', 11500],
 ['Structure', 3796]]

In [27]:
lightning_list = [[ k, v ] for k,v in Viz_df.STAT_CAUSE_DESCR.value_counts().items() if k == 'Lightning']
lightning_list

[['Lightning', 278468]]

# Connect to MongoDB and push data to Database

In [24]:
## Setup Mongo DB
client = pymongo.MongoClient()

In [25]:
## Build DataBase
db = client.Project_2_db
fires = db.fires

In [77]:
fires.insert_many(Viz_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x2327cc84688>

In [26]:
fires.create_index([("FIRE_YEAR", pymongo.ASCENDING)],name='year',unique=False)

'year'

In [27]:
fires.create_index([("STAT_CAUSE_DESCR", pymongo.ASCENDING)],name='cause',unique=False)

'cause'

In [28]:
fires.create_index([("STATE", pymongo.ASCENDING)],name='state',unique=False)

'state'

In [29]:
fires.create_index([("DISCOVERY_DATE", pymongo.ASCENDING)],name='disc_date',unique=False)

'disc_date'

In [30]:
fires.create_index([("CONT_DATE", pymongo.ASCENDING)],name='cont_date',unique=False)

'cont_date'