In [74]:
# Dependencies and Setup

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
import scipy.stats as st
from scipy.stats import linregress

In [75]:
# Output File (CSV)
pyre_data_path = "Resources/California_Fire_Incidents.csv"

In [76]:
pyre_data = pd.read_csv(pyre_data_path)

In [77]:
pyre1_df = pyre_data[["MajorIncident","UniqueId", "CountyIds","Counties", 
                         "AcresBurned", "Injuries","Fatalities", "Days Burned",
                         "AirTankers", "ArchiveYear", "Dozers", "Engines", "Helicopters",
                         "WaterTenders","CrewsInvolved","PersonnelInvolved", 
                         "StructuresDamaged", "StructuresDestroyed", "StructuresEvacuated",
                         "StructuresThreatened"]
                       ]                
pyre1_df.head(1)

Unnamed: 0,MajorIncident,UniqueId,CountyIds,Counties,AcresBurned,Injuries,Fatalities,Days Burned,AirTankers,ArchiveYear,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,False,5fb18d4d-213f-4d83-a179-daaf11939e78,55,Tuolumne,257314.0,,,20,,2013,,,,,,,,,,


In [78]:
pyre1_df = pyre1_df.loc[pyre1_df["MajorIncident"]==True]
pyre1_df.head(1)

Unnamed: 0,MajorIncident,UniqueId,CountyIds,Counties,AcresBurned,Injuries,Fatalities,Days Burned,AirTankers,ArchiveYear,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
4,True,46731fb8-3350-4920-bdf7-910ac0eb715c,56,Ventura,24251.0,10.0,,9,,2013,8.0,117.0,11.0,11.0,47.0,2167.0,6.0,10.0,,


In [79]:
pyre1_df["MajorIncident"].count()

383

In [80]:
df = pyre1_df.drop(["MajorIncident"], axis=1)
df.head(1)

Unnamed: 0,UniqueId,CountyIds,Counties,AcresBurned,Injuries,Fatalities,Days Burned,AirTankers,ArchiveYear,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
4,46731fb8-3350-4920-bdf7-910ac0eb715c,56,Ventura,24251.0,10.0,,9,,2013,8.0,117.0,11.0,11.0,47.0,2167.0,6.0,10.0,,


In [81]:
# 1. NORMALIZE VARIOUS PARAMETERS

In [82]:
# 1.1 Injuries/Million Acres Burned

df['MilAcresBurned'] = df['AcresBurned'].div(1000000)
df['Injuries_per_MilAcresBurned'] = df['Injuries']/df['MilAcresBurned']

df1 = df[["UniqueId", "Counties", "AcresBurned", 
          "Injuries","MilAcresBurned","Injuries_per_MilAcresBurned"]
          ]

df1 = df1.loc[(df1["Injuries_per_MilAcresBurned"] > 0.1) & (df1["MilAcresBurned"] > 0.01)]

df1 = df1.sort_values("AcresBurned", ascending = 0).reset_index(drop = True).round(2)

df1

Unnamed: 0,UniqueId,Counties,AcresBurned,Injuries,MilAcresBurned,Injuries_per_MilAcresBurned
0,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Glenn,410203.0,3.0,0.41,7.31
1,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Mendocino,410203.0,3.0,0.41,7.31
2,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Lake,410203.0,3.0,0.41,7.31
3,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Colusa,410203.0,3.0,0.41,7.31
4,75dafe80-f18a-4a4a-9a37-4b564c5f6014,Butte,153336.0,3.0,0.15,19.56
5,3df6ecf7-f095-483d-b1c4-2dc969d61261,El Dorado,97717.0,12.0,0.1,122.8
6,97ff14e8-6d08-4214-ad3b-847fa3a0e84a,Yolo,90288.0,1.0,0.09,11.08
7,97ff14e8-6d08-4214-ad3b-847fa3a0e84a,Napa,90288.0,1.0,0.09,11.08
8,809d101c-8aa3-4a53-bf20-f38ae7ec7941,Sonoma,77758.0,4.0,0.08,51.44
9,040d03d0-fbdd-438c-8174-f81e4a1bba57,San Luis Obispo,46344.0,1.0,0.05,21.58


In [83]:
# 1.2 Added: Personnel/Million Acres Burned & Personnel/Injury

PersonnelInvolved = df["PersonnelInvolved"]
df2 = pd.concat([df1,PersonnelInvolved], axis = 1)

df2['Personnel_per_MilAcresBurned'] = df2['PersonnelInvolved']/df['MilAcresBurned']

df2['Personnel_per_Injury'] = df2['PersonnelInvolved']/df['Injuries']

df2 = df2.loc[(df2["Injuries_per_MilAcresBurned"] > 0.1) 
              & (df2["MilAcresBurned"] > 0.01)
#              & (df2["Personnel_per_MilAcresBurned"] > 0.01)
#           & (df2["Personnel_per_Injury"] > 0.01)
             ]

df2 = df2.sort_values("AcresBurned", ascending = 0).reset_index(drop = True).round(2)

df2.head(3)

Unnamed: 0,UniqueId,Counties,AcresBurned,Injuries,MilAcresBurned,Injuries_per_MilAcresBurned,PersonnelInvolved,Personnel_per_MilAcresBurned,Personnel_per_Injury
0,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Glenn,410203.0,3.0,0.41,7.31,,,
1,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Mendocino,410203.0,3.0,0.41,7.31,,,
2,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Lake,410203.0,3.0,0.41,7.31,,,


In [101]:
# 1.3 Analysis by County

df4 = df2.drop(["UniqueId"], axis =1)

df4 = df4.groupby(["Counties"]).sum() 

df4 = df4.sort_values("AcresBurned", ascending = 0).reset_index().round(2)

df4

Unnamed: 0,Counties,AcresBurned,Injuries,MilAcresBurned,Injuries_per_MilAcresBurned,PersonnelInvolved,Personnel_per_MilAcresBurned,Personnel_per_Injury
0,Mendocino,446726.0,4.0,0.45,34.69,1811.0,260014.36,301.83
1,Lake,425388.0,4.0,0.43,73.16,726.0,229311.43,363.0
2,Colusa,410203.0,3.0,0.41,7.31,0.0,0.0,0.0
3,Glenn,410203.0,3.0,0.41,7.31,0.0,0.0,0.0
4,Butte,153336.0,3.0,0.15,19.56,2167.0,89357.14,216.7
5,El Dorado,97717.0,12.0,0.1,122.8,0.0,0.0,0.0
6,Napa,90288.0,1.0,0.09,11.08,0.0,0.0,0.0
7,Yolo,90288.0,1.0,0.09,11.08,2106.0,103784.74,81.0
8,Sonoma,77758.0,4.0,0.08,51.44,0.0,0.0,0.0
9,San Luis Obispo,46344.0,1.0,0.05,21.58,898.0,78572.05,179.6


In [84]:
# 2. STATISTICAL ANALYSIS

In [85]:
# 2.1 Statistica Analysis of All Data

df3 = df2.describe().round(1)
df3 = df3.drop(["25%","50%", "75%"]).reset_index()
df3

# df3 = df3.drop(axis=1,columns=["25%","50%", "75%"], level = 1)
# DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')

Unnamed: 0,index,AcresBurned,Injuries,MilAcresBurned,Injuries_per_MilAcresBurned,PersonnelInvolved,Personnel_per_MilAcresBurned,Personnel_per_Injury
0,count,17.0,17.0,17.0,17.0,10.0,10.0,10.0
1,mean,141163.2,5.4,0.1,153.5,1207.9,155114.8,183.3
2,std,157769.8,6.4,0.2,313.8,771.1,92954.4,104.7
3,min,15185.0,1.0,0.0,7.3,188.0,42363.4,31.3
4,max,410203.0,26.0,0.4,1281.3,2167.0,304323.2,363.0


In [86]:
# 2.2 Statistical Analysis by County



Unnamed: 0,Counties,AcresBurned,Injuries,MilAcresBurned,Injuries_per_MilAcresBurned,PersonnelInvolved,Personnel_per_MilAcresBurned,Personnel_per_Injury
0,Mendocino,446726.0,4.0,0.45,34.69,1811.0,260014.36,301.83
1,Lake,425388.0,4.0,0.43,73.16,726.0,229311.43,363.0
2,Colusa,410203.0,3.0,0.41,7.31,0.0,0.0,0.0
3,Glenn,410203.0,3.0,0.41,7.31,0.0,0.0,0.0
4,Butte,153336.0,3.0,0.15,19.56,2167.0,89357.14,216.7
5,El Dorado,97717.0,12.0,0.1,122.8,0.0,0.0,0.0
6,Napa,90288.0,1.0,0.09,11.08,0.0,0.0,0.0
7,Yolo,90288.0,1.0,0.09,11.08,2106.0,103784.74,81.0
8,Sonoma,77758.0,4.0,0.08,51.44,0.0,0.0,0.0
9,San Luis Obispo,46344.0,1.0,0.05,21.58,898.0,78572.05,179.6


In [100]:
# 2.2 Statistical Analysis by County
# Similar to 1.3 except that two Counties had multiple events (Lake and Mendocino)

df5 = df2.drop(["UniqueId"], axis =1)

df5 = df5.groupby(["Counties"]).describe() 
 
df5 = df5.drop(axis=1,columns=["count","std","25%","50%", "75%"], level = 1).reset_index()

df5 = df5.sort_values([("AcresBurned", "mean")], ascending = False).round(2)

# In [11]: df.sort_values([('Group1', 'C')], ascending=False)

df5

Unnamed: 0_level_0,Counties,AcresBurned,AcresBurned,AcresBurned,Injuries,Injuries,Injuries,MilAcresBurned,MilAcresBurned,MilAcresBurned,...,Injuries_per_MilAcresBurned,PersonnelInvolved,PersonnelInvolved,PersonnelInvolved,Personnel_per_MilAcresBurned,Personnel_per_MilAcresBurned,Personnel_per_MilAcresBurned,Personnel_per_Injury,Personnel_per_Injury,Personnel_per_Injury
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max,mean,min,max,mean,min,max,...,max,mean,min,max,mean,min,max,mean,min,max
1,Colusa,410203.0,410203.0,410203.0,3.0,3.0,3.0,0.41,0.41,0.41,...,7.31,,,,,,,,,
3,Glenn,410203.0,410203.0,410203.0,3.0,3.0,3.0,0.41,0.41,0.41,...,7.31,,,,,,,,,
5,Mendocino,223363.0,36523.0,410203.0,2.0,1.0,3.0,0.22,0.04,0.41,...,27.38,1811.0,1811.0,1811.0,260014.36,260014.36,260014.36,301.83,301.83,301.83
4,Lake,212694.0,15185.0,410203.0,2.0,1.0,3.0,0.22,0.02,0.41,...,65.85,726.0,726.0,726.0,229311.43,229311.43,229311.43,363.0,363.0,363.0
0,Butte,153336.0,153336.0,153336.0,3.0,3.0,3.0,0.15,0.15,0.15,...,19.56,2167.0,2167.0,2167.0,89357.14,89357.14,89357.14,216.7,216.7,216.7
2,El Dorado,97717.0,97717.0,97717.0,12.0,12.0,12.0,0.1,0.1,0.1,...,122.8,,,,,,,,,
6,Napa,90288.0,90288.0,90288.0,1.0,1.0,1.0,0.09,0.09,0.09,...,11.08,,,,,,,,,
14,Yolo,90288.0,90288.0,90288.0,1.0,1.0,1.0,0.09,0.09,0.09,...,11.08,2106.0,2106.0,2106.0,103784.74,103784.74,103784.74,81.0,81.0,81.0
11,Sonoma,77758.0,77758.0,77758.0,4.0,4.0,4.0,0.08,0.08,0.08,...,51.44,,,,,,,,,
8,San Luis Obispo,46344.0,46344.0,46344.0,1.0,1.0,1.0,0.05,0.05,0.05,...,21.58,898.0,898.0,898.0,78572.05,78572.05,78572.05,179.6,179.6,179.6


In [None]:
# SCRAP TO BE DELETED --------------------------------------------------------------------------------------------------------

In [None]:

# stats_tot_mean = df.groupby(["Counties"]).mean()
# # stats_tot_mean = stats_tot_mean.sort_values("AcresBurned", ascending = 0).reset_index().round(0)

In [None]:
# # Generate a summary statistics table of mean, median, variance, standard deviation, 
# # and SEM of the tumor volume for each regimen
# ac_burn_mean = major.groupby('Counties')['AcresBurned'].mean()
# ac_burn_median = major.groupby('Counties')['AcresBurned'].median()
# ac_burn_variance = major.groupby('Counties')['AcresBurned'].var()
# ac_burn_standard_dv = major.groupby('Counties')['AcresBurned'].std()
# ac_burn_sem = major.groupby('Counties')['AcresBurned'].sem()

In [None]:
# ac_burn_summary_stat = pd.DataFrame({"Acres Burned Mean": ac_burn_mean, 
#                                       "Acres Burned Median": ac_burn_median, 
#                                       "Acres Burned Variance": ac_burn_variance, 
#                                       "Acres Burned Standard Deviation": ac_burn_standard_dv,
#                                       "Acres Burned SEM": ac_burn_sem}
#                                       )

In [None]:
# ac_burn_summary_stat.round(0).head()

In [None]:
# clean = fires_by_county[(fires_by_county['ArchiveYear'] >= 2020) & (fires_by_county['ArchiveYear'] <= 2010)].index

# fires_by_county.drop(clean, inplace=True)
# fires_by_county.head()