In [None]:
# ## INTRODUCTION
# This analysis evaluates two sets of data

# 1. All data, regardless of County

# Various parameters are evaluated, such as injuries per acres burned, injuries per personnel 
# involved, etc.
# Various statistics are calculated for each parameter.

# 2. Data by County

# Similar parameters are evaluated to qualitatively observe variations among Counties.

# No statistical analysis is conducted because it would be meaningless, as it would require to 
# draw statistics (mean, etc) on all the Counties.

# An interesting evaluation, not conducted with the current dataset, would be to normalize the 
# parameters of interest by the acreage of each County


In [95]:
# 1. ESTABLISH DEPENDENCIES AND SET UP FILES

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
import scipy.stats as st
from scipy.stats import linregress

In [96]:
# Output File (CSV)
pyre_data_path_ale = "Resources/California_Fire_Incidents.csv"

In [97]:
pyre1_df = pd.read_csv(pyre_data_path_ale)

In [98]:
# Select column headers for data frame
pyre1_df = pyre1_df[["UniqueId", "CountyIds","Counties", 
                         "AcresBurned", "Injuries","Fatalities", "Days Burned",
                         "AirTankers", "ArchiveYear", "Dozers", "Engines", "Helicopters",
                         "WaterTenders","CrewsInvolved","PersonnelInvolved", 
                         "StructuresDamaged", "StructuresDestroyed", "StructuresEvacuated",
                         "StructuresThreatened"]
                       ]                
pyre1_df.head(1)

Unnamed: 0,UniqueId,CountyIds,Counties,AcresBurned,Injuries,Fatalities,Days Burned,AirTankers,ArchiveYear,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,5fb18d4d-213f-4d83-a179-daaf11939e78,55,Tuolumne,257314.0,,,20,,2013,,,,,,,,,,


In [99]:
# Revise data frame to include fires > 10 acres only 

pyre1_df = pyre1_df.loc[pyre1_df["AcresBurned"]>10]

pyre1_df = pyre1_df.sort_values("AcresBurned", ascending = 0)

pyre1_df = pyre1_df.drop(["CountyIds", "Days Burned", "ArchiveYear"], axis = 1)

pyre1_df = pyre1_df.reset_index(drop=True)

pyre1_df.head(1)


Unnamed: 0,UniqueId,Counties,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Mendocino,410203.0,3.0,1.0,,,,,,,,,246.0,,1050.0


In [100]:
# Create variable for two separate evaluations.

# First evaluation for all data, regardless of the County:

df = pyre1_df

df.head(1)

Unnamed: 0,UniqueId,Counties,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,90b0daf7-5d84-42d9-bb35-bb78d4faf950,Mendocino,410203.0,3.0,1.0,,,,,,,,,246.0,,1050.0


In [101]:
# Second evaluation by County:

df1 = pyre1_df.groupby(["Counties"]).sum()

df1.head(1)

Unnamed: 0_level_0,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
Counties,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Alameda,6387.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,18.0,0.0,1.0,0.0,0.0


In [102]:
# 2. ANALYSIS OF ALL DATA (NOT BY COUNTY) 

In [103]:
# 2.1 For all data, regardless of county: conduct statistical evaluation; add variance and remove percentiles

df
df = df.describe().round(3)
df.loc['var'] = df.apply(lambda x: x["std"]**0.5)
df = df.drop(["25%","50%", "75%"]).reset_index().round(2)
df.head(10)

Unnamed: 0,index,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
0,count,1557.0,119.0,21.0,28.0,123.0,190.0,84.0,146.0,170.0,202.0,67.0,173.0,0.0,30.0
1,mean,4813.16,3.52,8.62,4.07,7.58,23.66,5.36,7.82,11.62,328.63,67.97,274.86,,522.8
2,std,27905.01,3.82,18.53,6.4,14.03,41.09,7.26,12.72,14.48,523.0,155.77,1566.02,,739.59
3,min,11.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0
4,max,410203.0,26.0,85.0,27.0,76.0,256.0,29.0,79.0,82.0,3100.0,783.0,18804.0,,2600.0
5,var,167.05,1.95,4.3,2.53,3.75,6.41,2.7,3.57,3.81,22.87,12.48,39.57,,27.2


In [104]:
# 2.2 For all data, regardless of county: add columns showing parameters normalized 
# by acres burned, personnel involved and crews involved.

df

df['100AcresBurned'] = df['AcresBurned'].div(10**2)

df['Injuries_per_100AcresBurned'] = df['Injuries']/df['AcresBurned']*100

df['Injuries_per_AcresBurned'] = df['Injuries']/df['AcresBurned']

df['Injuries_per_100Personnel'] = df['Injuries']/df['PersonnelInvolved']*100

df['StructuresDestroyed_per_PersonnelInvolved'] = df['StructuresDestroyed']/df['PersonnelInvolved']

df['StructuresDamaged_per_PersonnelInvolved'] = df['StructuresDamaged']/df['PersonnelInvolved']

df['Personnel_per_Injury'] = df['PersonnelInvolved']/df['Injuries']

df['Engines_per_100AcresBurned'] = df['Engines']/df['100AcresBurned']

df['Injuries_per_10Crews'] = df['Injuries']/df['CrewsInvolved']*10

df


Unnamed: 0,index,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,...,StructuresThreatened,100AcresBurned,Injuries_per_100AcresBurned,Injuries_per_AcresBurned,Injuries_per_100Personnel,StructuresDestroyed_per_PersonnelInvolved,StructuresDamaged_per_PersonnelInvolved,Personnel_per_Injury,Engines_per_100AcresBurned,Injuries_per_10Crews
0,count,1557.0,119.0,21.0,28.0,123.0,190.0,84.0,146.0,170.0,...,30.0,15.57,7.642903,0.076429,58.910891,0.856436,0.331683,1.697479,12.202954,7.0
1,mean,4813.16,3.52,8.62,4.07,7.58,23.66,5.36,7.82,11.62,...,522.8,48.1316,0.073133,0.000731,1.071113,0.836381,0.206828,93.360795,0.491569,3.02926
2,std,27905.01,3.82,18.53,6.4,14.03,41.09,7.26,12.72,14.48,...,739.59,279.0501,0.013689,0.000137,0.730402,2.994302,0.297839,136.910995,0.14725,2.638122
3,min,11.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.11,0.0,0.0,,,,,0.0,
4,max,410203.0,26.0,85.0,27.0,76.0,256.0,29.0,79.0,82.0,...,2600.0,4102.03,0.006338,6.3e-05,0.83871,6.065806,0.252581,119.230769,0.062408,3.170732
5,var,167.05,1.95,4.3,2.53,3.75,6.41,2.7,3.57,3.81,...,27.2,1.6705,1.167315,0.011673,8.526454,1.730214,0.545693,11.728205,3.837174,5.11811


In [105]:
# Remove columns not required for evaluation, for clarity

df = df.drop(["Fatalities","AirTankers","Dozers", "Helicopters", "WaterTenders", "StructuresEvacuated", "100AcresBurned", "Injuries_per_AcresBurned"], axis =1)

df.round(2)

Unnamed: 0,index,AcresBurned,Injuries,Engines,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresThreatened,Injuries_per_100AcresBurned,Injuries_per_100Personnel,StructuresDestroyed_per_PersonnelInvolved,StructuresDamaged_per_PersonnelInvolved,Personnel_per_Injury,Engines_per_100AcresBurned,Injuries_per_10Crews
0,count,1557.0,119.0,190.0,170.0,202.0,67.0,173.0,30.0,7.64,58.91,0.86,0.33,1.7,12.2,7.0
1,mean,4813.16,3.52,23.66,11.62,328.63,67.97,274.86,522.8,0.07,1.07,0.84,0.21,93.36,0.49,3.03
2,std,27905.01,3.82,41.09,14.48,523.0,155.77,1566.02,739.59,0.01,0.73,2.99,0.3,136.91,0.15,2.64
3,min,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,
4,max,410203.0,26.0,256.0,82.0,3100.0,783.0,18804.0,2600.0,0.01,0.84,6.07,0.25,119.23,0.06,3.17
5,var,167.05,1.95,6.41,3.81,22.87,12.48,39.57,27.2,1.17,8.53,1.73,0.55,11.73,3.84,5.12


In [106]:
# 2. ANALYSIS OF DATA BY COUNTY

# 2.1 Sum values by County

df1

df1 = df1.groupby(["Counties"]).sum()

df1

Unnamed: 0_level_0,AcresBurned,Injuries,Fatalities,AirTankers,Dozers,Engines,Helicopters,WaterTenders,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened
Counties,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Alameda,6387.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,18.0,0.0,1.0,0.0,0.0
Amador,6265.0,2.0,0.0,4.0,7.0,57.0,3.0,16.0,13.0,190.0,0.0,68.0,0.0,0.0
Butte,190652.0,23.0,85.0,14.0,76.0,405.0,32.0,91.0,135.0,5580.0,63.0,19021.0,0.0,0.0
Calaveras,2648.0,4.0,0.0,0.0,18.0,35.0,18.0,18.0,18.0,189.0,1.0,9.0,0.0,0.0
Colusa,459316.0,3.0,1.0,0.0,82.0,273.0,23.0,84.0,63.0,3241.0,0.0,281.0,0.0,2075.0
Contra Costa,6884.0,1.0,0.0,0.0,5.0,3.0,0.0,4.0,8.0,231.0,0.0,1.0,0.0,0.0
Del Norte,38407.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
El Dorado,114411.0,19.0,0.0,0.0,1.0,42.0,2.0,7.0,15.0,50.0,3.0,157.0,0.0,2600.0
Fresno,214401.0,4.0,0.0,0.0,15.0,106.0,5.0,26.0,47.0,1534.0,5.0,15.0,0.0,5.0
Glenn,413201.0,3.0,1.0,0.0,5.0,6.0,5.0,5.0,11.0,221.0,0.0,247.0,0.0,1050.0


In [107]:
# 2.2 For analysis by County: add columns showing parameters normalized by acres burned, personnel involved and crews involved.
#Remove columns not required

df1

df1['100AcresBurned'] = df1['AcresBurned'].div(10**2)

df1['Injuries_per_100AcresBurned'] = df1['Injuries']/df1['AcresBurned']*100

df1['Injuries_per_AcresBurned'] = df1['Injuries']/df1['AcresBurned']

df1['Injuries_per_100Personnel'] = df1['Injuries']/df1['PersonnelInvolved']*100

df1['StructuresDestroyed_per_PersonnelInvolved'] = df1['StructuresDestroyed']/df1['PersonnelInvolved']

df1['StructuresDamaged_per_PersonnelInvolved'] = df1['StructuresDamaged']/df1['PersonnelInvolved']

df1['Personnel_per_Injury'] = df1['PersonnelInvolved']/df1['Injuries']

df1['Engines_per_100AcresBurned'] = df1['Engines']/df1['100AcresBurned']

df1['Injuries_per_10Crews'] = df1['Injuries']/df1['CrewsInvolved']*10

df1 = df1.sort_values("AcresBurned", ascending = 0).reset_index(drop = False)

df1 = df1.drop(["Fatalities","AirTankers","Dozers", "Helicopters", "WaterTenders", "StructuresEvacuated", "100AcresBurned", "Injuries_per_AcresBurned"], axis =1)


df1.head()


Unnamed: 0,Counties,AcresBurned,Injuries,Engines,CrewsInvolved,PersonnelInvolved,StructuresDamaged,StructuresDestroyed,StructuresThreatened,Injuries_per_100AcresBurned,Injuries_per_100Personnel,StructuresDestroyed_per_PersonnelInvolved,StructuresDamaged_per_PersonnelInvolved,Personnel_per_Injury,Engines_per_100AcresBurned,Injuries_per_10Crews
0,Lake,582784.0,8.0,379.0,98.0,4407.0,44.0,901.0,2075.0,0.001373,0.181529,0.204447,0.009984,550.875,0.065033,0.816327
1,Mendocino,512702.0,29.0,275.0,76.0,3485.0,44.0,838.0,2075.0,0.005656,0.832138,0.240459,0.012626,120.172414,0.053637,3.815789
2,Trinity,481048.0,2.0,14.0,87.0,761.0,61.0,1657.0,0.0,0.000416,0.262812,2.177398,0.080158,380.5,0.00291,0.229885
3,Colusa,459316.0,3.0,273.0,63.0,3241.0,0.0,281.0,2075.0,0.000653,0.092564,0.086702,0.0,1080.333333,0.059436,0.47619
4,Shasta,448517.0,55.0,203.0,103.0,2768.0,79.0,1940.0,0.0,0.012263,1.986994,0.700867,0.02854,50.327273,0.04526,5.339806


In [108]:
# SCRAP TO BE DELETED (CODE MAY TURN OUT TO BE USEFUL AT SOME POINT)--------------------------------------------------------------------------------------------------------

In [109]:
# df5 = df5.sort_values([("AcresBurned", "mean")], ascending = False).round(2)

# df1 = df[["UniqueId", "Counties", "AcresBurned", 
#           "Injuries","StructuresDestroyed", "Injuries_per_AcresBurned",
# #          "Injuries_per_100AcresBurned" ]
#         ]
# df2 = pd.concat([df1,PersonnelInvolved], axis = 1)

# df3 = df3.drop(axis=1,columns=["25%","50%", "75%"], level = 1)
# DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')

# df1 = df1.loc[
#                 (df1["Injuries_per_MilAcresBurned"] >0) 
#                & (df1["MilAcresBurned"] > 0.001)
#                 & (df1["Personnel_per_MilAcresBurned"] > 0)
#                 & (df1["Personnel_per_Injury"] > 0)
#              ]


# stats_tot_mean = df.groupby(["Counties"]).mean()
# # stats_tot_mean = stats_tot_mean.sort_values("AcresBurned", ascending = 0).reset_index().round(0)

# # Generate a summary statistics table of mean, median, variance, standard deviation, 
# # and SEM of the tumor volume for each regimen
# ac_burn_mean = major.groupby('Counties')['AcresBurned'].mean()
# ac_burn_median = major.groupby('Counties')['AcresBurned'].median()
# ac_burn_variance = major.groupby('Counties')['AcresBurned'].var()
# ac_burn_standard_dv = major.groupby('Counties')['AcresBurned'].std()
# ac_burn_sem = major.groupby('Counties')['AcresBurned'].sem()

# ac_burn_summary_stat = pd.DataFrame({"Acres Burned Mean": ac_burn_mean, 
#                                       "Acres Burned Median": ac_burn_median, 
#                                       "Acres Burned Variance": ac_burn_variance, 
#                                       "Acres Burned Standard Deviation": ac_burn_standard_dv,
#                                       "Acres Burned SEM": ac_burn_sem}
#                                       )

# clean = fires_by_county[(fires_by_county['ArchiveYear'] >= 2020) & (fires_by_county['ArchiveYear'] <= 2010)].index

# fires_by_county.drop(clean, inplace=True)
# fires_by_county.head()


# pyre1_df = pyre1_df.loc[pyre1_df["MajorIncident"]==True]
# pyre1_df.head(1)
# pyre1_df["MajorIncident"].count()