In [1]:
# Dependencies
import pandas as pd

In [104]:
#  Data cleansing 

wild_fire_1991_2015 = pd.read_csv("../wild_fire_source_data/wild_fire_1991_2015.csv")
wild_fire_2010_2019 = pd.read_csv("../wild_fire_source_data/wild_fire_2010_2019.csv")
wild_fire_2020 = pd.read_csv("../wild_fire_source_data/wild_fire_2020.csv")

wild_fire_count_1990_2020 = pd.read_csv("../wild_fire_source_data/wild_fire_count_1990_2020.csv")

wild_fire_count_1990_2020.head(15)

Unnamed: 0,Year,Fires,Acres
0,2020,58950,10122336
1,2019,50477,4664364
2,2018,58083,8767492
3,2017,71499,10026086
4,2016,67743,5509995
5,2015,68151,10125149
6,2014,63312,3595613
7,2013,47579,4319546
8,2012,67774,9326238
9,2011,74126,8711367


In [94]:
# selecting only the necessary columns from 1991-2020 dataset
wild_fire_1991_2015_df = wild_fire_1991_2015[["fire_size","latitude","longitude","state","disc_clean_date"]]

# selecting only the necessary columns from 2010-2019 dataset
wild_fire_2010_2019_df = wild_fire_2010_2019[["brightness(F)","latitude","longitude","acq_date"]]

# selecting only the necessary columns from 2020 dataset
wild_fire_2020_df = wild_fire_2020[["brightness(F)","latitude","longitude","acq_date"]]

# Using .rename(columns={}) in order to rename columns
renamed_1991_2015_df = wild_fire_1991_2015_df.rename(columns={"disc_clean_date" : "date"})
renamed_2010_2019_df = wild_fire_2010_2019_df.rename(columns={"acq_date" : "date"}) 
renamed_2020_df = wild_fire_2020_df.rename(columns={"acq_date" : "date"}) 

# count of dataframe
renamed_1991_2015_df.count()
renamed_2010_2019_df.count()
renamed_2020_df.count()

# removing duplicates
unique_1991_2015_df = renamed_1991_2015_df.drop_duplicates(subset=["date"], keep ="last")
unique_2010_2019_df = renamed_2010_2019_df.drop_duplicates(subset=["date"], keep ="last")
unique_2020_df = renamed_2020_df.drop_duplicates(subset=["date"], keep ="last")

# count after removing duplicates
unique_1991_2015_df_count = unique_1991_2015_df.count()
unique_2010_2019_df_count = unique_2010_2019_df.count()
unique_2020_df_count = unique_2020_df.count() 

# merge all 3 datasets with the common column name "date" 
combined_wild_fire1 = pd.merge(unique_1991_2015_df,unique_2010_2019_df, how="outer", on="date")
combined_wild_fire1

Unnamed: 0,fire_size,latitude_x,longitude_x,state,date,brightness(F),latitude_y,longitude_y
0,20.0,39.540000,-101.670000,KS,12/31/2015,202.37,19.4100,-155.2794
1,1.0,32.406052,-95.878887,TX,12/24/2015,206.51,19.4083,-155.2783
2,154.7,35.293600,-96.547800,OK,12/22/2015,221.45,19.4045,-155.2774
3,20.0,33.918398,-97.657024,TX,12/21/2015,82.49,34.9738,-97.9799
4,1.0,35.193840,-94.197170,AR,12/19/2015,102.11,19.4087,-155.2816
...,...,...,...,...,...,...,...,...
9418,,,,,3/24/2019,114.53,32.4672,-109.9317
9419,,,,,3/25/2019,109.85,35.0714,-119.2449
9420,,,,,3/26/2019,89.87,46.6824,-118.7601
9421,,,,,3/27/2019,98.15,41.8609,-122.8270


In [95]:
# selecting only the necessary columns from merged dataset
combined_wild_fire = combined_wild_fire1[["fire_size","latitude_x","longitude_y","date"]]
renamed_combined_wild_fire1 = combined_wild_fire.rename(columns={"latitude_x" : "latitude","longitude_y" : "longitude"})
renamed_combined_wild_fire1

Unnamed: 0,fire_size,latitude,longitude,date
0,20.0,39.540000,-155.2794,12/31/2015
1,1.0,32.406052,-155.2783,12/24/2015
2,154.7,35.293600,-155.2774,12/22/2015
3,20.0,33.918398,-97.9799,12/21/2015
4,1.0,35.193840,-155.2816,12/19/2015
...,...,...,...,...
9418,,,-109.9317,3/24/2019
9419,,,-119.2449,3/25/2019
9420,,,-118.7601,3/26/2019
9421,,,-122.8270,3/27/2019


In [105]:
# merget the 3rd dataset
combined_wild_fire2 = pd.merge(renamed_combined_wild_fire1,unique_2020_df,how="outer", on="date")
combined_wild_fire2

Unnamed: 0,fire_size,latitude_x,longitude_x,date,brightness(F),latitude_y,longitude_y
0,20.0,39.540000,-155.2794,12/31/2015,,,
1,1.0,32.406052,-155.2783,12/24/2015,,,
2,154.7,35.293600,-155.2774,12/22/2015,,,
3,20.0,33.918398,-97.9799,12/21/2015,,,
4,1.0,35.193840,-155.2816,12/19/2015,,,
...,...,...,...,...,...,...,...
9507,,,,9/23/2020,130.73,37.513,-119.363
9508,,,,9/24/2020,104.09,39.859,-121.281
9509,,,,9/25/2020,135.95,46.464,-120.359
9510,,,,9/26/2020,109.67,21.306,-158.092


In [118]:
# generating bar chart 

# Tell matplotlib that we will be making a bar chart
# Years in x-axis and our y axis is number of fires
# We apply align="edge" to ensure our bars line up with our tick marks
# plt.bar(x_axis, users, color='r', alpha=0.5, align="center")
wild_fire_count_1990_2020.head()
wild_fire_count_1990_2020['Fires'] = wild_fire_count_1990_2020['Fires'].str.replace(',', '').astype(int)
wild_fire_count_1990_2020.head()
# wild_fire_count_1990_2020['year'].astype(str).astype(int)
# wild_fire_count_1990_2020_df = wild_fire_count_1990_2020['Year','Fires']

AttributeError: Can only use .str accessor with string values!