In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the monthly fire data. The path is relative to the notebook's working directory.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
FIRES_PICKLE_PATH = 'fires_by_month.pkl'
fires_by_month = pd.read_pickle(FIRES_PICKLE_PATH)

In [None]:
# Preview the loaded frame: a heading line followed by the first five rows.
print("First 5 Rows of the DataFrame:", fires_by_month.head(), sep="\n")

First 5 Rows of the DataFrame:
  state  fire_year  fire_month  acres_burned  days_burning  fire_count
0    AK       1992           5        4202.0         135.0          14
1    AK       1992           6       86401.0         417.0          23
2    AK       1992           7       48516.7         500.0          26
3    AK       1992           8        3305.0          92.0           4
4    AK       1992           9          20.0           1.0           1


## Add and modify columns

In [None]:
# Mean acres burned per burning day, computed with vectorized Series division.
# Rows whose days_burning is 0 are not special-cased here.
fires_by_month['mean_acres_per_day'] = (
    fires_by_month['acres_burned'].div(fires_by_month['days_burning'])
)
print(fires_by_month.head())

  state  fire_year  fire_month  acres_burned  days_burning  fire_count  \
0    AK       1992           5        4202.0         135.0          14   
1    AK       1992           6       86401.0         417.0          23   
2    AK       1992           7       48516.7         500.0          26   
3    AK       1992           8        3305.0          92.0           4   
4    AK       1992           9          20.0           1.0           1   

   mean_acres_per_day  
0           31.125926  
1          207.196643  
2           97.033400  
3           35.923913  
4           20.000000  


In [None]:
# Add the per-day mean again, this time with a row-wise lambda. The lambda guards
# against days_burning == 0 and stores NaN for those rows instead of dividing.
# (numpy is imported as np in the notebook's import cell.)
fires_by_month['mean_acres_per_day_lambda'] = fires_by_month.apply(
    lambda row: row['acres_burned'] / row['days_burning'] if row['days_burning'] != 0 else np.nan,
    axis=1,
)
print(fires_by_month.head())

# Direct division and the lambda handle a zero divisor differently: the direct
# (vectorized) division in 'mean_acres_per_day' produces 'inf', while the lambda
# returns NaN.
# Rows where days_burning is 0 still have valid acres_burned values, so a 0 in
# days_burning is treated as a missing value and the mean is reported as NaN
# (not a number) rather than 0.
# The rows with days_burning == 0 are printed below to show this.

# Filter rows where 'days_burning' equals 0
zero_rows = fires_by_month[fires_by_month['days_burning'] == 0]

# Only print the section when at least one such row exists
if not zero_rows.empty:
    print("\nRows with zero in the 'days_burning' column:")
    print(zero_rows.head())


  state  fire_year  fire_month  acres_burned  days_burning  fire_count  \
0    AK       1992           5        4202.0         135.0          14   
1    AK       1992           6       86401.0         417.0          23   
2    AK       1992           7       48516.7         500.0          26   
3    AK       1992           8        3305.0          92.0           4   
4    AK       1992           9          20.0           1.0           1   

   mean_acres_per_day  mean_acres_per_day_lambda  
0           31.125926                  31.125926  
1          207.196643                 207.196643  
2           97.033400                  97.033400  
3           35.923913                  35.923913  
4           20.000000                  20.000000  

Rows with zero in the 'days_burning' column:
   state  fire_year  fire_month  acres_burned  days_burning  fire_count  \
10    AK       1994           3         200.0           0.0           1   
29    AK       1997           3         300.0        

In [None]:
# write a function to convert the fire_month column from an int value to a string value such as 'Jan', 'Feb', etc.

# Lookup tables are built once at definition time instead of on every call.
_MONTH_ABBR = {
    1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr", 5: "May", 6: "Jun",
    7: "Jul", 8: "Aug", 9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"
}
_MONTH_ABBR_NAMES = frozenset(_MONTH_ABBR.values())


def int_to_month_abbr(month_int):
    """Convert a month number (1-12) to its three-letter abbreviation.

    Parameters
    ----------
    month_int : int or str
        Month number from 1 to 12. A string that is already one of the
        abbreviations produced by this function is returned unchanged, so
        running the conversion over an already-converted column is a no-op
        instead of turning every value into "Unknown".

    Returns
    -------
    str
        "Jan" through "Dec", or "Unknown" for any other value.
    """
    if isinstance(month_int, str):
        # Already-converted values pass through; any other text is unknown.
        return month_int if month_int in _MONTH_ABBR_NAMES else "Unknown"
    return _MONTH_ABBR.get(month_int, "Unknown")


In [None]:
# Replace the numeric fire_month values with their three-letter abbreviations by
# running int_to_month_abbr over every value in the column.
month_labels = fires_by_month['fire_month'].apply(int_to_month_abbr)
fires_by_month['fire_month'] = month_labels

print(
    "DataFrame after converting fire_month from numbers to month names",
    fires_by_month.head(),
    sep="\n",
)

DataFrame after converting fire_month from numbers to month names
  state  fire_year fire_month  acres_burned  days_burning  fire_count  \
0    AK       1992        May        4202.0         135.0          14   
1    AK       1992        Jun       86401.0         417.0          23   
2    AK       1992        Jul       48516.7         500.0          26   
3    AK       1992        Aug        3305.0          92.0           4   
4    AK       1992        Sep          20.0           1.0           1   

   mean_acres_per_day  mean_acres_per_day_lambda  
0           31.125926                  31.125926  
1          207.196643                 207.196643  
2           97.033400                  97.033400  
3           35.923913                  35.923913  
4           20.000000                  20.000000  


## Work with indexes

In [None]:
# Build a three-level MultiIndex (state, fire_year, fire_month) and rebind the
# name to the re-indexed frame.
index_levels = ['state', 'fire_year', 'fire_month']
fires_by_month = fires_by_month.set_index(index_levels)
print("\nDataFrame with Multi-Index", fires_by_month.head(), sep="\n")


DataFrame with Multi-Index
                            acres_burned  days_burning  fire_count  \
state fire_year fire_month                                           
AK    1992      May               4202.0         135.0          14   
                Jun              86401.0         417.0          23   
                Jul              48516.7         500.0          26   
                Aug               3305.0          92.0           4   
                Sep                 20.0           1.0           1   

                            mean_acres_per_day  mean_acres_per_day_lambda  
state fire_year fire_month                                                 
AK    1992      May                  31.125926                  31.125926  
                Jun                 207.196643                 207.196643  
                Jul                  97.033400                  97.033400  
                Aug                  35.923913                  35.923913  
                Sep      

In [None]:
# Pivot the fire_month index level into columns; the result is kept in a separate
# variable so fires_by_month itself is left as it was.
fires_unstacked = fires_by_month.unstack(level='fire_month')
print(
    "\nUnstacked DataFrame (fire_month unstacked)\n",
    fires_unstacked.head(),
    sep="\n",
)


Unstacked DataFrame (fire_month unstacked)

                acres_burned                                                  \
fire_month               Apr      Aug Dec Feb Jan       Jul       Jun    Mar   
state fire_year                                                                
AK    1992               NaN   3305.0 NaN NaN NaN   48516.7   86401.0    NaN   
      1993             113.0   1132.0 NaN NaN NaN  483493.3  197740.2    NaN   
      1994               NaN  24139.0 NaN NaN NaN   50510.0  184428.0  200.0   
      1995             810.6      NaN NaN NaN NaN   27856.0    5622.0    NaN   
      1996             190.8     10.0 NaN NaN NaN   56143.0  446273.4    NaN   

                              ... mean_acres_per_day_lambda          \
fire_month           May Nov  ...                       Dec Feb Jan   
state fire_year               ...                                     
AK    1992        4202.0 NaN  ...                       NaN NaN NaN   
      1993        4152.0 NaN  

In [None]:
# Turn the index levels of fires_by_month back into regular columns and restore a
# default integer index.
fires_by_month = fires_by_month.reset_index()
print("DataFrame After Resetting Index:\n", fires_by_month.head(), sep="\n")
# Also show the column labels of the unstacked frame created earlier.
print(fires_unstacked.columns)


DataFrame After Resetting Index:

  state  fire_year fire_month  acres_burned  days_burning  fire_count  \
0    AK       1992        May        4202.0         135.0          14   
1    AK       1992        Jun       86401.0         417.0          23   
2    AK       1992        Jul       48516.7         500.0          26   
3    AK       1992        Aug        3305.0          92.0           4   
4    AK       1992        Sep          20.0           1.0           1   

   mean_acres_per_day  mean_acres_per_day_lambda  
0           31.125926                  31.125926  
1          207.196643                 207.196643  
2           97.033400                  97.033400  
3           35.923913                  35.923913  
4           20.000000                  20.000000  
MultiIndex([(             'acres_burned', 'Apr'),
            (             'acres_burned', 'Aug'),
            (             'acres_burned', 'Dec'),
            (             'acres_burned', 'Feb'),
            (        

## Combine data

In [None]:
# Build a one-row frame for a new fire. The values are positional and are matched
# to the columns of fires_by_month in order:
#   state, fire_year, fire_month, acres_burned, days_burning, fire_count,
#   mean_acres_per_day, mean_acres_per_day_lambda
new_fire_values = ['CA', 2021, 'Jun', 1000, 100, 1, 10, 10]
new_fire = pd.DataFrame([new_fire_values], columns=fires_by_month.columns)
new_fire.head()

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,CA,2021,Jun,1000,100,1,10,10


In [None]:
# Append the new fire row after the existing rows, producing a new combined frame.
fires_by_month_combined = pd.concat(
    objs=[fires_by_month, new_fire],
    ignore_index=False,  # keep each frame's own index labels for now
)

In [None]:
# Show the last five rows of the combined DataFrame; the appended row still
# carries the index label it had in new_fire.
print(fires_by_month_combined.tail(n=5))

     state  fire_year fire_month  acres_burned  days_burning  fire_count  \
9296    WY       2015        Sep        5977.0         126.0          25   
9297    WY       2015        Oct       10337.8          15.0           6   
9298    WY       2015        Nov         509.3           3.0           4   
9299    WY       2015        Dec          72.0           0.0           2   
0       CA       2021        Jun        1000.0         100.0           1   

      mean_acres_per_day  mean_acres_per_day_lambda  
9296           47.436508                  47.436508  
9297          689.186667                 689.186667  
9298          169.766667                 169.766667  
9299                 inf                        NaN  
0              10.000000                  10.000000  


In [None]:
# Renumber the rows of the combined DataFrame, discarding the old index labels
# rather than keeping them as a column.
fires_by_month_combined = fires_by_month_combined.reset_index(drop=True)

In [None]:
# Show the last five rows of the combined DataFrame again to check its index labels.
print(fires_by_month_combined.tail(n=5))

     state  fire_year fire_month  acres_burned  days_burning  fire_count  \
9296    WY       2015        Sep        5977.0         126.0          25   
9297    WY       2015        Oct       10337.8          15.0           6   
9298    WY       2015        Nov         509.3           3.0           4   
9299    WY       2015        Dec          72.0           0.0           2   
9300    CA       2021        Jun        1000.0         100.0           1   

      mean_acres_per_day  mean_acres_per_day_lambda  
9296           47.436508                  47.436508  
9297          689.186667                 689.186667  
9298          169.766667                 169.766667  
9299                 inf                        NaN  
9300           10.000000                  10.000000  


## Fix the SettingWithCopyWarning

In [None]:
# The statement that led to the SettingWithCopyWarning selected the Alaska rows
# without copying them:
#   fires_ak = fires_by_month.query('state == "AK"')
# Taking an explicit copy first avoids the warning when a column is assigned below.
is_alaska = fires_by_month['state'] == "AK"
fires_ak = fires_by_month[is_alaska].copy()

# Round the per-day mean on the Alaska copy only.
fires_ak['mean_acres_per_day'] = fires_ak['mean_acres_per_day'].round()
print("Alaska Fire Data")
fires_ak.head()

Alaska Fire Data


Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.0,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.0,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0,97.0334
3,AK,1992,Aug,3305.0,92.0,4,36.0,35.923913
4,AK,1992,Sep,20.0,1.0,1,20.0,20.0


In [None]:
# Re-inspect the source frame after the rounding was applied to the fires_ak copy.
fires_by_month.head(n=5)

Unnamed: 0,state,fire_year,fire_month,acres_burned,days_burning,fire_count,mean_acres_per_day,mean_acres_per_day_lambda
0,AK,1992,May,4202.0,135.0,14,31.125926,31.125926
1,AK,1992,Jun,86401.0,417.0,23,207.196643,207.196643
2,AK,1992,Jul,48516.7,500.0,26,97.0334,97.0334
3,AK,1992,Aug,3305.0,92.0,4,35.923913,35.923913
4,AK,1992,Sep,20.0,1.0,1,20.0,20.0
