In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the forest-fires dataset into `df`. The path is relative, so the CSV is
# looked up in the kernel's current working directory.
df = pd.read_csv('forestfires.csv')

In [3]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [4]:
# Median burned area among the rows with a strictly positive area; it is used
# below as the cut-off between the two "affected" classes.
burned_mask = df['area'].gt(0)
non_zero_areas = df.loc[burned_mask, 'area']
median_non_zero = non_zero_areas.median()


In [5]:
# Label every row by its burned area:
#   area == 0                      -> 'NotAffected'
#   0 < area <= median_non_zero    -> 'PartiallyAffected'
#   area >  median_non_zero        -> 'MostlyAffected'
# np.select picks the first condition that matches. A row matching none of
# them (a missing/NaN area) keeps the default None instead of being silently
# labelled 'MostlyAffected', which is what the nested np.where fallback did.
area = df['area']
df['area_class'] = np.select(
    [area == 0, area <= median_non_zero, area > median_non_zero],
    ['NotAffected', 'PartiallyAffected', 'MostlyAffected'],
    default=None,
)


In [6]:
# Preview again to confirm the new `area_class` column was added.
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,NotAffected


In [11]:
# Split the frame into one subset per `area_class` label.
area_class = df['area_class']
not_affected = df.loc[area_class.eq('NotAffected')]
partially_affected = df.loc[area_class.eq('PartiallyAffected')]
mostly_affected = df.loc[area_class.eq('MostlyAffected')]


(135, 14)

In [10]:
# Stack the two "affected" subsets row-wise, partially-affected rows first.
# The original row labels are kept (no ignore_index).
affected_parts = [partially_affected, mostly_affected]
merged_subsets = pd.concat(affected_parts)
merged_subsets

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
138,9,9,jul,tue,85.8,48.3,313.4,3.9,18.0,42,2.7,0.0,0.36,PartiallyAffected
139,1,4,sep,tue,91.0,129.5,692.6,7.0,21.7,38,2.2,0.0,0.43,PartiallyAffected
140,2,5,sep,mon,90.9,126.5,686.5,7.0,21.9,39,1.8,0.0,0.47,PartiallyAffected
141,1,2,aug,wed,95.5,99.9,513.3,13.2,23.3,31,4.5,0.0,0.55,PartiallyAffected
142,8,6,aug,fri,90.1,108.0,529.8,12.5,21.2,51,8.9,0.0,0.61,PartiallyAffected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,7,5,aug,tue,96.1,181.1,671.2,14.3,27.3,63,4.9,6.4,10.82,MostlyAffected
504,4,3,aug,wed,94.5,139.4,689.1,20.0,28.9,29,4.9,0.0,49.59,MostlyAffected
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,MostlyAffected
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,MostlyAffected


In [12]:
# (rows, columns) of the 'NotAffected' subset.
not_affected.shape


(247, 14)

In [13]:
# (rows, columns) of the 'PartiallyAffected' subset.
partially_affected.shape

(135, 14)

In [14]:
# (rows, columns) of the 'MostlyAffected' subset.
mostly_affected.shape

(135, 14)

In [15]:
# (rows, columns) of the full frame, for comparison with the subset sizes above.
df.shape

(517, 14)

In [17]:
# Ascending sort on temperature; ties are broken by wind, then by area.
sort_keys = ['temp', 'wind', 'area']
sorted_df = df.sort_values(by=sort_keys, ascending=True)
sorted_df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
280,4,6,dec,fri,84.7,26.7,352.6,4.1,2.2,59,4.9,0.0,9.27,MostlyAffected
282,6,3,feb,sun,84.9,27.5,353.5,3.4,4.2,51,4.0,0.0,0.00,NotAffected
465,2,2,feb,sat,79.5,3.6,15.3,1.8,4.6,59,0.9,0.0,6.84,MostlyAffected
463,6,5,feb,tue,75.1,4.4,16.2,1.9,4.6,82,6.3,0.0,5.39,PartiallyAffected
279,4,4,dec,mon,85.4,25.4,349.7,2.6,4.6,21,8.5,0.0,9.77,MostlyAffected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
492,1,3,aug,fri,95.9,158.0,633.6,11.3,32.4,27,2.2,0.0,0.00,NotAffected
491,4,4,aug,thu,95.8,152.0,624.1,13.8,32.4,21,4.5,0.0,0.00,NotAffected
496,4,5,aug,mon,96.2,175.5,661.8,16.8,32.6,26,3.1,0.0,2.77,PartiallyAffected
484,2,5,aug,sun,94.9,130.3,587.1,14.1,33.1,25,4.0,0.0,26.43,MostlyAffected


In [18]:
# Swap the axes: the original columns become the rows and each original row
# label becomes a column.
transposed_df = df.T

In [19]:
# Display the transposed frame (one row per original column).
transposed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,507,508,509,510,511,512,513,514,515,516
X,7,7,7,8,8,8,8,8,8,7,...,2,1,5,6,8,4,2,7,1,6
Y,5,4,4,6,6,6,6,6,6,5,...,4,2,4,5,6,3,4,4,4,3
month,mar,oct,oct,mar,mar,aug,aug,aug,sep,sep,...,aug,aug,aug,aug,aug,aug,aug,aug,aug,nov
day,fri,tue,sat,fri,sun,sun,mon,mon,tue,sat,...,fri,fri,fri,fri,sun,sun,sun,sun,sat,tue
FFMC,86.2,90.6,90.6,91.7,89.3,92.3,92.3,91.5,91.0,92.5,...,91.0,91.0,91.0,91.0,81.6,81.6,81.6,81.6,94.4,79.5
DMC,26.2,35.4,43.7,33.3,51.3,85.3,88.9,145.4,129.5,88.0,...,166.9,166.9,166.9,166.9,56.7,56.7,56.7,56.7,146.0,3.0
DC,94.3,669.1,686.9,77.5,102.2,488.0,495.6,608.2,692.6,698.6,...,752.6,752.6,752.6,752.6,665.6,665.6,665.6,665.6,614.7,106.7
ISI,5.1,6.7,6.7,9.0,9.6,14.7,8.5,10.7,7.0,7.1,...,7.1,7.1,7.1,7.1,1.9,1.9,1.9,1.9,11.3,1.1
temp,8.2,18.0,14.6,8.3,11.4,22.2,24.1,8.0,13.1,22.8,...,25.9,25.9,21.1,18.2,27.8,27.8,21.9,21.2,25.6,11.8
RH,51,33,33,97,99,29,27,86,63,40,...,41,41,71,62,35,32,71,70,42,31


In [21]:
# Reshape wide -> long: keep `month` as the identifier and unpivot the selected
# measurement columns into (Metric, value) pairs, one row per month/metric
# observation.
metric_columns = ['temp', 'wind', 'area', 'RH', 'rain']
melted_df = df.melt(
    id_vars=['month'],
    value_vars=metric_columns,
    var_name='Metric',
    value_name='value',
)
melted_df.head()

Unnamed: 0,month,Metric,value
0,mar,temp,8.2
1,oct,temp,18.0
2,oct,temp,14.6
3,mar,temp,8.3
4,mar,temp,11.4


In [23]:
# Reshape long -> wide: one row per month, one column per metric, each cell
# holding the mean of that metric's values for the month. reset_index turns
# `month` back into an ordinary column.
wide_df = (
    melted_df
    .pivot_table(index='month', columns='Metric', values='value', aggfunc='mean')
    .reset_index()
)
wide_df.head()

Metric,month,RH,area,rain,temp,wind
0,apr,46.888889,8.891111,0.0,12.044444,4.666667
1,aug,45.48913,12.489076,0.058696,21.631522,4.086413
2,dec,38.444444,13.33,0.0,4.522222,7.644444
3,feb,55.7,6.275,0.0,9.635,3.755
4,jan,89.0,0.0,0.0,5.25,2.0
