In [1]:
import pandas as pd
import numpy as np

# Read the forest-fires records from the CSV file in the working directory,
# then show the resulting frame as the cell output.
df = pd.read_csv(filepath_or_buffer='forestfires.csv')
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [3]:
def classify_area(area, partial_max=100):
    """Label a burned-area value with an impact category.

    Parameters
    ----------
    area : number
        Burned-area value for a single record.
    partial_max : number, default 100
        Largest area that is still labelled 'Partially Affected'. The
        default reproduces the cut-off that used to be hard-coded.

    Returns
    -------
    str
        'Not Affected' when ``area`` equals 0, 'Partially Affected' when
        ``area`` is at most ``partial_max``, and 'Mostly Affected' otherwise.

    Notes
    -----
    NOTE(review): inputs are not validated. A negative ``area`` satisfies
    ``area <= partial_max`` (for the default) and is labelled
    'Partially Affected'; a NaN fails both comparisons and is labelled
    'Mostly Affected'. Confirm the data contains neither before relying on
    the labels.
    """
    if area == 0:
        return 'Not Affected'
    if area <= partial_max:
        return 'Partially Affected'
    return 'Mostly Affected'

# Derive the categorical impact label of every row from its burned area
# and store it in a new 'Impact' column, then show the updated frame.
df['Impact'] = df['area'].map(classify_area)
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,Impact
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00,Not Affected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00,Not Affected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00,Not Affected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00,Not Affected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00,Not Affected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,Partially Affected
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,Partially Affected
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16,Partially Affected
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00,Not Affected


In [6]:
# One subset of the frame per impact label.
not_affected = df.loc[df['Impact'].eq('Not Affected')]
partially_affected = df.loc[df['Impact'].eq('Partially Affected')]
mostly_affected = df.loc[df['Impact'].eq('Mostly Affected')]

# Report (rows, columns) for each subset, one tuple per line.
print(
    not_affected.shape,
    partially_affected.shape,
    mostly_affected.shape,
    sep='\n',
)

(247, 14)
(259, 14)
(11, 14)


In [7]:
# Stack the unaffected rows on top of the partially affected rows
# (row-wise concatenation), then show the size of the combined frame.
merged = pd.concat(objs=[not_affected, partially_affected], axis=0)
merged.shape

(506, 14)

In [8]:
# Order the records from the highest to the lowest temperature.
# The result is bound to its own name rather than `sorted`, so the
# built-in sorted() stays callable in every later cell of the session.
sorted_by_temp = df.sort_values(by="temp", ascending=False)
print(sorted_by_temp)

     X  Y month  day  FFMC    DMC     DC   ISI  temp  RH  wind  rain   area  \
498  6  5   aug  tue  96.1  181.1  671.2  14.3  33.3  26   2.7   0.0  40.54   
484  2  5   aug  sun  94.9  130.3  587.1  14.1  33.1  25   4.0   0.0  26.43   
496  4  5   aug  mon  96.2  175.5  661.8  16.8  32.6  26   3.1   0.0   2.77   
491  4  4   aug  thu  95.8  152.0  624.1  13.8  32.4  21   4.5   0.0   0.00   
492  1  3   aug  fri  95.9  158.0  633.6  11.3  32.4  27   2.2   0.0   0.00   
..  .. ..   ...  ...   ...    ...    ...   ...   ...  ..   ...   ...    ...   
278  4  4   dec  mon  85.4   25.4  349.7   2.6   4.6  21   8.5   0.0  22.03   
465  2  2   feb  sat  79.5    3.6   15.3   1.8   4.6  59   0.9   0.0   6.84   
463  6  5   feb  tue  75.1    4.4   16.2   1.9   4.6  82   6.3   0.0   5.39   
282  6  3   feb  sun  84.9   27.5  353.5   3.4   4.2  51   4.0   0.0   0.00   
280  4  6   dec  fri  84.7   26.7  352.6   4.1   2.2  59   4.9   0.0   9.27   

                 Impact  
498  Partially Affected  

In [10]:
# Swap rows and columns: every original column becomes a row.
transposed = df.transpose()
transposed.shape

(14, 517)

In [12]:
# Peek at the top-left corner of the transposed frame: first 3 rows, first 4 columns.
print(transposed.iloc[0:3, 0:4])

         0    1    2    3
X        7    7    7    8
Y        5    4    4    6
month  mar  oct  oct  mar


In [13]:
# Reshape wide -> long: keep month/day as identifiers and unpivot the four
# measurement columns into (Weather, Value) pairs.
melted = df.melt(
    id_vars=["month", "day"],
    value_vars=["wind", "rain", "area", "temp"],
    var_name="Weather",
    value_name="Value",
)

In [14]:
# Show the long-format frame (columns: month, day, Weather, Value).
melted

Unnamed: 0,month,day,Weather,Value
0,mar,fri,wind,6.7
1,oct,tue,wind,0.9
2,oct,sat,wind,1.3
3,mar,fri,wind,4.0
4,mar,sun,wind,1.8
...,...,...,...,...
2063,aug,sun,temp,27.8
2064,aug,sun,temp,21.9
2065,aug,sun,temp,21.2
2066,aug,sat,temp,25.6


In [17]:
# Reshape long -> wide: one row per (month, day), one column per Weather
# variable, with the values of each group added together.
cast = pd.pivot_table(
    melted,
    values="Value",
    index=["month", "day"],
    columns="Weather",
    aggfunc="sum",
)

In [18]:
# Print the pivoted table as plain text.
print(cast)

Weather       area  rain   temp   wind
month day                             
apr   fri     0.00   0.0   16.7    3.1
      mon     3.35   0.0   10.9    3.1
      sat     0.00   0.0    9.3    4.5
      sun    61.13   0.0   44.7   17.0
      thu    15.54   0.0   11.6   11.6
...            ...   ...    ...    ...
sep   sat  1545.11   0.0  538.1   86.5
      sun   378.29   0.0  551.8  106.8
      thu   112.48   0.0  428.2   70.5
      tue   500.69   0.0  355.7   65.2
      wed   182.40   0.0  285.7   47.3

[64 rows x 4 columns]
