In [1]:
import pandas as pd

# Load the forest-fires dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='forestfires.csv')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [2]:
# --- a. Create Data Subsets ---
# Bucket the burned area into three labelled classes. The intervals are
# right-closed, so the bin edges below translate to:
#   (-0.1, 0]  -> NotAffected        (area == 0)
#   (0, 10]    -> PartiallyAffected
#   (10, inf]  -> MostlyAffected
# The -0.1 lower edge is only there so that an area of exactly 0 lands in the first bin.
df['area_class'] = pd.cut(
    x=df['area'],
    bins=[-0.1, 0, 10, float('inf')],
    labels=['NotAffected', 'PartiallyAffected', 'MostlyAffected'],
    right=True,
)

# One subset per class, selected with a boolean mask on the new column
df_not = df.loc[df['area_class'].eq('NotAffected')]
df_partial = df.loc[df['area_class'].eq('PartiallyAffected')]
df_mostly = df.loc[df['area_class'].eq('MostlyAffected')]

In [6]:
# Show the last five rows of the PartiallyAffected subset.
df_partial.tail(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
503,2,4,aug,wed,94.5,139.4,689.1,20.0,29.2,30,4.9,0.0,1.95,PartiallyAffected
505,1,2,aug,thu,91.0,163.2,744.4,10.1,26.7,35,1.8,0.0,5.8,PartiallyAffected
509,5,4,aug,fri,91.0,166.9,752.6,7.1,21.1,71,7.6,1.4,2.17,PartiallyAffected
510,6,5,aug,fri,91.0,166.9,752.6,7.1,18.2,62,5.4,0.0,0.43,PartiallyAffected
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,PartiallyAffected


In [11]:
# --- b. Merge Two Subsets ---
# Stack the two subsets vertically, NotAffected rows first. The index is not
# reset, so every row keeps the label it had in df.
df_merged = pd.concat(objs=[df_not, df_partial], axis=0)
# Optional export (disabled):
# df_merged.to_csv("merged_subset.csv", index=False)
df_merged.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,NotAffected


In [13]:
# --- c. Sort Data ---
# Ascending sort on temp; ties are broken by wind, then by area.
# sort_values returns a new frame, so df itself is left in its original order.
df_sorted = df.sort_values(by=['temp', 'wind', 'area'], ascending=True)
# Optional export (disabled):
# df_sorted.to_csv("sorted_data.csv", index=False)
df_sorted.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,area_class
280,4,6,dec,fri,84.7,26.7,352.6,4.1,2.2,59,4.9,0.0,9.27,PartiallyAffected
282,6,3,feb,sun,84.9,27.5,353.5,3.4,4.2,51,4.0,0.0,0.0,NotAffected
465,2,2,feb,sat,79.5,3.6,15.3,1.8,4.6,59,0.9,0.0,6.84,PartiallyAffected
463,6,5,feb,tue,75.1,4.4,16.2,1.9,4.6,82,6.3,0.0,5.39,PartiallyAffected
279,4,4,dec,mon,85.4,25.4,349.7,2.6,4.6,21,8.5,0.0,9.77,PartiallyAffected


In [16]:
# --- d. Transpose Data ---
# Swap the axes: the column names of df become the row index and the row
# labels of df become the columns.
df_transposed = df.T
# Optional export (disabled):
# df_transposed.to_csv("transposed_data.csv")
df_transposed.head(n=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,507,508,509,510,511,512,513,514,515,516
X,7,7,7,8,8,8,8,8,8,7,...,2,1,5,6,8,4,2,7,1,6
Y,5,4,4,6,6,6,6,6,6,5,...,4,2,4,5,6,3,4,4,4,3
month,mar,oct,oct,mar,mar,aug,aug,aug,sep,sep,...,aug,aug,aug,aug,aug,aug,aug,aug,aug,nov
day,fri,tue,sat,fri,sun,sun,mon,mon,tue,sat,...,fri,fri,fri,fri,sun,sun,sun,sun,sat,tue
FFMC,86.2,90.6,90.6,91.7,89.3,92.3,92.3,91.5,91.0,92.5,...,91.0,91.0,91.0,91.0,81.6,81.6,81.6,81.6,94.4,79.5
DMC,26.2,35.4,43.7,33.3,51.3,85.3,88.9,145.4,129.5,88.0,...,166.9,166.9,166.9,166.9,56.7,56.7,56.7,56.7,146.0,3.0
DC,94.3,669.1,686.9,77.5,102.2,488.0,495.6,608.2,692.6,698.6,...,752.6,752.6,752.6,752.6,665.6,665.6,665.6,665.6,614.7,106.7
ISI,5.1,6.7,6.7,9.0,9.6,14.7,8.5,10.7,7.0,7.1,...,7.1,7.1,7.1,7.1,1.9,1.9,1.9,1.9,11.3,1.1
temp,8.2,18.0,14.6,8.3,11.4,22.2,24.1,8.0,13.1,22.8,...,25.9,25.9,21.1,18.2,27.8,27.8,21.9,21.2,25.6,11.8
RH,51,33,33,97,99,29,27,86,63,40,...,41,41,71,62,35,32,71,70,42,31


In [21]:
# e. Melting data to long format
# month, day and area_class are kept as identifier columns. Each of the five
# measurement columns is unpivoted into a (Feature, Value) pair, so every row
# of df contributes five rows to the long frame.
melted_df = df.melt(
    id_vars=['month', 'day', 'area_class'],
    value_vars=['temp', 'RH', 'wind', 'rain', 'area'],
    var_name='Feature',
    value_name='Value',
)

melted_df.tail(n=5)

Unnamed: 0,month,day,area_class,Feature,Value
2580,aug,sun,PartiallyAffected,area,6.44
2581,aug,sun,MostlyAffected,area,54.29
2582,aug,sun,MostlyAffected,area,11.16
2583,aug,sat,NotAffected,area,0.0
2584,nov,tue,NotAffected,area,0.0


In [24]:
# f. Casting data to wide format
# Spread the Feature/Value pairs back out into one column per feature.
# NOTE: the long frame carries no per-row identifier, only month, day and
# area_class. Rows that share the same (month, day, area_class) combination are
# therefore averaged together (aggfunc='mean'); the result is a per-group
# summary, not a row-for-row reconstruction of the original frame.
wide_df = pd.pivot_table(
    melted_df,
    values='Value',
    index=['month', 'day', 'area_class'],
    columns='Feature',
    aggfunc='mean',
    observed=False,
)
# Turn the grouping keys from index levels back into ordinary columns.
wide_df = wide_df.reset_index()
wide_df.head(n=5)

Feature,month,day,area_class,RH,area,rain,temp,wind
0,apr,fri,NotAffected,20.0,0.0,0.0,16.7,3.1
1,apr,mon,PartiallyAffected,64.0,3.35,0.0,10.9,3.1
2,apr,sat,NotAffected,44.0,0.0,0.0,9.3,4.5
3,apr,sun,NotAffected,51.0,0.0,0.0,15.5,3.8
4,apr,sun,MostlyAffected,33.0,61.13,0.0,13.7,9.4
