In [88]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import pandas as pd
from sklearn.model_selection import train_test_split



In [89]:
fire=pd.read_csv("forestfires.csv")


fire

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [90]:
fire.describe()

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
count,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0,517.0
mean,4.669246,4.299807,90.644681,110.87234,547.940039,9.021663,18.889168,44.288201,4.017602,0.021663,12.847292
std,2.313778,1.2299,5.520111,64.046482,248.066192,4.559477,5.806625,16.317469,1.791653,0.295959,63.655818
min,1.0,2.0,18.7,1.1,7.9,0.0,2.2,15.0,0.4,0.0,0.0
25%,3.0,4.0,90.2,68.6,437.7,6.5,15.5,33.0,2.7,0.0,0.0
50%,4.0,4.0,91.6,108.3,664.2,8.4,19.3,42.0,4.0,0.0,0.52
75%,7.0,5.0,92.9,142.4,713.9,10.8,22.8,53.0,4.9,0.0,6.57
max,9.0,9.0,96.2,291.3,860.6,56.1,33.3,100.0,9.4,6.4,1090.84


In [91]:
fire.isnull().sum()


X        0
Y        0
month    0
day      0
FFMC     0
DMC      0
DC       0
ISI      0
temp     0
RH       0
wind     0
rain     0
area     0
dtype: int64

In [92]:
feature=fire.iloc[:,0:11]
target=fire.iloc[:,-1]

In [93]:
feature


Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8
...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0


In [94]:
target

0       0.00
1       0.00
2       0.00
3       0.00
4       0.00
       ...  
512     6.44
513    54.29
514    11.16
515     0.00
516     0.00
Name: area, Length: 517, dtype: float64

In [95]:
x_train,x_test,y_train,y_test=train_test_split(feature,target,test_size=0.2,random_state=214)

In [96]:
categorical_cols = [cname for cname in x_train.columns if
                    x_train[cname].nunique() < 12 and 
                    x_train[cname].dtype == "object"]

categorical_cols

['month', 'day']

In [97]:
numerical_cols = [cname for cname in x_train.columns if 
                x_train[cname].dtype in ['int64', 'float64']]

numerical_cols

['X', 'Y', 'FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind']

In [98]:
my_col=categorical_cols+numerical_cols
X_train=x_train[my_col].copy()
X_test = x_test[my_col].copy()

In [99]:
numerical_transformer=Pipeline(steps=[('imputer',SimpleImputer(strategy='constant')),
                                     ('scaling',StandardScaler()),
                                     ('onehot', OneHotEncoder(handle_unknown='ignore'))])
numerical_transformer

In [100]:
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

categorical_transformer

In [101]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)])

In [103]:
model=RandomForestRegressor(max_depth=9, max_features="log2", max_leaf_nodes=9, n_estimators=25)
clf=Pipeline(steps=[('preprocessor', preprocessor),('model', model)])
clf.fit(X_train,y_train)
preds = clf.predict(X_test)

print('MAE:', mean_absolute_error(y_test, preds))

MAE: 14.326319865115067
