## Import Libraries

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor
import joblib

## Read File

In [20]:
# Column that the regressors below are trained to predict.
target = 'Flood'

# Modelling table; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Model Flood.csv')

## Encode the disaster

In [21]:
# Fit the encoder on the raw 'Disaster' values, then overwrite the column
# with the integer labels it produces.
# NOTE(review): the fitted encoder is kept only in `label_encoder`; it is not
# among the objects dumped in the export cell. Confirm that whatever consumes
# the exported models can reproduce this encoding.
disaster_values = df['Disaster']
label_encoder = LabelEncoder().fit(disaster_values)
df['Disaster'] = label_encoder.transform(disaster_values)

## Read the Encoded Categorical Values and Apply Them to the Dataset

In [22]:
def load_mapping(path):
    """Read a ``name: code`` text file into a dict of str -> int.

    Every non-blank line must hold a category name and its integer code
    separated by a colon. The line is split on the LAST colon, so a name that
    itself contains ':' is kept intact. Blank lines (for example a trailing
    newline at the end of the file) are skipped.

    Parameters
    ----------
    path : str
        Location of the mapping file.

    Returns
    -------
    dict
        Whitespace-stripped category name -> integer code.

    Raises
    ------
    ValueError
        If a non-blank line contains no colon, or its code is not an integer.
    """
    mapping = {}
    # Read-only mode: the file is never written, so 'r+' would only add a
    # needless requirement for write permission on the mapping files.
    with open(path, 'r') as mapping_file:
        for line_number, line in enumerate(mapping_file, start=1):
            entry = line.strip()
            if not entry:
                continue
            key, separator, value = entry.rpartition(':')
            if not separator:
                raise ValueError(
                    f'{path}, line {line_number}: expected "name: code", got {entry!r}'
                )
            mapping[key.strip()] = int(value.strip())
    return mapping


def warn_unmapped(series, mapping, column):
    """Print a warning listing values of ``series`` that have no key in ``mapping``.

    ``Series.map`` turns such values into NaN without any message, so this
    makes the loss visible (it also fires if the cell is re-run on a column
    that has already been replaced by its integer codes).
    """
    unmapped = sorted(set(series.dropna().unique()) - set(mapping), key=str)
    if unmapped:
        print(
            f'Warning: {len(unmapped)} {column} value(s) have no code and '
            f'will become NaN, e.g. {unmapped[:10]}'
        )


mapping_district = load_mapping('../../Integration/District_Mapped.txt')
mapping_location = load_mapping('../../Integration/Location_Mapped.txt')

warn_unmapped(df['District'], mapping_district, 'District')
warn_unmapped(df['Location'], mapping_location, 'Location')

# Replace the category names with the integer codes read from the files.
df['District'] = df['District'].map(mapping_district)
df['Location'] = df['Location'].map(mapping_location)

## Model the Prediction

In [23]:
# One seed for the split and for both models, so that a fresh
# Restart & Run All reproduces the same fitted models and metrics.
SEED = 21

# Bounds applied to the raw regressor outputs before they are scored.
# NOTE(review): presumably 'Flood' is a probability-like score and the bounds
# keep predictions away from 0 and 1 -- confirm the intent of 0.03 / 0.99.
PREDICTION_MIN = 0.03
PREDICTION_MAX = 0.99

# Features are every column except the target.
X = df.drop(columns=target)
y = df[target]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# random_state is required here: without it the bootstrap samples and feature
# draws differ on every run, so the forest and its metrics are not repeatable.
random_forest_model = RandomForestRegressor(n_estimators=500, max_depth=12, random_state=SEED)
random_forest_model.fit(X_train, y_train)

xgb_model = XGBRegressor(n_estimators=500, max_depth=6, learning_rate=0.1, random_state=SEED)
xgb_model.fit(X_train, y_train)

random_forest_predictions = random_forest_model.predict(X_test)
xgb_predictions = xgb_model.predict(X_test)

# The clipped arrays overwrite the raw predictions, so every later use of
# these names (including the error metrics) sees the clipped values.
random_forest_predictions = np.clip(random_forest_predictions, PREDICTION_MIN, PREDICTION_MAX)
xgb_predictions = np.clip(xgb_predictions, PREDICTION_MIN, PREDICTION_MAX)


In [24]:
# Held-out error of each regressor (random forest first, then XGBoost),
# measured against the predictions as they stand after clipping.
rfr_mse, xgb_mse = (
    mean_squared_error(y_test, predictions)
    for predictions in (random_forest_predictions, xgb_predictions)
)
rfr_mae, xgb_mae = (
    mean_absolute_error(y_test, predictions)
    for predictions in (random_forest_predictions, xgb_predictions)
)

# Report both MSE values, then both MAE values, one per line.
print(f'Mean Squared Error rfr: {rfr_mse}')
print(f'Mean Squared Error xgb: {xgb_mse}')
print(f'Mean Absolute Error rfr: {rfr_mae}')
print(f'Mean Absolute Error xgb: {xgb_mae}')

Mean Squared Error rfr: 0.16980576708018902
Mean Squared Error xgb: 0.17694292022368152
Mean Absolute Error rfr: 0.3523138934454112
Mean Absolute Error xgb: 0.3474152840202047


## Export the models as pkl files

In [25]:
# Feature names in training order, stored next to each model together with
# the target name so a loader gets [model, feature columns, target].
ref_columns = [*X.columns]

rfr_bundle = [random_forest_model, ref_columns, target]
xgb_bundle = [xgb_model, ref_columns, target]

joblib.dump(rfr_bundle, 'Flood_RFRModel.pkl')
# Left as the cell's last bare expression so the written filename is displayed.
joblib.dump(xgb_bundle, 'Flood_XGBModel.pkl')


['Flood_XGBModel.pkl']