## Import Libraries

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import joblib

## Read File

In [None]:
# `target` names the column the regressors are trained to predict; it is
# declared first so the dataset load below reads as the main step of the cell.
target = 'Flood'
df = pd.read_csv('Model Flood.csv')

## Encode the Categorical Values

In [8]:
def encode_label_column(frame, column):
    """Label-encode one column of `frame` in place.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose `column` is overwritten with integer codes.
    column : str
        Name of the categorical column to encode.

    Returns
    -------
    (LabelEncoder, dict)
        The encoder fitted on the column, and a ``{label: code}`` mapping
        where each code is the label's position in ``encoder.classes_``.
    """
    encoder = LabelEncoder()
    frame[column] = encoder.fit_transform(frame[column])
    # A label's code is its index in `classes_`, so the mapping can be built
    # directly instead of transforming the classes a second time.
    mapping = {label: code for code, label in enumerate(encoder.classes_)}
    return encoder, mapping


# One fitted encoder per column is kept so that none is lost when the next
# column is encoded.
# NOTE: this cell overwrites the columns of `df`. Re-running it without
# reloading `df` would encode the integer codes again and the mappings would
# no longer contain the original labels.
label_encoders = {}
label_encoders['Disaster'], mapping_disaster = encode_label_column(df, 'Disaster')
label_encoders['District'], mapping_district = encode_label_column(df, 'District')
label_encoders['Location'], mapping_location = encode_label_column(df, 'Location')

# Kept for backward compatibility: `label_encoder` previously ended this cell
# fitted on the 'Location' column.
label_encoder = label_encoders['Location']


## Export the encoded values to text files

In [9]:
def write_mapping(path, mapping):
    """Write `mapping` to `path`, one ``label : code`` line per entry.

    Parameters
    ----------
    path : str
        Destination text file; it is created or truncated.
    mapping : dict
        Label-to-code mapping; keys and values are written via ``str()``.
    """
    # 'w' is sufficient because the file is only written, never read back.
    # UTF-8 is set explicitly so labels with non-ASCII characters are written
    # the same way on every platform.
    with open(path, 'w', encoding='utf-8') as file:
        file.writelines(f'{key} : {code}\n' for key, code in mapping.items())


write_mapping('Flood_District_Mapped.txt', mapping_district)
write_mapping('Flood_Location_Mapped.txt', mapping_location)
# NOTE(review): `mapping_disaster` is not exported here - confirm whether the
# 'Disaster' codes need to be decoded outside this notebook.

## Train and Evaluate the Models

In [13]:
# Experiment settings, named so they can be tuned in one place.
RANDOM_STATE = 21         # seeds the split and both models for reproducible scores
PREDICTION_FLOOR = 0.03   # lower clip bound applied to predictions before scoring
PREDICTION_CEILING = 0.99 # upper clip bound applied to predictions before scoring

# Features are every column except the target.
X = df.drop(columns=target)
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# The forest draws bootstrap samples and feature subsets at random, so it is
# seeded; otherwise the reported error changes on every run.
random_forest_model = RandomForestRegressor(
    n_estimators=500, max_depth=12, random_state=RANDOM_STATE
)
random_forest_model.fit(X_train, y_train)

xgb_model = XGBRegressor(
    n_estimators=500, max_depth=6, learning_rate=0.1, random_state=RANDOM_STATE
)
xgb_model.fit(X_train, y_train)

random_forest_predictions = random_forest_model.predict(X_test)
xgb_predictions = xgb_model.predict(X_test)

# The errors below are measured on the clipped predictions; the models
# themselves still return unclipped values.
# NOTE(review): confirm the clip bounds match whatever post-processing is
# applied where the models are consumed.
random_forest_predictions = np.clip(random_forest_predictions, PREDICTION_FLOOR, PREDICTION_CEILING)
xgb_predictions = np.clip(xgb_predictions, PREDICTION_FLOOR, PREDICTION_CEILING)

rfr_mse = mean_squared_error(y_test, random_forest_predictions)
print(f'Mean Squared Error rfr: {rfr_mse}')

xgb_mse = mean_squared_error(y_test, xgb_predictions)
print(f'Mean Squared Error xgb: {xgb_mse}')


Mean Squared Error rfr: 0.16982295565060382
Mean Squared Error xgb: 0.17694292022368152


## Export the models as pkl files

In [None]:
# Each pickle stores a three-item list: [fitted model, feature column order,
# target column name].
ref_columns = [*X.columns]
rfr_bundle = [random_forest_model, ref_columns, target]
xgb_bundle = [xgb_model, ref_columns, target]

joblib.dump(rfr_bundle, 'Flood_RFRModel.pkl')
# Left as the final expression so the cell still displays the written filename.
joblib.dump(xgb_bundle, 'Flood_XGBModel.pkl')


['Flood_XGBModel.pkl']