In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error,r2_score
from xgboost import XGBRegressor
import tensorflow as tf
import joblib

In [10]:
# Load the raw flood dataset; the later cells read it through the name `df`.
DATA_CSV = 'Model Flood.csv'
df = pd.read_csv(DATA_CSV)

In [11]:
# Integer-encode the categorical columns and keep a {original label: code}
# lookup for each one so user input can be translated at prediction time.
# NOTE: this overwrites the columns of `df` in place, so re-running the cell
# without reloading `df` would re-encode the already-encoded integers and
# replace the label lookups with integer-to-integer ones.
label_encoder = LabelEncoder()


def _encode_in_place(column_name):
    """Replace df[column_name] with integer codes and return {label: code}."""
    df[column_name] = label_encoder.fit_transform(df[column_name])
    labels = label_encoder.classes_
    return dict(zip(labels, label_encoder.transform(labels)))


# A single encoder instance is refitted per column, in this order; after the
# last call it stays fitted on 'Location'.
mapping_disaster = _encode_in_place('Disaster')
mapping_district = _encode_in_place('District')
mapping_location = _encode_in_place('Location')


In [12]:
# Dump the district and location lookups to text files, one "label : code"
# line per entry, so the codes can be inspected outside the notebook.
for _out_path, _lookup in (('District_Mapped.txt', mapping_district),
                           ('Location_Mapped.txt', mapping_location)):
    with open(_out_path, 'w+') as file:
        file.writelines(
            ' : '.join((str(label), str(code))) + '\n'
            for label, code in _lookup.items()
        )

In [13]:
# Train a random forest and an XGBoost regressor on every column except the
# 'Flood' target, report the test-set MSE of each, and persist both models.

# One seed for the split AND both estimators. The split was already seeded, but
# the random forest was not, so its bootstrap samples (and therefore the
# reported MSE and the pickled model) changed on every run.
SEED = 21

X = df.drop(columns=['Flood'])
y = df['Flood']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

random_forest_model = RandomForestRegressor(n_estimators=500, max_depth=12, random_state=SEED)
random_forest_model.fit(X_train, y_train)

xgb_model = XGBRegressor(n_estimators=500, max_depth=6, learning_rate=0.1, random_state=SEED)
xgb_model.fit(X_train, y_train)

random_forest_predictions = random_forest_model.predict(X_test)
xgb_pred = xgb_model.predict(X_test)

rfr_mse = mean_squared_error(y_test, random_forest_predictions)
print(f'Mean Squared Error rfr: {rfr_mse}')

xgb_mse = mean_squared_error(y_test, xgb_pred)
print("Mean Squared Error xgb:", xgb_mse)

# Persist both fitted estimators; the next cell reloads them from these files.
joblib.dump(random_forest_model, 'RFRModel.pkl')
joblib.dump(xgb_model, 'XGBModel.pkl')


Mean Squared Error rfr: 0.15858323658764303
Mean Squared Error xgb: 0.16249767628858552


['XGBModel.pkl']

In [14]:
# Reload the two persisted estimators (random forest first, then XGBoost).
# joblib.load unpickles the file, so only point it at files you trust.
rfr_model, xgb_model = (
    joblib.load(_model_path) for _model_path in ('RFRModel.pkl', 'XGBModel.pkl')
)

In [15]:
def predict_disaster(disaster,location,location1,location2,location3,location4,district,month,day,rainfall):
    """Predict the 'Flood' target for one observation with both regressors.

    Parameters
    ----------
    disaster : str
        Disaster label; must be a key of ``mapping_disaster``.
    location, location1, location2, location3, location4 : str
        Candidate location names, tried in this order. The first one that is
        a key of ``mapping_location`` supplies the location code.
    district : str
        District label; must be a key of ``mapping_district``.
    month, day : int or str
        Converted with ``int()``.
    rainfall : float or str
        Converted with ``float()``; fills the ``'Rainfall(mm)'`` column.

    Returns
    -------
    tuple
        ``(rfr_prediction, xgb_prediction)``: each model's ``predict`` output
        for the single row, rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``disaster`` or ``district`` is not present in its mapping.
    """
    # Take the first candidate that was seen during encoding. A separate loop
    # name is used so the `location` parameter is not overwritten, and the
    # membership test keeps the valid code 0 from being mistaken for "missing".
    candidates = (location, location1, location2, location3, location4)
    location_code = next(
        (mapping_location[name] for name in candidates if name in mapping_location),
        None,
    )
    if location_code is None:
        # No candidate is known: fall back to a code one past the largest
        # known location code.
        location_code = max(mapping_location.values()) + 1

    # Unknown labels used to become None here, which produced an object-dtype
    # column in the feature frame instead of a clear error.
    disaster_code = mapping_disaster.get(disaster)
    if disaster_code is None:
        raise ValueError(f'Unknown disaster label: {disaster!r}')
    district_code = mapping_district.get(district)
    if district_code is None:
        raise ValueError(f'Unknown district label: {district!r}')

    user_data = pd.DataFrame({
        'Disaster' : disaster_code,
        'Location' : location_code,
        'District' : district_code,
        'Month' : int(month),
        'Day' : int(day),
        'Rainfall(mm)': float(rainfall)
    },index=[0])

    # Predict with the estimators reloaded from disk (rfr_model / xgb_model),
    # so both persisted artifacts are what actually gets exercised.
    rfr_prediction = np.round(rfr_model.predict(user_data),4)
    xgb_prediction = np.round(xgb_model.predict(user_data),4)

    return rfr_prediction,xgb_prediction

# Example call: several candidate location strings are supplied and the
# function uses the first one that appears in mapping_location.
prediction1, prediction2 = predict_disaster(
    disaster='Flood',
    location='ds',
    location1='gfds',
    location2='Pulungasmulla',
    location3='ds3',
    location4='543',
    district='Ampara',
    month=12,
    day=20,
    rainfall=553.1,
)
for _label, _value in (('RandomForest prediction', prediction1),
                       ('XGBRegressor prediction', prediction2)):
    print(_label, _value)


0 2738 0 12 20 553.1
RandomForest prediction [0.7376]
XGBRegressor prediction [0.7509]


In [16]:
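# TODO: diagnostic plots are disabled. plot_predicted_vs_true, regression_scatter
# and plot_residuals are not defined or imported in the visible cells; define or
# import them before re-enabling the calls below.
# plot_predicted_vs_true(y_test,random_forest_predictions)
# regression_scatter(y_test,random_forest_predictions)
# plot_residuals(y_test,random_forest_predictions)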