In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

In [3]:
# Load the raw dataset. `model_df` and `df` are bound to the SAME DataFrame
# object (no copy is made), so any column written through one name is also
# visible through the other.
model_df = df = pd.read_csv('Model Landslide.csv')

In [4]:
# Replace each categorical column with LabelEncoder integer codes and keep a
# {original label -> code} dictionary per column for later lookups.
# A single encoder is re-fitted per column, so each mapping is captured
# immediately after its own fit; after the loop the encoder holds the
# 'Location' classes.
# NOTE: the columns are overwritten in place, so re-running this cell encodes
# the already-encoded integers instead of the original labels.
label_encoder = LabelEncoder()
encoded_mappings = {}
for column in ('Disaster', 'District', 'Location'):
    model_df[column] = label_encoder.fit_transform(df[column])
    classes = label_encoder.classes_
    encoded_mappings[column] = dict(zip(classes, label_encoder.transform(classes)))

mapping_disaster = encoded_mappings['Disaster']
mapping_district = encoded_mappings['District']
mapping_location = encoded_mappings['Location']

In [5]:
# One seed for the split and both models so that Restart & Run All reproduces
# the same metrics (the forest was previously unseeded, so its MSE drifted
# from run to run).
SEED = 21

# Features are every column except the target 'Landslide'.
X = df.drop(columns=['Landslide'])
y = df['Landslide']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

random_forest_model = RandomForestRegressor(n_estimators=600, random_state=SEED)
random_forest_model.fit(X_train, y_train)

xgb_reg = XGBRegressor(n_estimators=500, random_state=SEED)
xgb_reg.fit(X_train, y_train)

# Score both models on the held-out rows.
random_forest_predictions = random_forest_model.predict(X_test)
xgb_pred = xgb_reg.predict(X_test)

rfr_mse = mean_squared_error(y_test, random_forest_predictions)
print(f'Mean Squared Error rfr: {rfr_mse}')

xgb_mse = mean_squared_error(y_test, xgb_pred)
print("Mean Squared Error xgb:", xgb_mse)

Mean Squared Error rfr: 0.22362103132192193
Mean Squared Error xgb: 0.2874785700775354


In [6]:
# By this point df['Location'] holds integer codes (the column was overwritten
# by the label-encoding cell), so comparing it with the string 'Balangoda'
# can never match. Translate the name to its code(s) through mapping_location
# first. A substring match is used in case the stored names carry extra text
# -- NOTE(review): tighten to an exact match if the names are known to be clean.
balangoda_codes = [
    code for name, code in mapping_location.items() if 'Balangoda' in name
]
location_row = df[df['Location'].isin(balangoda_codes)]

# Raw (un-encoded) user input for a single prediction request.
new_usr_data = pd.DataFrame({
    'Disaster' : 'Landslide',
    'Location' : 'Nochchiyagama',
    'District' : 'Anuradhapura'
}, index=[0])


In [7]:
# Pull the raw labels out of the single-row user input.
new_disaster = new_usr_data['Disaster'].iloc[0]
new_location = new_usr_data['Location'].iloc[0]
new_district = new_usr_data['District'].iloc[0]

# Resolve the location label to its encoded value.
# 1. Prefer an exact key match so a longer name that merely contains the
#    requested one cannot win.
# 2. Fall back to the first key that contains the requested name.
# The variable is always assigned (None on a miss), so a failed lookup can
# never leave it undefined or silently reuse a value from an earlier run.
mapping_location_value = mapping_location.get(new_location)
if mapping_location_value is None:
    mapping_location_value = next(
        (code for name, code in mapping_location.items() if new_location in name),
        None,
    )

if mapping_location_value is None:
    print(f"Location '{new_location}' not found in unique locations.")
else:
    print(f"Mapping value for location '{new_location}': {mapping_location_value}")

# Disaster and district are looked up by exact label; .get yields None for a
# label that is not in the mapping.
mapping_disaster_value = mapping_disaster.get(new_disaster)
print(f"Mapping value for Disaster '{new_disaster}': {mapping_disaster_value}")

mapping_district_value = mapping_district.get(new_district)
print(f"Mapping value for District '{new_district}': {mapping_district_value}")

Mapping value for location 'Nochchiyagama': 1004
Mapping value for Disaster 'Landslide': 0
Mapping value for District 'Anuradhapura': 0


In [16]:
# Single-row feature frame for the prediction request: the three encoded
# categorical values plus hard-coded date and rainfall inputs.
# NOTE(review): presumably these names and their order match the training
# feature columns -- verify against X.columns.
feature_values = {
    'Disaster': mapping_disaster_value,
    'Location': mapping_location_value,
    'District': mapping_district_value,
    'Month': 11,
    'Day': 20,
    'Rainfall(mm)': 237.5,
}
new_data = pd.DataFrame(feature_values, index=[0])


In [17]:
# Run the single-row request through both fitted models (random forest first,
# then XGBoost) and report each prediction to four decimal places.
rfr_prediction, xgb_prediction = (
    fitted_model.predict(new_data)
    for fitted_model in (random_forest_model, xgb_reg)
)

print(f'Prediction for the new data using rfr: {rfr_prediction[0]:.4f}')
print(f'Prediction for the new data using xgb: {xgb_prediction[0]:.4f}')


Prediction for the new data using rfr: 0.8643
Prediction for the new data using xgb: 0.5095
