In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor

In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
CSV_PATH = 'Model Cyclone.csv'
df = pd.read_csv(CSV_PATH)

In [3]:
# Encode each categorical column with its OWN LabelEncoder so every fitted
# encoder stays available afterwards (e.g. for inverse_transform). A single
# shared encoder only remembers the last column it was fitted on.
#
# NOTE: the text columns of `df` are overwritten with their integer codes, so
# this cell should run exactly once after the CSV is loaded; running it again
# would fit the encoders on the integer codes instead of the original labels.
label_encoders = {}
category_mappings = {}
for column in ('Disaster', 'District', 'Location'):
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder
    # original label -> integer code; used later to encode user-supplied values
    category_mappings[column] = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))

mapping_disaster = category_mappings['Disaster']
mapping_district = category_mappings['District']
mapping_location = category_mappings['Location']

# Backward compatibility: `label_encoder` previously ended up fitted on
# 'Location' (the last column processed), so keep that name pointing there.
label_encoder = label_encoders['Location']


In [4]:
# Features are every column except the target column 'Cyclone'.
X = df.drop(columns=['Cyclone'])
y = df['Cyclone']

# One seed shared by the split AND both models. Previously only the split was
# seeded, so the forest was rebuilt with different randomness on every run and
# the printed errors could not be reproduced.
RANDOM_STATE = 21

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

random_forest_model = RandomForestRegressor(n_estimators=500, random_state=RANDOM_STATE)
random_forest_model.fit(X_train, y_train)

xgb_model = XGBRegressor(n_estimators=500, random_state=RANDOM_STATE)
xgb_model.fit(X_train, y_train)

# Score both models on the held-out 20% split.
random_forest_predictions = random_forest_model.predict(X_test)
xgb_pred = xgb_model.predict(X_test)

rfr_mse = mean_squared_error(y_test, random_forest_predictions)
print(f'Mean Squared Error rfr: {rfr_mse}')

xgb_mse = mean_squared_error(y_test, xgb_pred)
print(f'Mean Squared Error xgb: {xgb_mse}')


Mean Squared Error rfr: 0.2384908260727471
Mean Squared Error xgb: 0.27823011352618215


In [5]:
# Single-row frame holding the raw (un-encoded) categorical values of the query.
user_record = {
    'Disaster': 'Cyclone',
    'Location': "Thimbirigasyaya",
    'District': 'Colombo',
}
new_usr_data = pd.DataFrame([user_record], index=[0])


In [6]:
# Pull the raw categorical values out of the single-row user frame.
new_disaster = new_usr_data['Disaster'].iloc[0]
new_location = new_usr_data['Location'].iloc[0]
new_district = new_usr_data['District'].iloc[0]


def _match_location(name, known_locations):
    """Return the known location key that best matches ``name``, or None.

    An exact key match wins. Otherwise the first key that contains ``name``
    as a substring is used (the original lenient behaviour). Checking the
    exact match first stops a longer key that merely contains ``name`` from
    shadowing the real entry.
    """
    if name in known_locations:
        return name
    return next((candidate for candidate in known_locations if name in candidate), None)


# Fail loudly on unknown values: silently continuing would leave
# `mapping_location_value` undefined (or stale from an earlier run) and feed
# None codes to the models.
matched_location = _match_location(new_location, mapping_location)
if matched_location is None:
    raise ValueError(f"Location '{new_location}' not found in unique locations.")
mapping_location_value = mapping_location[matched_location]
print(f"Mapping value for location '{new_location}': {mapping_location_value}")

if new_disaster not in mapping_disaster:
    raise ValueError(f"Disaster '{new_disaster}' not found in unique disasters.")
mapping_disaster_value = mapping_disaster[new_disaster]
print(f"Mapping value for Disaster '{new_disaster}': {mapping_disaster_value}")

if new_district not in mapping_district:
    raise ValueError(f"District '{new_district}' not found in unique districts.")
mapping_district_value = mapping_district[new_district]
print(f"Mapping value for District '{new_district}': {mapping_district_value}")

Mapping value for location 'Thimbirigasyaya': 2312
Mapping value for Disaster 'Cyclone': 0
Mapping value for District 'Colombo': 4


In [7]:
# Assemble the encoded query as a one-row feature frame. Key order here sets
# the column order of the resulting frame.
feature_values = {
    'Disaster': mapping_disaster_value,
    'Location': mapping_location_value,
    'District': mapping_district_value,
    'Month': 10,
    'Day': 20,
    'Wind Speed(mph)': 9.8,
}
new_data = pd.DataFrame(
    {feature: [value] for feature, value in feature_values.items()},
    index=[0],
)


In [8]:
# Run the query row through both fitted models (random forest first, then XGBoost).
rfr_prediction, xgb_prediction = (
    estimator.predict(new_data) for estimator in (random_forest_model, xgb_model)
)

# Report the single predicted value from each model, rounded to four decimals.
for tag, estimate in (('rfr', rfr_prediction), ('xgb', xgb_prediction)):
    print(f'Prediction for the new data using {tag}: {estimate[0]:.4f}')

Prediction for the new data using rfr: 0.8000
Prediction for the new data using xgb: 0.7722
