In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
import matplotlib.pyplot as plt

# -------------------------------
# Step 1: Load Bihar Flood dataset
# -------------------------------
# Location of the Excel workbook to load -- replace with your file path.
file_path = r"F:\PythonL\Data\Bihar_Flood_Data.xlsx"
data = pd.read_excel(file_path)

# Example categorical columns
categorical_cols = ['region']

# Keep every fitted encoder, keyed by column name. Without this the encoder
# for a column is overwritten on the next iteration, and the mapping from
# integer code back to the original label (le.classes_ / inverse_transform)
# is lost for all but the last column.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Replace the column's values with the integer codes assigned by the encoder.
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# -------------------------------
# Step 2: Split features and targets
# -------------------------------
# Every column except the two targets is used as a model input.
X = data.drop(columns=['flood_occurred', 'restoration_priority'])
y_flood = data['flood_occurred']          # Classification
y_priority = data['restoration_priority'] # Regression / Ordinal

# Train-test split
# Split the features and both targets in ONE call so all three are partitioned
# with the same row indices by construction. Two separate calls only stay
# aligned as long as their test_size/random_state arguments are kept in sync.
(
    X_train,
    X_test,
    y_train_flood,
    y_test_flood,
    y_train_priority,
    y_test_priority,
) = train_test_split(X, y_flood, y_priority, test_size=0.2, random_state=42)

# -------------------------------
# Step 3: Scale features
# -------------------------------
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------------
# Step 4: Flood Prediction
# -------------------------------
# Flood classifier 1: gradient boosting, fitted on the scaled training features.
gbc = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train_scaled, y_train_flood)
y_pred_gbc = gbc.predict(X_test_scaled)

gbc_accuracy = accuracy_score(y_test_flood, y_pred_gbc)
print("Gradient Boosting Classifier Accuracy:", gbc_accuracy)
print(classification_report(y_test_flood, y_pred_gbc))

# Flood classifier 2: random forest, same training data and evaluation.
rfc = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,
).fit(X_train_scaled, y_train_flood)
y_pred_rfc = rfc.predict(X_test_scaled)

rfc_accuracy = accuracy_score(y_test_flood, y_pred_rfc)
print("Random Forest Classifier Accuracy:", rfc_accuracy)
print(classification_report(y_test_flood, y_pred_rfc))

# -------------------------------
# Step 5: Restoration Priority Prediction
# -------------------------------
# Priority regressor 1: gradient boosting, fitted on the scaled training features.
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train_scaled, y_train_priority)
y_pred_gbr = gbr.predict(X_test_scaled)
# RMSE is reported as the square root of the test-set mean squared error.
gbr_mse = mean_squared_error(y_test_priority, y_pred_gbr)
print("Gradient Boosting Regressor RMSE:", np.sqrt(gbr_mse))

# Priority regressor 2: random forest, same training data and evaluation.
rfr = RandomForestRegressor(
    n_estimators=100,
    max_depth=5,
    random_state=42,
).fit(X_train_scaled, y_train_priority)
y_pred_rfr = rfr.predict(X_test_scaled)
rfr_mse = mean_squared_error(y_test_priority, y_pred_rfr)
print("Random Forest Regressor RMSE:", np.sqrt(rfr_mse))

# -------------------------------
# Step 6: Feature Importance
# -------------------------------
def plot_feature_importance(model, X, model_name):
    """Show a horizontal bar chart of a model's feature importances.

    The bars are passed to matplotlib in ascending order of importance
    (the order produced by ``np.argsort``).

    Args:
        model: Object exposing a ``feature_importances_`` array.
        X: Object whose ``columns`` attribute supplies the bar labels; its
            entries are indexed with the same ordering as the importances.
        model_name: Text appended to the chart title.
    """
    scores = model.feature_importances_
    labels = X.columns
    # Indices that sort the importances from smallest to largest.
    ascending = np.argsort(scores)

    plt.figure(figsize=(10, 6))
    plt.barh(labels[ascending], scores[ascending])
    plt.title(f'Feature Importance - {model_name}')
    plt.show()

# Draw one importance chart per fitted model, in the order they were trained.
for fitted_model, chart_label in (
    (gbc, "Gradient Boosting Classifier"),
    (rfc, "Random Forest Classifier"),
    (gbr, "Gradient Boosting Regressor"),
    (rfr, "Random Forest Regressor"),
):
    plot_feature_importance(fitted_model, X, chart_label)

ModuleNotFoundError: No module named 'pandas'