# In [None]:
# slope_prediction.py (Save this file in your VS Code project folder)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import geopandas as gpd
from shapely.geometry import Point
import joblib

# Fault layers already read from disk, keyed by the path they were loaded from.
_FAULT_LAYER_CACHE = {}


def _load_fault_layer(fault_shapefile):
    """Return the fault layer for *fault_shapefile*, reading it at most once."""
    try:
        return _FAULT_LAYER_CACHE[fault_shapefile]
    except KeyError:
        layer = gpd.read_file(fault_shapefile)
        # Only successful reads are cached, so a failing path is retried
        # (and reported) on every call.
        _FAULT_LAYER_CACHE[fault_shapefile] = layer
        return layer
    except TypeError:
        # Unhashable source: it cannot be used as a cache key, so read it directly.
        return gpd.read_file(fault_shapefile)


def calculate_distance_to_fault(point, fault_shapefile):
    """Calculates the distance from a point to the nearest fault.

    The fault layer is loaded once per distinct ``fault_shapefile`` and then
    reused, so calling this function for every row of a table no longer
    re-reads the file each time. Changes made to the file on disk after the
    first successful read are therefore not picked up within the same process.

    Args:
        point: Geometry to measure from (passed to ``faults.distance``).
        fault_shapefile: Path of the fault layer, as accepted by
            ``gpd.read_file``.

    Returns:
        The minimum of the distances from ``point`` to the features in the
        layer, or ``None`` when the layer cannot be read or the distance
        cannot be computed (the error is printed, not raised).
    """
    try:
        faults = _load_fault_layer(fault_shapefile)
        return faults.distance(point).min()
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller decide
        # what to do with a missing value.
        print(f"Error calculating distance to fault: {e}")
        return None

# Load the training table; nothing below can run without it.
try:
    data = pd.read_csv('slope_stability_data.csv')
except FileNotFoundError:
    print("Error: slope_stability_data.csv not found. Please place it in the project folder.")
    # `exit()` is a helper injected by the `site` module (absent under
    # `python -S` or in frozen builds) and, called without an argument,
    # reports success. Raise SystemExit with a non-zero status instead so
    # callers and shells can detect the failure.
    raise SystemExit(1)

# Column holding the class label; it must pass through preprocessing untouched.
target_col = 'slope_failure'

# Rows without a label cannot be used for supervised training. Mean-imputing
# the label (as happens when it is treated like any other numeric column)
# would produce fractional class values. Resetting the index keeps `data`
# positionally aligned with the frames derived from it below.
data = data.dropna(subset=[target_col]).reset_index(drop=True)
labels = data[target_col]
features = data.drop(columns=target_col)

# Fill gaps in the numeric feature columns with the column mean.
numerical_cols = features.select_dtypes(include=np.number).columns
imputer = SimpleImputer(strategy='mean')
if len(numerical_cols) > 0:
    features[numerical_cols] = imputer.fit_transform(features[numerical_cols])

# One-hot encode the categorical features only. Encoding the label as well
# could rename it (e.g. to `slope_failure_<value>`) and break later lookups.
features = pd.get_dummies(features, drop_first=True)

if 'rainfall' in features.columns and 'groundwater_pressure' in features.columns:
    # The small constant avoids division by zero for zero groundwater pressure.
    features['rainfall_groundwater_ratio'] = features['rainfall'] / (features['groundwater_pressure'] + 1e-9)

# Keep `data` as the unscaled (imputed + encoded) table including the label.
data = pd.concat([features, labels], axis=1)

# NOTE(review): imputer and scaler are fitted on all rows, i.e. before any
# train/test split, so held-out rows influence the fitted statistics.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Carry the original index over so the concat below aligns row for row.
scaled_features_df = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)

final_data = pd.concat([scaled_features_df, labels], axis=1)

# Optional spatial feature: distance from each sample to the nearest fault.
if 'longitude' in data.columns and 'latitude' in data.columns:
    # Build the points from `data`, not `final_data`: the coordinates in
    # `final_data` have been standardised and no longer describe real
    # locations, so distances measured from them would be meaningless.
    # NOTE(review): distances come out in the units of the fault layer's CRS
    # and assume the CSV coordinates use that same CRS - confirm.
    geometry = [Point(xy) for xy in zip(data['longitude'], data['latitude'])]
    gdf = gpd.GeoDataFrame(final_data, geometry=geometry)

    # Example: Calculate distance to fault (replace 'faults.shp' with your fault shapefile)
    try:
        # calculate_distance_to_fault returns None on failure; coerce those
        # entries to NaN so the column is numeric.
        distances = pd.to_numeric(
            gdf.geometry.apply(lambda point: calculate_distance_to_fault(point, 'faults.shp')),
            errors='coerce',
        )
    except FileNotFoundError:
        print("Warning: faults.shp not found. Distance to fault calculation skipped.")
    except Exception as e:
        print(f"An error occurred during spatial calculations: {e}")
    else:
        if distances.isna().all():
            # The helper reports its own errors and returns None, so a missing
            # or unreadable shapefile shows up here rather than as an exception.
            print("Warning: no distance to fault could be computed. Distance to fault calculation skipped.")
        else:
            gdf['distance_to_fault'] = distances
            # Add the new column exactly once; the geometry column is only
            # needed for the calculation and is not a model feature.
            final_data = pd.DataFrame(gdf.drop(columns='geometry'))

# Separate the feature matrix from the label column.
label_column = 'slope_failure'
y = final_data[label_column]
X = final_data.drop(columns=label_column)

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model = model.fit(X_train, y_train)

# Score the held-out rows and print both summaries.
predictions = model.predict(X_test)

test_confusion = confusion_matrix(y_test, predictions)
print(test_confusion)

test_report = classification_report(y_test, predictions)
print(test_report)

# Horizontal bar chart of the ten features the forest relied on most.
feature_importance = pd.Series(model.feature_importances_, index=X.columns)
top_features = feature_importance.nlargest(10)
top_features.plot.barh()
plt.show()

# Annotated heatmap of the confusion matrix on the held-out rows.
test_confusion_counts = confusion_matrix(y_test, predictions)
heatmap_axes = sns.heatmap(test_confusion_counts, annot=True, fmt='d')
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_ylabel('Actual')
plt.show()

# Persist the fitted classifier next to the script and confirm where it went.
model_path = 'slope_failure_model.pkl'
joblib.dump(model, model_path)

print(f"Model saved as {model_path}")