In [7]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

# === Reproducibility
# One seed is shared by NumPy's global RNG, the train/test split, and the
# estimators below that are constructed with `random_state`.
SEED = 42
np.random.seed(SEED)

# === File paths
csv_path = r"C:\Users\vishn\OneDrive\Documents\Machine Learning\Vishnu_phd.csv"
save_dir = r"C:\Users\vishn\OneDrive\Documents\Machine Learning\Traditional model"
os.makedirs(save_dir, exist_ok=True)  # tolerate an already-existing folder

# === Load data
df = pd.read_csv(csv_path)
# The two target columns are pulled out; every remaining column is a feature.
y_fos, y_seis = df["FoS"], df["SeismicFoS"]
X = df.drop(columns=["FoS", "SeismicFoS"])

# === Split data
# Features and both targets go through a single call so that the same rows
# land in the train and test partitions of all three.
(
    X_train,
    X_test,
    y_fos_train,
    y_fos_test,
    y_seis_train,
    y_seis_test,
) = train_test_split(X, y_fos, y_seis, test_size=0.2, random_state=SEED)

# === Scale features
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no statistics from the test rows enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === Models
# Registry of estimators to compare, keyed by the label used in the result
# tables and plots. All use library defaults apart from `random_state`.
models = {
    # Linear baselines
    "LinearRegression": LinearRegression(),
    "Ridge": Ridge(random_state=SEED),
    # Single tree and tree ensembles
    "DecisionTree": DecisionTreeRegressor(random_state=SEED),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "GradientBoosting": GradientBoostingRegressor(random_state=SEED),
    # Kernel and neighbour-based regressors
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
}

# === Evaluation
def evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on both splits.

    The estimator is fitted in place, so ``model`` is left trained on
    ``(X_train, y_train)`` when the function returns.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features.
        y_test: Held-out targets.

    Returns:
        dict: R2, RMSE and MAE for each split, keyed ``"<metric>_train"``
        and ``"<metric>_test"``.
    """
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    return {
        "R2_train": r2_score(y_train, y_pred_train),
        "R2_test": r2_score(y_test, y_pred_test),
        # RMSE is computed as sqrt(MSE) instead of passing `squared=False`:
        # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
        # while sqrt(MSE) works on every version. The two are identical for
        # single-column targets, which is how this file calls the function.
        "RMSE_train": np.sqrt(mean_squared_error(y_train, y_pred_train)),
        "RMSE_test": np.sqrt(mean_squared_error(y_test, y_pred_test)),
        "MAE_train": mean_absolute_error(y_train, y_pred_train),
        "MAE_test": mean_absolute_error(y_test, y_pred_test),
    }

# === Run for all models
# `evaluate` fits each estimator in place, so the same instances are refitted
# for the second target; after both passes they are left trained on SeismicFoS.
fos_results = {
    name: evaluate(model, X_train_scaled, y_fos_train, X_test_scaled, y_fos_test)
    for name, model in models.items()
}
seis_results = {
    name: evaluate(model, X_train_scaled, y_seis_train, X_test_scaled, y_seis_test)
    for name, model in models.items()
}

# Transpose so that each row is a model and each column a metric/split.
fos_df = pd.DataFrame(fos_results).T
seis_df = pd.DataFrame(seis_results).T

# === Save results to CSV
fos_csv_path = os.path.join(save_dir, "model_results_FoS.csv")
seis_csv_path = os.path.join(save_dir, "model_results_SeismicFoS.csv")
fos_df.to_csv(fos_csv_path)
seis_df.to_csv(seis_csv_path)

# === Print results in notebook
# `display` is provided by the IPython/Jupyter session this cell runs in.
print("\n📊 Model Results for FoS:\n")
display(fos_df)

print("\n📊 Model Results for SeismicFoS:\n")
display(seis_df)

# === Combined plot
# Grid layout: one row per target, one column per metric.
metrics = ["R2", "RMSE", "MAE"]
targets = ["FoS", "SeismicFoS"]
dataframes = [fos_df, seis_df]

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(24, 12))
fig.suptitle("Model Performance Comparison (Train vs Test)", fontsize=20)

# The loop variable is deliberately NOT called `df`: that name holds the
# loaded dataset, and rebinding it here would silently replace the dataset
# with a results table for any code that runs afterwards.
for row_idx, (target, results_df) in enumerate(zip(targets, dataframes)):
    for col_idx, metric in enumerate(metrics):
        ax = axes[row_idx, col_idx]
        results_df[[f"{metric}_train", f"{metric}_test"]].plot(kind="bar", ax=ax)
        ax.set_title(f"{target} - {metric}", fontsize=14)
        ax.set_ylabel(metric)
        ax.set_xlabel("Model")
        ax.tick_params(axis='x', rotation=45)
        ax.legend(["Train", "Test"])

# Leave the top 4% of the canvas free for the suptitle.
fig.tight_layout(rect=[0, 0, 1, 0.96])
save_path = os.path.join(save_dir, "Combined_Model_Comparison.png")
# Save and close through the explicit figure handle rather than pyplot's
# implicit "current figure".
fig.savefig(save_path, dpi=400)
plt.close(fig)

print(f"\n✅ All results and plots saved to:\n{save_dir}")





📊 Model Results for FoS:





Unnamed: 0,R2_train,R2_test,RMSE_train,RMSE_test,MAE_train,MAE_test
LinearRegression,0.78064,0.707134,0.5400447,0.688463,0.3878795,0.476359
Ridge,0.780635,0.707026,0.540051,0.688589,0.3876124,0.476241
DecisionTree,1.0,0.356961,1.579999e-17,1.020151,2.248553e-18,0.557374
RandomForest,0.97838,0.801437,0.1695423,0.566885,0.1014349,0.351328
GradientBoosting,0.98621,0.8259,0.1354027,0.530818,0.09681323,0.32453
SVR,0.901256,0.802768,0.3623322,0.564982,0.1804944,0.325871
KNN,0.786039,0.654413,0.5333575,0.747868,0.3477772,0.457091



📊 Model Results for SeismicFoS:



Unnamed: 0,R2_train,R2_test,RMSE_train,RMSE_test,MAE_train,MAE_test
LinearRegression,0.694833,0.64074,0.482452,0.562776,0.323165,0.367838
Ridge,0.694828,0.640672,0.482456,0.56283,0.322767,0.367545
DecisionTree,1.0,0.442,0.0,0.701372,0.0,0.400505
RandomForest,0.969911,0.735939,0.151491,0.482484,0.089847,0.298132
GradientBoosting,0.972445,0.845697,0.144972,0.368823,0.099696,0.25042
SVR,0.828186,0.699648,0.362005,0.514573,0.174381,0.28959
KNN,0.721048,0.542072,0.461265,0.635375,0.271982,0.371101



✅ All results and plots saved to:
C:\Users\vishn\OneDrive\Documents\Machine Learning\Traditional model
