In [3]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error, r2_score

In [4]:
# Evaluate a linear regression model with 5-fold cross-validation and report
# MAE, MSE, RMSE, R2 and adjusted R2 for every fold plus their average.

# Load the updated dataset
df = pd.read_csv("updated_train.csv")

# Define the target column
target_column = 'Loan Sanction Amount (USD)'

# Separate features and target
X = df.drop(columns=[target_column])
y = df[target_column]

# Full-data standardization, kept under its original names in case later cells
# reference `scaler` / `X_scaled`. It is deliberately NOT used for the
# cross-validation below: fitting the scaler on all rows would leak the
# validation rows' mean/variance into every training fold.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize 5-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Store evaluation metrics (one dict per fold)
results = []

# Run cross-validation. KFold only looks at the number of rows, so splitting on
# the raw feature frame yields the same indices as splitting on a scaled copy.
for fold, (train_index, val_index) in enumerate(kf.split(X), 1):
    # Fit the scaler on the training fold only, then apply that same
    # transformation to the validation fold.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X.iloc[train_index])
    X_val = fold_scaler.transform(X.iloc[val_index])
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # Train the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict on validation set
    y_pred = model.predict(X_val)

    # Calculate metrics
    mse = mean_squared_error(y_val, y_pred)
    rmse = root_mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)

    # Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1), with n validation rows
    # and p features. It is undefined when n - p - 1 <= 0, so report NaN there
    # instead of raising ZeroDivisionError or producing a meaningless value.
    n_val, n_features = X_val.shape
    dof = n_val - n_features - 1
    adj_r2 = 1 - (1 - r2) * (n_val - 1) / dof if dof > 0 else np.nan

    results.append({
        "Fold": fold,
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "R2": r2,
        "Adjusted R2": adj_r2
    })

cv_results_df = pd.DataFrame(results)

# Append a summary row holding the mean of every metric across folds
# (Series.mean skips NaN, so an undefined adjusted R2 does not poison the mean).
metric_columns = ["MAE", "MSE", "RMSE", "R2", "Adjusted R2"]
average_row = {"Fold": "Average", **cv_results_df[metric_columns].mean().to_dict()}

cv_results_df = pd.concat([cv_results_df, pd.DataFrame([average_row])], ignore_index=True)

# Display results
print(cv_results_df)

      Fold           MAE           MSE          RMSE        R2  Adjusted R2
0        1  21539.365959  1.015307e+09  31863.886748  0.552684     0.551018
1        2  21627.319510  9.686148e+08  31122.576996  0.573056     0.571466
2        3  21507.681453  1.181087e+09  34366.951680  0.488430     0.486525
3        4  21511.178652  9.179253e+08  30297.281442  0.620162     0.618748
4        5  21709.723065  9.933505e+08  31517.464130  0.579387     0.577821
5  Average  21579.053728  1.015257e+09  31833.632199  0.562744     0.561116
