In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load the processed dataset and split it into features and target.
target_column = "Expected_CTC"
df = pd.read_csv("../data/processed_ctc.csv")

y = df[target_column]
X = df.drop(columns=[target_column])

# Restore the two previously fitted regressors from disk.
lr = joblib.load("../models/linear_regression_model.joblib")
xgb_model = joblib.load("../models/xgb_model.joblib")

# Score every row of the CSV with each model.
# NOTE(review): predictions are made on the entire file; if it contains the
# rows the models were trained on, the metrics below are in-sample — confirm.
y_pred_lr = lr.predict(X)
y_pred_xgb = xgb_model.predict(X)

# Build one table row per model and one column per regression metric.
predictions_by_model = {
    "Linear Regression": y_pred_lr,
    "XGBoost": y_pred_xgb,
}
metric_functions = {
    "R2": r2_score,
    "MSE": mean_squared_error,
    "MAE": mean_absolute_error,
}

results = {"Model": list(predictions_by_model)}
for metric_name, metric_fn in metric_functions.items():
    # Each metric is computed against the same ground truth ``y``.
    results[metric_name] = [
        metric_fn(y, y_hat) for y_hat in predictions_by_model.values()
    ]

results_df = pd.DataFrame(results)
print(results_df)

# Bar chart of the two error metrics, grouped by model.
# NOTE(review): MSE is in squared target units while MAE is in target units,
# so the two series share one axis but are not on a comparable scale.
error_columns = ["MSE", "MAE"]
errors_by_model = results_df.set_index("Model")[error_columns]

ax = errors_by_model.plot(kind="bar", figsize=(8, 5))
ax.set_title("Model Performance Comparison")
ax.set_ylabel("Error Value")
plt.show()
