# Model testing

In [None]:
import matplotlib.pyplot as plt
import joblib
import os
import pandas as pd
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score, make_scorer, mean_absolute_percentage_error, median_absolute_error, PredictionErrorDisplay

# Read the evaluation data and discard its first column.
testing = pd.read_csv("data/testing_df.csv")
testing = testing.drop(columns=testing.columns[0])

# Keep only the rows whose price is below 1000.
testing = testing[testing["price"] < 1000]

# Fixed seed so the 80/20 split is reproducible between runs.
train_set, test_set = train_test_split(testing, test_size=0.2, random_state=874631)

# Separate the target ("price") from the feature columns.
X_train, y_train = train_set.drop(columns="price"), train_set["price"]
X_test, y_test = test_set.drop(columns="price"), test_set["price"]



In [None]:
# (rows, columns) of the data after the price < 1000 filter.
testing.shape

In [None]:
# Preview the first rows of the filtered data.
testing.head()


## Multi-Layer Perceptron Regression


In [None]:
# Evaluate the saved MLP regression pipeline on the held-out test split.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
best_pipe = joblib.load("pickle/MLPR_less1k.pkl")
pred = best_pipe.predict(X_test)
print(
    f"\nExplained variance score is {explained_variance_score(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Error is {mean_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Percentage error is {round(100 * mean_absolute_percentage_error(y_true=y_test, y_pred=pred), 2)}%",
    f"\nMedian Absolute Error is {median_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Squared Error is {mean_squared_error(y_true=y_test, y_pred=pred)}",
    f"\nR^2 Error is {r2_score(y_true=y_test, y_pred=pred)}",
)

# Per-sample absolute error between prediction and true price.
results = pd.DataFrame(data={"Pred": pred, "y_test": y_test})
results["Difference"] = abs(results["Pred"] - results["y_test"])
# Fraction of test samples missed by more than 100; reported for parity with
# the other model sections (previously `results` was built but never used here).
print(f"\nShare of predictions off by more than 100 is {(results['Difference'] > 100).mean()}")

# Training loss recorded by the fitted MLP step of the pipeline.
plt.figure(figsize=(10, 6))
plt.plot(best_pipe["Model"].loss_curve_, label='Loss Curve', color='blue')
plt.title('Loss Curve During Training')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()
plt.grid()
plt.show()

# True vs predicted scatter. The reference line must be the identity y = x,
# so both of its endpoints use the same value on each axis; the span covers
# the full range of true and predicted values.
line_lo = min(y_test.min(), pred.min())
line_hi = max(y_test.max(), pred.max())
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='orange', label='Predictions')
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'k--', lw=2, label='Perfect Prediction')
plt.title('True vs Predicted Values')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid()
plt.show()

# scikit-learn's prediction-error plot for the same predictions.
display = PredictionErrorDisplay(y_true=y_test, y_pred=pred)
display.plot()
plt.show()


## Random Forest Regression

In [None]:
# Evaluate the saved random forest regression pipeline on the held-out test split.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
best_pipe = joblib.load("pickle/RFR.pkl")
pred = best_pipe.predict(X_test)
print(
    f"\nExplained variance score is {explained_variance_score(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Error is {mean_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Percentage error is {round(100 * mean_absolute_percentage_error(y_true=y_test, y_pred=pred), 2)}%",
    f"\nMedian Absolute Error is {median_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Squared Error is {mean_squared_error(y_true=y_test, y_pred=pred)}",
    f"\nR^2 Error is {r2_score(y_true=y_test, y_pred=pred)}",
)

# Per-sample absolute error between prediction and true price.
results = pd.DataFrame(data={"Pred": pred, "y_test": y_test})
results["Difference"] = abs(results["Pred"] - results["y_test"])
# Fraction of test samples missed by more than 100. This used to be a bare
# expression in the middle of the cell, so its value was never shown.
print(f"\nShare of predictions off by more than 100 is {(results['Difference'] > 100).mean()}")

# True vs predicted scatter. The reference line must be the identity y = x,
# so both of its endpoints use the same value on each axis; the span covers
# the full range of true and predicted values.
line_lo = min(y_test.min(), pred.min())
line_hi = max(y_test.max(), pred.max())
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='orange', label='Predictions')
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'k--', lw=2, label='Perfect Prediction')
plt.title('True vs Predicted Values')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid()
plt.show()

# scikit-learn's prediction-error plot for the same predictions.
display = PredictionErrorDisplay(y_true=y_test, y_pred=pred)
display.plot()
plt.show()



## Linear Support Vector Regression

In [None]:
# Evaluate the saved linear SVR pipeline on the held-out test split and
# save both diagnostic plots under images/.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
best_pipe = joblib.load("pickle/LinearSVR.pkl")
pred = best_pipe.predict(X_test)
print(
    f"\nExplained variance score is {explained_variance_score(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Error is {mean_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Percentage error is {round(100 * mean_absolute_percentage_error(y_true=y_test, y_pred=pred), 2)}%",
    f"\nMedian Absolute Error is {median_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Squared Error is {mean_squared_error(y_true=y_test, y_pred=pred)}",
    f"\nR^2 Error is {r2_score(y_true=y_test, y_pred=pred)}",
)

# Per-sample absolute error between prediction and true price.
results = pd.DataFrame(data={"Pred": pred, "y_test": y_test})
results["Difference"] = abs(results["Pred"] - results["y_test"])
# Fraction of test samples missed by more than 100. This used to be a bare
# expression in the middle of the cell, so its value was never shown.
print(f"\nShare of predictions off by more than 100 is {(results['Difference'] > 100).mean()}")

# savefig does not create missing directories.
os.makedirs("images", exist_ok=True)

# True vs predicted scatter. The reference line must be the identity y = x,
# so both of its endpoints use the same value on each axis; the span covers
# the full range of true and predicted values.
line_lo = min(y_test.min(), pred.min())
line_hi = max(y_test.max(), pred.max())
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='orange', label='Predictions')
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'k--', lw=2, label='Perfect Prediction')
plt.title('True vs Predicted Values')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid()
# Save before show(): once the figure has been shown and closed, savefig
# would write a blank image.
plt.savefig('images/LinearSVR_true_vs_pred.png', format='png')
plt.show()

# scikit-learn's prediction-error plot for the same predictions.
display = PredictionErrorDisplay(y_true=y_test, y_pred=pred)
display.plot()
# Same ordering rule: write the file while the figure is still current.
plt.savefig('images/LinearSVR_residuals.png', format='png')
plt.show()



## Nu Support Vector Regression

In [None]:
# Evaluate the saved Nu-SVR pipeline on the held-out test split.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
best_pipe = joblib.load("pickle/NuSVR.pkl")
pred = best_pipe.predict(X_test)
print(
    f"\nExplained variance score is {explained_variance_score(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Error is {mean_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Percentage error is {round(100 * mean_absolute_percentage_error(y_true=y_test, y_pred=pred), 2)}%",
    f"\nMedian Absolute Error is {median_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Squared Error is {mean_squared_error(y_true=y_test, y_pred=pred)}",
    f"\nR^2 Error is {r2_score(y_true=y_test, y_pred=pred)}",
)

# Per-sample absolute error between prediction and true price.
results = pd.DataFrame(data={"Pred": pred, "y_test": y_test})
results["Difference"] = abs(results["Pred"] - results["y_test"])
# Fraction of test samples missed by more than 100. This used to be a bare
# expression in the middle of the cell, so its value was never shown.
print(f"\nShare of predictions off by more than 100 is {(results['Difference'] > 100).mean()}")

# True vs predicted scatter. The reference line must be the identity y = x,
# so both of its endpoints use the same value on each axis; the span covers
# the full range of true and predicted values.
line_lo = min(y_test.min(), pred.min())
line_hi = max(y_test.max(), pred.max())
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='orange', label='Predictions')
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'k--', lw=2, label='Perfect Prediction')
plt.title('True vs Predicted Values')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid()
plt.show()

# scikit-learn's prediction-error plot for the same predictions.
display = PredictionErrorDisplay(y_true=y_test, y_pred=pred)
display.plot()
plt.show()



## KNN Regression

In [None]:
# Evaluate the saved KNN regression pipeline on the held-out test split.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
best_pipe = joblib.load("pickle/KNNR.pkl")
pred = best_pipe.predict(X_test)
print(
    f"\nExplained variance score is {explained_variance_score(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Error is {mean_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Absolute Percentage error is {round(100 * mean_absolute_percentage_error(y_true=y_test, y_pred=pred), 2)}%",
    f"\nMedian Absolute Error is {median_absolute_error(y_true=y_test, y_pred=pred)}",
    f"\nMean Squared Error is {mean_squared_error(y_true=y_test, y_pred=pred)}",
    f"\nR^2 Error is {r2_score(y_true=y_test, y_pred=pred)}",
)

# Per-sample absolute error between prediction and true price.
results = pd.DataFrame(data={"Pred": pred, "y_test": y_test})
results["Difference"] = abs(results["Pred"] - results["y_test"])
# Fraction of test samples missed by more than 100. This used to be a bare
# expression in the middle of the cell, so its value was never shown.
print(f"\nShare of predictions off by more than 100 is {(results['Difference'] > 100).mean()}")

# True vs predicted scatter. The reference line must be the identity y = x,
# so both of its endpoints use the same value on each axis; the span covers
# the full range of true and predicted values.
line_lo = min(y_test.min(), pred.min())
line_hi = max(y_test.max(), pred.max())
plt.figure(figsize=(10, 6))
plt.scatter(y_test, pred, color='orange', label='Predictions')
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'k--', lw=2, label='Perfect Prediction')
plt.title('True vs Predicted Values')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.legend()
plt.grid()
plt.show()

# scikit-learn's prediction-error plot for the same predictions.
display = PredictionErrorDisplay(y_true=y_test, y_pred=pred)
display.plot()
plt.show()
