In [None]:
import os
from math import sqrt

import pandas as pd

from mlxtend.regressor import StackingRegressor

from sklearn.linear_model import LinearRegression, ElasticNet, BayesianRidge
# NOTE(review): StandardScaler is documented under sklearn.preprocessing; this import
# appears to rely on a re-export from discriminant_analysis — consider switching.
from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Location of the prepared dataset (CSV).
# Set the FLIGHT_TICKETS_DATASET_PATH environment variable to run the notebook on
# another machine; when it is unset, the original absolute path is used as a fallback.
final_dataset_path = os.environ.get(
    "FLIGHT_TICKETS_DATASET_PATH",
    "/home/magnus9102/Mostafa/Py/Github/data-science/mostafa_vahdani_bachelor_project/data/interim/final_flight_tickets_dataset.csv",
)

In [None]:
# Read the CSV at `final_dataset_path` into a DataFrame.
df = pd.read_csv(filepath_or_buffer=final_dataset_path)

In [None]:
# Features: every column except the target.
X = df.drop(columns=['ticket_price_T'])
# Target: the 'ticket_price_T' column.
y = df['ticket_price_T']

In [None]:
# Standardising scaler; the arguments spell out the library defaults
# (centre on the mean, scale to unit variance, work on a copy).
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [None]:
# Fit the scaler on X, then return the scaled feature matrix.
# NOTE(review): the scaler is fitted here on every row of X, i.e. before any
# train/test split has been made.
X_t = scaler.fit(X).transform(X)

In [None]:
# Split the raw features first, then fit the scaler on the training rows only, so that
# no statistics from the held-out rows leak into the features the model is trained on.
# A fixed random_state makes the 80/20 split, and everything derived from it, reproducible.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
x_train = scaler.fit_transform(x_train_raw)  # refits `scaler` on the training rows only
x_test = scaler.transform(x_test_raw)  # scaled with the statistics learned from the training rows

In [None]:
# Base regressors that are stacked together.
linear_model = LinearRegression()
bayesian_ridge = BayesianRidge(alpha_1=0.01, max_iter=250)
elastic_net = ElasticNet(max_iter=300, l1_ratio=0.7, alpha=0.8)

# Stacked ensemble: the base regressors feed a meta-regressor.
# NOTE(review): the same `elastic_net` instance is passed both as a base regressor and
# as the meta-regressor — confirm this is intended.
model = StackingRegressor(
    regressors=[linear_model, bayesian_ridge, elastic_net],
    meta_regressor=elastic_net,
)
model.fit(x_train, y_train)

In [None]:
# Predictions of the fitted stacked model on the training features; shown as cell output.
y_train_pred = model.predict(X=x_train)
y_train_pred


In [None]:
# Predictions of the fitted stacked model on the test features; shown as cell output.
y_test_pred = model.predict(X=x_test)
y_test_pred


In [None]:
# Undo the scaling so the test features are back in their original units.
x_test_inverse = scaler.inverse_transform(x_test)

# Wrap the predictions and the un-scaled features in DataFrames.
y_test_pred_df = pd.DataFrame(data=y_test_pred, columns=["ticket_price_T"])
x_test_df = pd.DataFrame(data=x_test_inverse, columns=X.columns)

# Place the predicted price column in front of the feature columns.
final_df = pd.concat(objs=[y_test_pred_df, x_test_df], axis="columns")

final_df.head()


In [None]:
# This is a regression problem, so the predictions have to be evaluated with regression metrics.
# roc_auc_score, classification_report and confusion_matrix cannot be used because they are meant for classification problems, not regression.
# The skill or performance of a regression model must be reported as an "error" in its predictions.


In [None]:
# Regression involves predicting continuous values rather than discrete classes; in other words,
# regression refers to predictive modeling problems that involve predicting a numeric value.


In [None]:
# Evaluating regression models:
# Regression models are evaluated by calculating error metrics.
# This means calculating and reporting the mean squared error, root mean squared error, and mean absolute error.


In [None]:
# R-squared: the best possible score is 1, which is obtained when the predicted values
# are the same as the actual values.
# Reported for both splits: the training score alone says nothing about unseen data,
# so the held-out (test) score is shown next to it.
{
    "R-squared (train)": round(r2_score(y_train, y_train_pred), 3),
    "R-squared (test)": round(r2_score(y_test, y_test_pred), 3),
}


In [None]:
# Mean squared error: a perfect value is 0.0, which means that all predictions matched
# the expected values exactly.
# Reported for both splits so the error on held-out data is visible, not just the
# error on the rows the model was fitted on.
{
    "Mean squared error (train)": round(mean_squared_error(y_train, y_train_pred), 3),
    "Mean squared error (test)": round(mean_squared_error(y_test, y_test_pred), 3),
}


In [None]:
# Root mean squared error: a perfect value is 0.0, which means that all predictions
# matched the expected values exactly.
# Computed as the square root of the mean squared error, for both splits, so the error
# on held-out data is visible next to the training error.
{
    "Root mean squared error (train)": round(sqrt(mean_squared_error(y_train, y_train_pred)), 3),
    "Root mean squared error (test)": round(sqrt(mean_squared_error(y_test, y_test_pred)), 3),
}


In [None]:
# Mean absolute error: a perfect value is 0.0, which means that all predictions matched
# the expected values exactly.
# Reported for both splits so the error on held-out data is visible, not just the
# error on the rows the model was fitted on.
{
    "Mean absolute error (train)": round(mean_absolute_error(y_train, y_train_pred), 3),
    "Mean absolute error (test)": round(mean_absolute_error(y_test, y_test_pred), 3),
}
