In [10]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [11]:
# Read the regression dataset from its CSV file
data = pd.read_csv("../dataset/trains_refined_regression.csv")

# Separate the predictor columns (X) from the target column (y)
feature_columns = ['train_ID', 'departure_time', 'arrival_time', 'train_type']
X = data[feature_columns]
y = data['delay']

In [12]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [13]:

# Build the linear regression model
model = LinearRegression()

# Fit the model on the training split
model.fit(X_train, y_train)
# Predict the target for the held-out test split
y_pred = model.predict(X_test)
# Evaluate the model on the test split.
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it raises a TypeError on current releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

In [14]:
# Report test-set metrics for the linear regression predictions in `y_pred`.
# RMSE is taken as sqrt(MSE) because `squared=False` was deprecated in
# scikit-learn 1.4 and removed in 1.6.
print("--------------------- Linear Regression ---------------------\n")
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R^2:",r2_score(y_test, y_pred))
print("\n---------------------------------------------------------")


--------------------- Linear Regression ---------------------

RMSE: 7.245390538407914
R^2: 0.0704820284110309

---------------------------------------------------------


In [15]:
# Create the Bayesian ridge regression model and train it on the training split
# (fit() returns the estimator itself, so construction and training can be chained)
bayesian_reg = BayesianRidge().fit(X_train, y_train)

# Predict the target for the held-out test split
y_pred = bayesian_reg.predict(X_test)

In [16]:
# Report test-set metrics for the Bayesian ridge predictions in `y_pred`.
# RMSE is taken as sqrt(MSE) because `squared=False` was deprecated in
# scikit-learn 1.4 and removed in 1.6.
print("--------------------- Bayesian Regression ---------------------\n")
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R^2:",r2_score(y_test, y_pred))
print("\n---------------------------------------------------------")

--------------------- Bayesian Regression ---------------------

RMSE: 7.245371068262149
R^2: 0.07048702409096985

---------------------------------------------------------


In [17]:
# Random forest of 50 trees with a fixed seed for reproducible results.
# NOTE(review): the "poisson" split criterion presumably expects non-negative
# delay values — confirm against the dataset.
model = RandomForestRegressor(
    n_estimators=50,
    random_state=42,
    criterion="poisson",
)
model.fit(X_train, y_train)

# Predict the target for the held-out test split
y_pred = model.predict(X_test)

In [18]:
# Report test-set metrics for the random forest predictions in `y_pred`.
# RMSE is taken as sqrt(MSE) because `squared=False` was deprecated in
# scikit-learn 1.4 and removed in 1.6.
print("--------------------- Random Forest Regressor ---------------------\n")
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R^2:",r2_score(y_test, y_pred))
print("\n---------------------------------------------------------")

--------------------- Random Forest Regressor ---------------------

RMSE: 4.912196173479969
R^2: 0.5727465318997378

---------------------------------------------------------
