# In [None]:  (Jupyter cell marker left over from the notebook export)
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('your_dataset.csv')


def _combine_datetime(frame, date_col, hour_col, minute_col):
    """Return a datetime Series built from a date column plus hour/minute columns.

    The date column is parsed with ``pd.to_datetime`` and the hour and minute
    columns are added to it as timedeltas. Doing the arithmetic numerically
    (instead of concatenating strings such as "2020-01-01 5:3" and re-parsing
    them) keeps working when the hour/minute columns are read as floats and
    avoids parsing non-zero-padded time strings.

    Missing hour/minute values yield ``NaT`` for that row rather than raising.
    NOTE(review): assumes the date column holds a date without a time-of-day
    component -- confirm against the dataset.
    """
    date_part = pd.to_datetime(frame[date_col])
    # to_numeric first: to_timedelta rejects `unit` when given string input.
    hour_part = pd.to_timedelta(pd.to_numeric(frame[hour_col]), unit='h')
    minute_part = pd.to_timedelta(pd.to_numeric(frame[minute_col]), unit='min')
    return date_part + hour_part + minute_part


# Feature engineering: full start/end timestamps for every row
data['start_datetime'] = _combine_datetime(data, 'start_date', 'start_time_hour', 'start_time_minute')
data['end_datetime'] = _combine_datetime(data, 'end_date', 'end_time_hour', 'end_time_minute')

# Extract relevant features from the start timestamp
data['hour'] = data['start_datetime'].dt.hour
data['minute'] = data['start_datetime'].dt.minute
data['day_of_week'] = data['start_datetime'].dt.dayofweek  # Monday=0 ... Sunday=6

# Column the models are trained to predict
target = 'tripDuration'

# Predictor columns fed to every model
features = [
    'tripDistance',
    'tripSpeed',
    'startLatitude',
    'startLongitude',
    'endLatitude',
    'endLongitude',
    'hour',
    'minute',
    'day_of_week',
]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data[features],
    data[target],
    test_size=0.2,
    random_state=42,
)

# Linear Regression: fit on the training split, score on the held-out split
linear_model = LinearRegression().fit(X_train, y_train)  # fit() returns the fitted estimator
linear_pred = linear_model.predict(X_test)
linear_mse = mean_squared_error(y_test, linear_pred)
print(f"Linear Regression MSE: {linear_mse}")

# Random Forest Regressor
# The forest draws random bootstrap samples, so without a seed the reported MSE
# changes on every run. Pin random_state (same value as the train/test split)
# so the model comparison is reproducible.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print(f"Random Forest MSE: {mean_squared_error(y_test, rf_pred)}")

# XGBoost: gradient-boosted trees with the library's default hyper-parameters
xgb_model = XGBRegressor().fit(X_train, y_train)  # fit() returns the fitted estimator
xgb_pred = xgb_model.predict(X_test)
xgb_mse = mean_squared_error(y_test, xgb_pred)
print(f"XGBoost MSE: {xgb_mse}")

# SARIMA (Seasonal Autoregressive Integrated Moving Average)
# SARIMA forecasts the values that FOLLOW its training series, so it needs a
# chronological hold-out rather than the shuffled train/test split used by the
# other models. Fitting on the full series (test rows included) and scoring the
# forecast against the randomly sampled y_test would leak the test data and
# compare unrelated rows.
sarima_series = data.sort_values('start_datetime', kind='stable')[target].reset_index(drop=True)
sarima_holdout = len(X_test)  # same hold-out size as the other models
sarima_train = sarima_series.iloc[:-sarima_holdout]
sarima_test = sarima_series.iloc[-sarima_holdout:]

# NOTE(review): the seasonal period of 24 presumably means "hourly observations
# with a daily cycle"; trip records may not be evenly spaced -- confirm.
sarima_model = SARIMAX(sarima_train, order=(1, 1, 1), seasonal_order=(1, 1, 1, 24))
sarima_results = sarima_model.fit(disp=False)
sarima_pred = sarima_results.get_forecast(steps=sarima_holdout).predicted_mean

# Scored on the chronological hold-out, i.e. not the same rows as y_test.
print(f"SARIMA MSE: {mean_squared_error(sarima_test, sarima_pred)}")

# Deep Neural Network
# Inputs are standardized inside the pipeline, so the scaler is fit on the
# training split only and then applied to the test split at predict time.
scaler = StandardScaler()
# random_state pins the weight initialization and batch shuffling so the
# reported MSE is reproducible (same seed as the train/test split).
mlp_model = MLPRegressor(max_iter=500, random_state=42)
pipeline = Pipeline([('scaler', scaler), ('mlp', mlp_model)])
pipeline.fit(X_train, y_train)
mlp_pred = pipeline.predict(X_test)
print(f"MLP Regression MSE: {mean_squared_error(y_test, mlp_pred)}")
