In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [4]:
# Read the raw trip records from 'dataset.csv' (path is relative to the
# notebook's working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='dataset.csv')

In [5]:
# Feature engineering
def _combine_timestamp(date_part, hour_part, minute_part):
    """Build a datetime Series from a date-string column plus hour/minute columns.

    The hour and minute values are stringified, joined as 'H:M:00', appended
    to the date string after a single space, and parsed by ``pd.to_datetime``.
    """
    clock_text = hour_part.astype(str) + ':' + minute_part.astype(str) + ':00'
    return pd.to_datetime(date_part + ' ' + clock_text)


data['start_datetime'] = _combine_timestamp(
    data['start_date'], data['start_time_hour'], data['start_time_minute']
)
data['end_datetime'] = _combine_timestamp(
    data['end_date'], data['end_time_hour'], data['end_time_minute']
)

# Every row counts as one trip, so summing this column later yields trips per bucket.
data['trip_count'] = 1

In [6]:
# Aggregate data into 15-minute intervals, bucketed by trip start time.
# - '15min' is the offset alias accepted by all pandas releases; the 'T'
#   alias is deprecated since pandas 2.2.
# - numeric_only=True restricts the sum to numeric columns. Without it,
#   pandas >= 2.0 also tries to sum the date strings and the end_datetime
#   column, and summing a datetime64 column raises TypeError.
demand_data = data.resample('15min', on='start_datetime').sum(numeric_only=True)

# Feature selection
# NOTE(review): every feature is a total over the *same* interval as the target,
# so it is only known once the interval's trips are known. That looks like target
# leakage -- consider lagged features (e.g. the previous interval) before trusting
# the scores. Also confirm that summing tripSpeed and paymentType is intended
# (a mean / per-category count may be what is wanted).
feature_columns = ['tripDistance', 'tripSpeed', 'tripDuration', 'tripFare', 'paymentType']
features = demand_data[feature_columns]
target = demand_data['trip_count']


In [7]:
# Hold out 20% of the rows for testing. shuffle=False keeps the rows in their
# existing order instead of sampling them at random.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    shuffle=False,
)


In [8]:
# Linear Regression: fit() returns the estimator itself, so construct and fit in one step.
model_lr = LinearRegression().fit(X_train, y_train)
lr_predictions = model_lr.predict(X_test)

# Random Forest: 100 trees, seeded so repeated runs give the same forest.
model_rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
rf_predictions = model_rf.predict(X_test)

# XGBoost: 100 boosting rounds, same seed as the forest.
model_xgb = XGBRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
xgb_predictions = model_xgb.predict(X_test)

# ARIMA(5, 1, 0): built from the training target only (no feature columns are
# passed), then forecast one step per held-out row.
model_arima = ARIMA(y_train, order=(5, 1, 0))
arima_fit = model_arima.fit()
arima_predictions = arima_fit.forecast(steps=len(y_test))

In [None]:
# # DNN (Deep Neural Network)
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# model_dnn = Sequential([
#     Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
#     Dense(32, activation='relu'),
#     Dense(1)
# ])

# model_dnn.compile(optimizer='adam', loss='mean_squared_error')
# model_dnn.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
# dnn_predictions = model_dnn.predict(X_test_scaled).flatten()

In [9]:
# Evaluate models: mean squared error of each forecast against the held-out target.
# (The DNN is not scored because its training cell is commented out.)
scored_forecasts = [
    ("Linear Regression MSE:", lr_predictions),
    ("Random Forest MSE:", rf_predictions),
    ("XGBoost MSE:", xgb_predictions),
    ("ARIMA MSE:", arima_predictions),
]
for label, forecast in scored_forecasts:
    print(label, mean_squared_error(y_test, forecast))

Linear Regression MSE: 6.571435387044201e-23
Random Forest MSE: 0.0933050356466023
XGBoost MSE: 0.11968702012929736
ARIMA MSE: 511.47982120952094


In [13]:
# One hand-written observation whose keys mirror the columns selected into
# `features`: tripDistance, tripSpeed, tripDuration, tripFare, paymentType.
sample_input = pd.DataFrame([{
    'tripDistance': 13.0,
    'tripSpeed': 23.0,
    'tripDuration': 18,
    'tripFare': 60,
    'paymentType': 1,
}])

# Score the row with each fitted regressor. ARIMA is left out: it was built
# from the target series alone, so it has no use for a feature row.
lr_prediction, rf_prediction, xgb_prediction = (
    fitted.predict(sample_input) for fitted in (model_lr, model_rf, model_xgb)
)

for caption, estimate in (
    ("Linear Regression Prediction:", lr_prediction),
    ("Random Forest Prediction:", rf_prediction),
    ("XGBoost Prediction:", xgb_prediction),
):
    print(caption, estimate)


Linear Regression Prediction: [-11.66666667]
Random Forest Prediction: [1.]
XGBoost Prediction: [1.0002676]


In [15]:
# A second hand-written observation; keys mirror the columns selected into
# `features`: tripDistance, tripSpeed, tripDuration, tripFare, paymentType.
sample_input = pd.DataFrame([{
    'tripDistance': 4,
    'tripSpeed': 28,
    'tripDuration': 12,
    'tripFare': 27,
    'paymentType': 0,
}])

# Score the row with each fitted regressor. ARIMA is left out: it was built
# from the target series alone, so it has no use for a feature row.
lr_prediction, rf_prediction, xgb_prediction = (
    fitted.predict(sample_input) for fitted in (model_lr, model_rf, model_xgb)
)

for caption, estimate in (
    ("Linear Regression Prediction:", lr_prediction),
    ("Random Forest Prediction:", rf_prediction),
    ("XGBoost Prediction:", xgb_prediction),
):
    print(caption, estimate)

Linear Regression Prediction: [-2.16666667]
Random Forest Prediction: [1.]
XGBoost Prediction: [1.0016676]
