In [1]:
#imports and required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy import stats
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Load and combine datasets
# The two file names are placeholders and must point at the real exports.
# NOTE(review): the inline column notes suggest the sources use different
# headers ("date"/"price" vs "order date"/"cost") - confirm against the files,
# because columns present in only one source are NaN-filled by the concat.
ubereats = pd.read_csv("-------.csv")  # date, time, weather, traffic, item, price, delivery_vs_pickup
toast = pd.read_csv("---------.csv")  # order date, time, cost, item

# Stack both sources row-wise and renumber the index from 0.
data = pd.concat(objs=[ubereats, toast], axis=0, ignore_index=True)

In [None]:
# Data pre-processing and feature engineering
# Build one timestamp per order from the separate date and time columns.
# NOTE(review): assumes every row has string 'date' and 'time' values - confirm
# for both sources.
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])
data = data.sort_values('datetime')  # reassign instead of mutating in place
data['day_of_week'] = data['datetime'].dt.dayofweek
data['hour'] = data['datetime'].dt.hour
data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)  # 5/6 = Sat/Sun

# Cyclical encoding (sin/cos pairs so hour 23 sits next to hour 0, etc.)
# NOTE(review): these row-level time features are not part of the hourly
# aggregation below, so they never reach the model - add them to `agg` if they
# are meant to be model inputs.
data['hour_sin'] = np.sin(2 * np.pi * data['hour'] / 24)
data['hour_cos'] = np.cos(2 * np.pi * data['hour'] / 24)
data['dow_sin'] = np.sin(2 * np.pi * data['day_of_week'] / 7)
data['dow_cos'] = np.cos(2 * np.pi * data['day_of_week'] / 7)

# Aggregate to hourly buckets: order count becomes 'sales', the rest are means.
# A DateOffset object is used for the frequency because the 'H' string alias is
# deprecated in recent pandas releases.
# NOTE(review): 'mean' needs numeric input - assumes weather, traffic and
# delivery_vs_pickup are already numerically encoded; confirm.
agg = (
    data.groupby(pd.Grouper(key='datetime', freq=pd.offsets.Hour()))
    .agg({
        'item': 'count',
        'price': 'mean',
        'cost': 'mean',
        'weather': 'mean',
        'traffic': 'mean',
        'delivery_vs_pickup': 'mean',
    })
    .rename(columns={'item': 'sales'})
)

# The sequence builder reads its target from column 0, so 'sales' must come
# first; previously column 0 was 'price' and the model was fitted to the wrong
# target.
agg = agg[['sales', 'price', 'cost', 'weather', 'traffic', 'delivery_vs_pickup']]

# Hours without orders have NaN means: carry the last known value forward
# (fillna(method=...) is deprecated in favour of .ffill()). Leading gaps have
# nothing to carry forward, so back-fill them to keep NaN out of the scaler and
# the network.
agg = agg.ffill().bfill()

In [None]:
# Normalization
# Fit the min/max statistics on the chronologically earliest 80% of rows only,
# then apply them to the whole series. Fitting on every row would leak the
# hold-out period's range into training. The fraction mirrors the 0.8 used for
# the train/test split of the sequences; because each sequence looks back
# `lookback` rows, these rows all fall inside the training portion.
# Values in the later rows may therefore fall outside [0, 1].
n_fit_rows = int(0.8 * len(agg))
scaler = MinMaxScaler()
scaler.fit(agg.iloc[:n_fit_rows])
scaled = scaler.transform(agg)
scaled_df = pd.DataFrame(scaled, columns=agg.columns, index=agg.index)

In [None]:
#Sequence Creation
def create_sequences(data, lookback=24, target_col=0):
    """Turn a 2-D time series into supervised (window, next-value) pairs.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_features)
        Rows in chronological order. Anything `np.asarray` can convert to a
        2-D array is accepted.
    lookback : int, default 24
        Number of consecutive rows in each input window.
    target_col : int, default 0
        Column index of the value to predict. The caller is responsible for
        making sure this column really holds the target.

    Returns
    -------
    X : np.ndarray of shape (n_rows - lookback, lookback, n_features)
        Window `k` holds rows `k .. k + lookback - 1`.
    y : np.ndarray of shape (n_rows - lookback,)
        `data[k + lookback, target_col]`, i.e. the row right after window `k`.

    When `n_rows <= lookback` both arrays are empty but keep their trailing
    dimensions, so `X.shape[1]` / `X.shape[2]` remain usable.

    Raises
    ------
    ValueError
        If `data` is not 2-dimensional.
    """
    values = np.asarray(data)
    if values.ndim != 2:
        raise ValueError(
            f"data must be 2-dimensional (rows x features), got ndim={values.ndim}"
        )

    n_rows, n_features = values.shape
    if n_rows <= lookback:
        # Not enough history for even one window: return well-shaped empties.
        return (
            np.empty((0, lookback, n_features), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    X = np.stack([values[end - lookback:end] for end in range(lookback, n_rows)])
    # Copy so the returned targets do not alias the caller's array.
    y = values[lookback:, target_col].copy()
    return X, y

# Window the scaled series, then split chronologically: the first 80% of the
# windows are used for training and the remainder is held out for testing.
lookback = 24
X, y = create_sequences(scaled, lookback)

split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
# LSTM model
# Two stacked LSTM layers with dropout in between, followed by a small dense
# head. The input shape (timesteps, features) is taken from the sequence array.
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.3),
    LSTM(32, return_sequences=False),
    Dense(16, activation='relu'),
    Dense(1)  # single output unit, no activation (was the invalid token `1Let`)
])
model.compile(optimizer='adam', loss='mse')

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen. validation_split=0.1 holds out part of the training data for
# that monitoring.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=40,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1,
)

In [None]:
# Forecast and evaluation
# Predictions and targets are both in the scaler's units, so the error metrics
# below are reported in scaled units as well.
y_test_true = y_test
final_preds = model.predict(X_test).reshape(-1)  # (n, 1) -> (n,)

mae = mean_absolute_error(y_test_true, final_preds)
rmse = np.sqrt(
    mean_squared_error(y_test_true, final_preds)
)
print(f"MAE: {mae:.3f}, RMSE: {rmse:.3f}")

In [None]:
# Confidence interval
# Residual-based 90% band for individual forecasts.
# The spread of a single forecast error is the residual standard deviation,
# not the standard error of the mean residual: the latter shrinks with n and
# only describes the uncertainty of the *average* error, which made the band
# far too narrow. The band is also shifted by the mean residual so a
# systematic over- or under-prediction is corrected for.
# NOTE: the residuals come from the same test set the band is drawn on, so the
# coverage shown here is optimistic.
residuals = y_test_true - final_preds
n = len(residuals)
mean_resid = np.mean(residuals)
se_resid = stats.sem(residuals)          # uncertainty of the mean error (kept for reference)
resid_std = np.std(residuals, ddof=1)    # spread of individual errors
t_crit = stats.t.ppf(1 - 0.05, df=n-1)  # two-sided 90% -> 0.95 quantile
margin = t_crit * resid_std * np.sqrt(1 + 1 / n)

lower_bound = final_preds + mean_resid - margin
upper_bound = final_preds + mean_resid + margin

print("\nSample forecasts with 90% CI:\n")
# Guard against test sets with fewer than five points.
for i in range(min(5, n)):
    print(f"Pred: {final_preds[i]:.3f} | 90% CI: ({lower_bound[i]:.3f}, {upper_bound[i]:.3f}) | Actual: {y_test_true[i]:.3f}")

In [None]:
# Visualization
# Actual vs. predicted series on one axis, with the interval drawn as a shaded
# band; the x-axis is the position within the test set.
fig, ax = plt.subplots(figsize=(14,6))
time_steps = range(len(final_preds))
ax.plot(y_test_true, label='Actual Sales', color='black')
ax.plot(final_preds, label='Predicted Sales', color='blue')
ax.fill_between(time_steps, lower_bound, upper_bound, color='lightblue', alpha=0.4, label='90% CI')
ax.set_title("Restaurant Sales Forecast (LSTM) with 90% Confidence Interval")
ax.set_xlabel("Time Steps")
ax.set_ylabel("Scaled Sales")
ax.legend()
plt.show()

In [None]:
#Continuous Retraining Function
def update_model(new_data):
    """Fine-tune the already-trained model on newly arrived data and save it.

    The data is scaled with the notebook-level `scaler` (already fitted),
    windowed with `create_sequences` using the notebook-level `lookback`, used
    for 3 more training epochs on the notebook-level `model`, and the model is
    then written to "lstm_sales_model_updated.h5".

    Parameters
    ----------
    new_data : array-like or DataFrame
        New rows in chronological order.
        NOTE(review): presumably must have the same columns, in the same
        order, as the frame the scaler was fitted on - confirm at the caller.

    Raises
    ------
    ValueError
        If `new_data` has `lookback` rows or fewer, in which case no training
        window can be formed.
    """
    new_scaled = scaler.transform(new_data)
    if len(new_scaled) <= lookback:
        # Fail early with a clear message instead of passing empty arrays to fit().
        raise ValueError(
            f"update_model needs more than {lookback} rows to build a sequence, "
            f"got {len(new_scaled)}"
        )
    X_new, y_new = create_sequences(new_scaled, lookback)
    model.fit(X_new, y_new, epochs=3, batch_size=32, verbose=0)
    model.save("lstm_sales_model_updated.h5")