In [21]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import seaborn as sns
import matplotlib.pyplot as plt

In [24]:
class WaitTimePredictor:
    def __init__(self):
        self.model = None
        self.scaler_X = MinMaxScaler()
        self.scaler_y = MinMaxScaler()
        self.sequence_length = 5  # Number of time steps to look back

    def create_sequences(self, data):
        """Convert data into sequences for LSTM"""
        X, y = [], []
        for i in range(len(data) - self.sequence_length):
            X.append(data[i:(i + self.sequence_length)])
            y.append(data[i + self.sequence_length])
        return np.array(X), np.array(y)

    def create_time_features(self, df):
        """Create time-based features"""
        # Generate synthetic timestamps
        base_date = datetime.now() - timedelta(days=7)
        timestamps = [base_date + timedelta(minutes=x*60) for x in range(len(df))]

        df['timestamp'] = timestamps
        df['hour'] = pd.to_datetime(df['timestamp']).dt.hour
        df['day_of_week'] = pd.to_datetime(df['timestamp']).dt.dayofweek

        # Create cyclical time features
        df['hour_sin'] = np.sin(2 * np.pi * df['hour']/24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour']/24)
        df['day_sin'] = np.sin(2 * np.pi * df['day_of_week']/7)
        df['day_cos'] = np.cos(2 * np.pi * df['day_of_week']/7)

        return df

    def build_model(self, input_shape):
        """Build LSTM model"""
        model = Sequential([
            LSTM(32, input_shape=input_shape, return_sequences=True),
            Dropout(0.2),
            LSTM(16),
            Dropout(0.2),
            Dense(8, activation='relu'),
            Dense(1)
        ])

        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='mse'
        )

        return model

    def analyze_data(self, df):
        """Analyze the dataset and print insights"""
        print("\nData Analysis:")
        print(f"Number of samples: {len(df)}")

        wait_times = df['Wait Time (in minutes)']
        print("\nWait Time Statistics:")
        print(f"Mean: {wait_times.mean():.2f} minutes")
        print(f"Median: {wait_times.median():.2f} minutes")
        print(f"Std Dev: {wait_times.std():.2f} minutes")
        print(f"Min: {wait_times.min():.2f} minutes")
        print(f"Max: {wait_times.max():.2f} minutes")

        # Plot time series
        plt.figure(figsize=(15, 5))
        plt.plot(df['timestamp'], df['Wait Time (in minutes)'])
        plt.title('Wait Times Over Time')
        plt.xlabel('Time')
        plt.ylabel('Wait Time (minutes)')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig('wait_time_series.png')
        plt.close()

    def train(self, data_path):
        # Load data
        df = pd.read_csv(data_path)

        # Create time features
        df = self.create_time_features(df)

        # Analyze data
        self.analyze_data(df)

        # Prepare data for sequence prediction
        wait_times = df['Wait Time (in minutes)'].values.reshape(-1, 1)

        # Scale the data
        wait_times_scaled = self.scaler_y.fit_transform(wait_times)

        # Create sequences
        X, y = self.create_sequences(wait_times_scaled)

        # Reshape for LSTM [samples, time steps, features]
        X = X.reshape((X.shape[0], X.shape[1], 1))

        # Split into train and test
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        # Build and train model
        self.model = self.build_model((X.shape[1], X.shape[2]))

        history = self.model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=16,
            validation_split=0.2,
            verbose=1
        )

        # Make predictions
        train_pred = self.model.predict(X_train)
        test_pred = self.model.predict(X_test)

        # Inverse transform predictions
        train_pred = self.scaler_y.inverse_transform(train_pred)
        test_pred = self.scaler_y.inverse_transform(test_pred)
        y_train_actual = self.scaler_y.inverse_transform(y_train.reshape(-1, 1))
        y_test_actual = self.scaler_y.inverse_transform(y_test.reshape(-1, 1))

        # Calculate metrics
        train_rmse = np.sqrt(np.mean((y_train_actual - train_pred) ** 2))
        test_rmse = np.sqrt(np.mean((y_test_actual - test_pred) ** 2))

        print("\nModel Performance:")
        print(f"Train RMSE: {train_rmse:.2f} minutes")
        print(f"Test RMSE: {test_rmse:.2f} minutes")

        # Plot predictions vs actual
        plt.figure(figsize=(15, 5))
        plt.plot(y_test_actual, label='Actual')
        plt.plot(test_pred, label='Predicted')
        plt.title('Actual vs Predicted Wait Times')
        plt.xlabel('Time Step')
        plt.ylabel('Wait Time (minutes)')
        plt.legend()
        plt.savefig('predictions.png')
        plt.close()

        # Plot training history
        plt.figure(figsize=(10, 5))
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss During Training')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig('training_history.png')
        plt.close()

        return test_rmse

    def predict_wait_time(self, recent_waits):
        """
        Predict wait time based on recent wait times

        Parameters:
        recent_waits: list of last 5 wait times
        """
        # Convert input to array and reshape
        recent_waits = np.array(recent_waits).reshape(-1, 1)

        # Scale the input
        recent_waits_scaled = self.scaler_y.transform(recent_waits)

        # Reshape for LSTM [samples, time steps, features]
        sequence = recent_waits_scaled.reshape(1, self.sequence_length, 1)

        # Make prediction
        prediction_scaled = self.model.predict(sequence)

        # Inverse transform prediction
        prediction = self.scaler_y.inverse_transform(prediction_scaled)[0][0]

        return max(0, prediction)

In [25]:
if __name__ == "__main__":
    predictor = WaitTimePredictor()
    rmse = predictor.train('Restaurant.csv')

    # Example prediction
    recent_waits = [15, 20, 18, 16, 19]  # Last 5 wait times
    predicted_wait = predictor.predict_wait_time(recent_waits)

    print(f"\nExample Prediction:")
    print(f"Recent wait times: {recent_waits}")
    print(f"Predicted wait time: {predicted_wait:.2f} minutes")


Data Analysis:
Number of samples: 70

Wait Time Statistics:
Mean: 16.93 minutes
Median: 16.13 minutes
Std Dev: 7.07 minutes
Min: 5.25 minutes
Max: 29.97 minutes
Epoch 1/100


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 401ms/step - loss: 0.3001 - val_loss: 0.3752
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.2744 - val_loss: 0.3615
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.2774 - val_loss: 0.3494
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.2770 - val_loss: 0.3368
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.2706 - val_loss: 0.3236
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.2371 - val_loss: 0.3097
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.2285 - val_loss: 0.2945
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.2039 - val_loss: 0.2779
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m