### Demand forecasting

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU, LSTM
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error

# my package
from utils.gengapy.data_science import modeling
from utils.gengapy.data_engg import sql

In [2]:
df = sql.get_table_df(
    query = """
    SELECT 
        COUNT(r.rental_id) AS rental_count, 
        DATE(r.rental_date) as rental_date
    FROM rental r
    JOIN inventory i ON r.inventory_id = i.inventory_id
    where extract(year from date(r.rental_date)) = 2005
    GROUP BY DATE(r.rental_date)
    ORDER BY rental_date;
    """
)
df
df['rental_date'] = pd.to_datetime(df['rental_date'])
df.set_index('rental_date', inplace=True)
df = df.resample('D').sum().fillna(method='ffill') 
print(df)

postgresql+psycopg2://postgres:postgres@localhost:5432/dvd_rental

    SELECT 
        COUNT(r.rental_id) AS rental_count, 
        DATE(r.rental_date) as rental_date
    FROM rental r
    JOIN inventory i ON r.inventory_id = i.inventory_id
    where extract(year from date(r.rental_date)) = 2005
    GROUP BY DATE(r.rental_date)
    ORDER BY rental_date;
    
             rental_count
rental_date              
2005-05-24              8
2005-05-25            137
2005-05-26            174
2005-05-27            166
2005-05-28            196
...                   ...
2005-08-19            628
2005-08-20            624
2005-08-21            659
2005-08-22            626
2005-08-23            598

[92 rows x 1 columns]


  df = df.resample('D').sum().fillna(method='ffill')


In [3]:
rental_data = df
rental_data = rental_data.sort_index()
rental_data_daily = rental_data.resample('D').sum()
rental_data_daily.fillna(0, inplace=True)

# Scale the rental count data
scaler = MinMaxScaler(feature_range=(0, 1))
rental_counts = rental_data_daily['rental_count'].values.reshape(-1, 1)  # Reshape to 2D

# Apply MinMaxScaler
rental_counts_scaled = scaler.fit_transform(rental_counts)

# 3. Create time series sequences
def create_sequences(data, time_steps):
    sequences = []
    labels = []
    for i in range(len(data) - time_steps):
        sequences.append(data[i:i+time_steps])
        labels.append(data[i+time_steps])  # The next rental count is the label
    return np.array(sequences), np.array(labels)

time_steps = 7
X, y = create_sequences(rental_counts_scaled, time_steps)

# Reshape data for LSTM input
X = X.reshape((X.shape[0], X.shape[1], 1))  # 3D shape: (samples, time_steps, features)

print(f"X shape: {X.shape}")  # (samples, time_steps, 1)
print(f"y shape: {y.shape}")  # (samples, 1)

X shape: (85, 7, 1)
y shape: (85, 1)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Define LSTM model
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(1))  # Output layer to predict rentals

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

  super().__init__(**kwargs)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 4s/step - loss: 0.1510 - val_loss: 0.3723
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - loss: 0.1299 - val_loss: 0.3218
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - loss: 0.1236 - val_loss: 0.2878
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - loss: 0.1168 - val_loss: 0.2725
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step - loss: 0.0996 - val_loss: 0.2695
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - loss: 0.1074 - val_loss: 0.2701
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 0.1128 - val_loss: 0.2613
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - loss: 0.1080 - val_loss: 0.2498
Epoch 9/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [5]:
# Make predictions
y_pred = model.predict(X_test)

# Inverse scaling to get actual values
y_pred_inverse = scaler.inverse_transform(y_pred)
r2 = r2_score(y_test, y_pred_inverse)
mae = mean_absolute_error(y_test, y_pred_inverse)
rmse = root_mean_squared_error(y_test, y_pred_inverse)

print(f"R2: {r2:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460ms/step
R2: -125822.58
MAE: 149.17
RMSE: 159.31


In [7]:
# 4. Predicting the next 7 days of demand
# Get the last time_steps of the training data
last_sequence = rental_counts_scaled[-time_steps:]  # Last 7 days
predictions = []

# Predicting for the next 7 days
for _ in range(7):
    # Reshape for LSTM input
    input_sequence = last_sequence.reshape((1, time_steps, 1))
    prediction = model.predict(input_sequence)
    predictions.append(prediction[0, 0])  # Get the predicted value
    
    # Update the last_sequence for the next prediction
    last_sequence = np.append(last_sequence[1:], prediction)

# Inverse transform to get the original scale
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Prepare a DataFrame for the predicted demand
last_date = rental_data_daily.index[-1]  # Get the last date from training
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=7)
predicted_demand_df = pd.DataFrame(predictions, index=future_dates, columns=['Predicted Demand'])

print(predicted_demand_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
            Predicted Demand
2005-08-24        289.439850
2005-08-25        275.619537
2005-08-26        256.487183
2005-08-27        236.358536
2005-08-28        217.407730
2005-08-29        199.941452
2005-08-30        185.476395
