In [7]:
from google.cloud import bigquery
import pandas as pd

client = bigquery.Client()

QUERY = """
SELECT
  timestamp,
  CarParkID,
  AvailableLots
FROM `smart-car-park-availability-1.lta_data.view_carpark_availability`
WHERE CarParkID IS NOT NULL
ORDER BY CarParkID, timestamp
"""
df = client.query(QUERY).to_dataframe()



In [8]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# --- Load & preprocess ---
grouped = df.groupby("CarParkID")

sequence_length = 10
X, y = [], []

for carpark_id, group in grouped:
    group = group.sort_values("timestamp")
    values = group["AvailableLots"].values
    for i in range(len(values) - sequence_length):
        input_seq = values[i:i + sequence_length]
        label = values[i + sequence_length]  # Directly use next value as label
        X.append(input_seq)
        y.append(label)

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)

# Reshape for LSTM input
X = X.reshape((X.shape[0], X.shape[1], 1))

# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# --- LSTM Model ---
model = Sequential()
model.add(LSTM(64, input_shape=(X.shape[1], X.shape[2]), return_sequences=False))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)


  super().__init__(**kwargs)


Epoch 1/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 6ms/step - loss: 1618.2556 - mae: 8.9653 - val_loss: 6.2474 - val_mae: 1.4371
Epoch 2/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 6ms/step - loss: 6.5880 - mae: 1.1404 - val_loss: 4.7814 - val_mae: 0.9276
Epoch 3/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 6ms/step - loss: 5.3589 - mae: 1.0424 - val_loss: 5.0076 - val_mae: 1.0280
Epoch 4/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 6ms/step - loss: 4.9895 - mae: 1.0064 - val_loss: 5.4922 - val_mae: 1.0598
Epoch 5/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 6ms/step - loss: 5.3687 - mae: 1.0246 - val_loss: 5.1776 - val_mae: 1.3150
Epoch 6/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 6ms/step - loss: 5.4237 - mae: 1.0305 - val_loss: 4.3287 - val_mae: 0.9218
Epoch 7/10
[1m35559/35559[0m [32m━━━━━━━━━━━━━

In [14]:
# Predict on validation data
y_pred = model.predict(X_val).flatten()  # Flatten to match shape of y_val
y_true = y_val.flatten()  # Just to be explicit

# Evaluate metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


[1m8890/8890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 3ms/step
MAE: 0.86
RMSE: 2.11


  mape = (np.abs((y_true - y_pred) / y_true).mean()) * 100


In [None]:

# Save model to GCS
model.save("gs://prediction_models_x/models_lstm_model_x1.keras")