In [27]:
!pip install tqdm
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

np.random.seed(42)

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [55]:
DATA_DIR = '/content/drive/MyDrive/car_data'
all_files = sorted([os.path.join(DATA_DIR, f) for f in os.listdir(DATA_DIR) if f.endswith('.csv')])
files = all_files[:13000]  # Change to 'all_files' to use full dataset

def load_and_split(path, input_len=62, pred_len=5, features=None):
    df = pd.read_csv(path)
    if features is None:
        features = df.columns.tolist()
    data = df[features].values
    x = data[:input_len]
    y = data[input_len:input_len+pred_len, :2]
    return x, y

# Build datasets
X_list, Y_list = [], []
for fp in tqdm(files, desc="Loading data"):
    x, y = load_and_split(fp)
    X_list.append(x)
    Y_list.append(y)
X = np.stack(X_list)  # (N, T, D)
Y = np.stack(Y_list)  # (N, pred_len, 2)

# Shuffle and split 80/10/10
N = X.shape[0]
idx = np.arange(N)
np.random.shuffle(idx)
n_train = int(0.8 * N)
n_val   = int(0.1 * N)

train_idx = idx[:n_train]
val_idx   = idx[n_train:n_train+n_val]
test_idx  = idx[n_train+n_val:]

X_train, Y_train = X[train_idx], Y[train_idx]
X_val,   Y_val   = X[val_idx],   Y[val_idx]
X_test,  Y_test  = X[test_idx],  Y[test_idx]

# Normalize using train stats
X_mean = X_train.mean(axis=(0, 1), keepdims=True)
X_std  = X_train.std(axis=(0, 1), keepdims=True) + 1e-6
X_train_norm = (X_train - X_mean) / X_std
X_val_norm   = (X_val   - X_mean) / X_std
X_test_norm  = (X_test  - X_mean) / X_std

# Normalize output coordinates (Local_X and Local_Y)
coord_mean = Y_train.mean(axis=(0, 1), keepdims=True)  # shape (1, 1, 2)
coord_std  = Y_train.std(axis=(0, 1), keepdims=True) + 1e-6

Y_train_norm = (Y_train - coord_mean) / coord_std
Y_val_norm   = (Y_val   - coord_mean) / coord_std
Y_test_norm  = (Y_test  - coord_mean) / coord_std

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

Loading data: 100%|██████████| 9402/9402 [02:27<00:00, 63.93it/s] 


Train: (7521, 62, 12), Val: (940, 62, 12), Test: (941, 62, 12)


In [63]:
class ManualLSTMCell:
    def __init__(self, input_dim, hidden_dim, lr):
        D, H = input_dim, hidden_dim
        self.lr = lr

        # Weight matrices
        self.Wf = np.random.randn(H, H + D) * 0.1
        self.Wi = np.random.randn(H, H + D) * 0.1
        self.Wc = np.random.randn(H, H + D) * 0.1
        self.Wo = np.random.randn(H, H + D) * 0.1

        # Biases
        self.bf = np.zeros((H, 1))
        self.bi = np.zeros((H, 1))
        self.bc = np.zeros((H, 1))
        self.bo = np.zeros((H, 1))

    @staticmethod
    def sigmoid(x):
        pos = (x >= 0)
        neg = ~pos
        out = np.empty_like(x, dtype=float)
        out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
        exp_x = np.exp(x[neg])
        out[neg] = exp_x / (1.0 + exp_x)
        return out

    @staticmethod
    def dsigmoid(y):
        return y * (1 - y)

    @staticmethod
    def dtanh(y):
        return 1 - y**2

    def forward(self, x_seq):
        T, D = x_seq.shape
        H = self.bf.shape[0]

        h_prev = np.zeros((H, 1))
        c_prev = np.zeros((H, 1))

        hs, cs, caches = [], [], []

        for t in range(T):
            x_t = x_seq[t].reshape(-1, 1)
            concat = np.vstack((h_prev, x_t))

            f_t = self.sigmoid(self.Wf @ concat + self.bf)
            i_t = self.sigmoid(self.Wi @ concat + self.bi)
            c_hat_t = np.tanh(self.Wc @ concat + self.bc)
            c_t = f_t * c_prev + i_t * c_hat_t
            o_t = self.sigmoid(self.Wo @ concat + self.bo)
            h_t = o_t * np.tanh(c_t)

            hs.append(h_t)
            cs.append(c_t)
            caches.append((concat, f_t, i_t, c_hat_t, o_t, c_prev, c_t))

            h_prev, c_prev = h_t, c_t

        return hs, cs, caches

    def backward(self, dh_next, dc_next, caches):
      # Initialize gradients
      dWf = np.zeros_like(self.Wf)
      dWi = np.zeros_like(self.Wi)
      dWc = np.zeros_like(self.Wc)
      dWo = np.zeros_like(self.Wo)
      dbf = np.zeros_like(self.bf)
      dbi = np.zeros_like(self.bi)
      dbc = np.zeros_like(self.bc)
      dbo = np.zeros_like(self.bo)

      dh, dc = dh_next.copy(), dc_next.copy()
      H = self.bf.shape[0]

      for t in reversed(range(len(caches))):
          concat, f_t, i_t, c_hat_t, o_t, c_prev, c_t = caches[t]

          tanh_c_t = np.tanh(c_t)
          do = dh * tanh_c_t
          dao = do * self.dsigmoid(o_t)

          dc_raw = dc + dh * o_t * self.dtanh(tanh_c_t)

          df = dc_raw * c_prev
          di = dc_raw * c_hat_t
          dc_hat = dc_raw * i_t

          daf = df * self.dsigmoid(f_t)
          dai = di * self.dsigmoid(i_t)
          dac = dc_hat * self.dtanh(c_hat_t)

          dWf += daf @ concat.T
          dWi += dai @ concat.T
          dWc += dac @ concat.T
          dWo += dao @ concat.T
          dbf += daf
          dbi += dai
          dbc += dac
          dbo += dao

          dconcat = (
              self.Wf.T @ daf +
              self.Wi.T @ dai +
              self.Wc.T @ dac +
              self.Wo.T @ dao
          )
          dh = dconcat[:H, :]
          dc = dc_raw * f_t

      # Apply gradient descent step
      for param, dparam in [
          (self.Wf, dWf), (self.Wi, dWi), (self.Wc, dWc), (self.Wo, dWo),
          (self.bf, dbf), (self.bi, dbi), (self.bc, dbc), (self.bo, dbo)
      ]:
          param -= self.lr * dparam



In [64]:
# Hyperparameters
INPUT_DIM  = X_train.shape[2]
HIDDEN_DIM = 64
EPOCHS     = 20
LR         = 0.001

cell  = ManualLSTMCell(INPUT_DIM, HIDDEN_DIM, lr=LR)
W_out = np.random.randn(2*5, HIDDEN_DIM) * 0.1
b_out = np.zeros((2*5,1))

train_rmse, val_rmse = [], []
train_start = time.time()

for ep in range(1, EPOCHS+1):
    epoch_start = time.time()
    total_loss = 0

    # --- Training pass ---
    for x_seq, y_true in zip(X_train_norm, Y_train):
        hs, cs, caches = cell.forward(x_seq)
        h_T = hs[-1]
        y_pred = (W_out @ h_T + b_out).reshape(5,2)
        err = y_pred - y_true
        loss = np.sqrt((err**2).mean())
        total_loss += loss

        # backward on output
        dy = 2 * err / (5*2)
        dy = dy.reshape(10,1)
        dW_out = dy @ h_T.T
        db_out = dy
        W_out  -= LR * dW_out
        b_out  -= LR * db_out

        dh  = W_out.T @ dy
        dc0 = np.zeros_like(cs[0])
        cell.backward(dh, dc0, caches)

    avg_train = total_loss / len(X_train_norm)
    train_rmse.append(avg_train)

    # --- Validation pass (no weight updates) ---
    tot_val = 0
    for x_seq, y_true in zip(X_val_norm, Y_val):
        hs, _, _ = cell.forward(x_seq)
        y_pred = (W_out @ hs[-1] + b_out).reshape(5,2)
        tot_val += np.sqrt(((y_pred - y_true)**2).mean())
    avg_val = tot_val / len(X_val_norm)
    val_rmse.append(avg_val)

    epoch_time = time.time() - epoch_start
    print(f"Epoch {ep:2d} — train RMSE: {avg_train:.4f} — val RMSE: {avg_val:.4f} — {epoch_time:.1f}s")

total_time = time.time() - train_start
print(f"\nTotal training time: {total_time:.1f}s")

Epoch  1 — train RMSE: 284.9539 — val RMSE: 209.4570 — 132.9s
Epoch  2 — train RMSE: 187.1766 — val RMSE: 168.7946 — 130.7s
Epoch  3 — train RMSE: 170.6001 — val RMSE: 167.7523 — 131.0s
Epoch  4 — train RMSE: 158.5760 — val RMSE: 158.9072 — 130.4s
Epoch  5 — train RMSE: 175.6770 — val RMSE: 171.3334 — 130.0s
Epoch  6 — train RMSE: 166.1900 — val RMSE: 158.1724 — 129.2s


KeyboardInterrupt: 

In [None]:
# Evaluate on test set
test_preds = []
for x_seq in X_test_norm:
    hs, _, _ = cell.forward(x_seq)
    h_last = hs[-1]
    y_hat = (W_out @ h_last + b_out).reshape(-1)
    test_preds.append(y_hat)

test_preds = np.array(test_preds).reshape(len(X_test), 5, 2)  # reshape to match Y_test
test_rmse = np.sqrt(((test_preds - Y_test)**2).mean())
print(f"Test RMSE: {test_rmse:.4f}")

In [None]:
# Plot RMSE curves
plt.plot(train_rmse, label='Train RMSE')
plt.plot(val_rmse,   label='Val RMSE')
plt.xlabel("Epoch")
plt.ylabel("RMSE")
plt.title("Train vs. Val RMSE")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
rmse_gap = [tr - val for tr, val in zip(train_rmse, val_rmse)]

plt.plot(rmse_gap, label="Train - Val RMSE Gap")
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Epoch")
plt.ylabel("RMSE Gap")
plt.title("Training vs Validation RMSE Gap")
plt.legend()
plt.grid(True)
plt.show()