In [None]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi


Fri Nov 29 22:53:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from datetime import datetime
import pickle

# Read the two PEMS-BAY inputs from Drive:
#   pems_bay      - readings table; first column 'Unnamed: 0' holds the timestamp
#                   string, every other column is named after a sensor id.
#   pems_bay_meta - per-sensor metadata, keyed by the 'sensor_id' column.
pems_bay = pd.read_csv(
    '/content/drive/MyDrive/CS5287FP/PEMS-BAY/PEMS-BAY.csv'
)
pems_bay_meta = pd.read_csv(
    '/content/drive/MyDrive/CS5287FP/PEMS-BAY/PEMS-BAY-META.csv'
)

def _parse_numeric_string(text):
    """Turn a metadata string such as ``"R12.5"`` into a number.

    All characters other than decimal digits and ``.`` are discarded. If what
    remains is exactly ``<digits>.<digits>`` it is returned as a float;
    otherwise the dots are dropped as well and the remaining digits are
    returned as an int (``0`` when no digit is left). Any sign character is
    discarded along with the other non-digits.
    """
    skeleton = ''.join(ch for ch in text if ch.isdecimal() or ch == '.')
    whole, dot, fraction = skeleton.partition('.')
    if dot and whole.isdecimal() and fraction.isdecimal():
        return float(skeleton)
    # Ambiguous or dot-free input: fall back to concatenating the digits.
    digits = skeleton.replace('.', '')
    return int(digits) if digits else 0


def preprocess_data(row, meta_row):
    """Build the numeric feature vector for one (timestamp, sensor) pair.

    Args:
        row: Mapping with key ``'Unnamed: 0'`` holding a timestamp string in
            ``'%Y-%m-%d %H:%M:%S'`` format.
        meta_row: Mapping (e.g. a metadata ``pd.Series``) providing the keys
            'Fwy', 'Dir', 'District', 'City', 'State_PM', 'Abs_PM',
            'Latitude', 'Longitude', 'Length' and 'Lanes'.

    Returns:
        list: 11 values - the timestamp as integer epoch seconds (the string is
        parsed as a naive datetime, so the local timezone applies), followed by
        the ten metadata fields in the order listed above.

    Raises:
        ValueError: If the timestamp string does not match the expected format.
    """
    features = [int(datetime.strptime(row['Unnamed: 0'], '%Y-%m-%d %H:%M:%S').timestamp())]
    for col in ['Fwy', 'Dir', 'District', 'City', 'State_PM', 'Abs_PM', 'Latitude', 'Longitude', 'Length', 'Lanes']:
        value = meta_row[col]
        if col == 'Dir':
            # Direction is encoded as a small integer; anything unknown (including NaN) maps to 0.
            value = {'N': 4, 'E': 1, 'S': 2, 'W': 3}.get(value, 0)
        elif pd.isna(value):
            value = 0
        elif isinstance(value, str):
            # Previously the '.' was stripped before the float check, so
            # "R12.5" silently became 125; keep the decimal point instead.
            value = _parse_numeric_string(value)
        features.append(value)
    return features

# Build the flat training table: one row per (sensor, timestamp) pair, ordered
# sensor by sensor. X holds [timestamp, metadata...] and y the matching reading.
#
# The timestamp column is identical for every sensor and the metadata features
# are identical for every timestamp of a sensor, so both are computed once and
# tiled instead of calling preprocess_data for every single reading.
timestamp_strings = pems_bay['Unnamed: 0'].tolist()

# Same conversion preprocess_data applies to its timestamp feature.
epoch_seconds = np.array(
    [int(datetime.strptime(ts, '%Y-%m-%d %H:%M:%S').timestamp()) for ts in timestamp_strings]
)

X_blocks = []
y_blocks = []

for _, sensor_data in pems_bay.iloc[:, 1:].items():
    sensor_id = int(sensor_data.name)
    meta_row = pems_bay_meta[pems_bay_meta['sensor_id'] == sensor_id]
    # Sensors without a metadata row are skipped; nothing to do for an empty table.
    if meta_row.empty or not timestamp_strings:
        continue
    meta_row = meta_row.iloc[0]

    # Encode the sensor's metadata once; element 0 (the timestamp) is discarded
    # because the full timestamp column is supplied by epoch_seconds.
    static_features = preprocess_data({'Unnamed: 0': timestamp_strings[0]}, meta_row)[1:]

    X_blocks.append(
        np.column_stack([epoch_seconds, np.tile(static_features, (len(epoch_seconds), 1))])
    )
    y_blocks.append(sensor_data.to_numpy())

X = np.vstack(X_blocks) if X_blocks else np.array([])
y = np.concatenate(y_blocks) if y_blocks else np.array([])

# The per-sensor pieces are no longer needed once stacked; release the memory.
del X_blocks, y_blocks
# Persist the targets (one 'Speed' column, same row order as X).
df_y = pd.DataFrame(y, columns=['Speed'])

# Save the DataFrame to a CSV file
df_y.to_csv('/content/drive/MyDrive/CS5287FP/PEMS-BAY/PEMS-BAY-preprocessed_y.csv', index=False)
# The message now names the file that is actually written (it used to say
# PEMS-BAY-preprocessed.csv, which does not exist).
print("Preprocessed y data saved to PEMS-BAY-preprocessed_y.csv")

# Persist the features; the column order mirrors the list returned by preprocess_data.
column_names = ['Timestamp', 'Fwy', 'Dir', 'District', 'City', 'State_PM', 'Abs_PM', 'Latitude', 'Longitude', 'Length', 'Lanes']
# Create a DataFrame from X
df_x = pd.DataFrame(X, columns=column_names)
# Save the DataFrame to a CSV file
df_x.to_csv('/content/drive/MyDrive/CS5287FP/PEMS-BAY/PEMS-BAY-preprocessed_x.csv', index=False)

print("Preprocessed X data saved to PEMS-BAY-preprocessed_x.csv")
# Random 50/50 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

# Standardise the features; statistics come from the training half only.
scaler_X = StandardScaler()
scaler_X.fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Standardise the target the same way (the scaler needs a 2-D column).
scaler_y = StandardScaler()
scaler_y.fit(y_train[:, np.newaxis])
y_train_scaled = scaler_y.transform(y_train[:, np.newaxis])
y_test_scaled = scaler_y.transform(y_test[:, np.newaxis])

# Insert a length-1 middle axis: (rows, features) -> (rows, 1, features),
# which is the 3-D layout passed to the LSTM below.
X_train_reshaped = X_train_scaled[:, np.newaxis, :]
X_test_reshaped = X_test_scaled[:, np.newaxis, :]

# Network: a 50-unit ReLU LSTM over the single time step, then one linear output.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(1, X_train_reshaped.shape[2])))
model.add(Dense(1))

# Mean-squared error on the standardised target, optimised with Adam.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train; the last 20% of the training rows are held out for validation.
history = model.fit(
    X_train_reshaped,
    y_train_scaled,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

# Save the trained model and the per-epoch loss history to Drive.
model_path = "/content/drive/MyDrive/CS5287FP/lstm_team21.h5"
model.save(model_path)
with open('/content/drive/MyDrive/CS5287FP/history.pkl', 'wb') as file:
    pickle.dump(history.history, file)


# Generate postulated test data: pick a random timestamp row and a random
# sensor, encode them like the training rows, and run a single prediction.
random_row = pems_bay.iloc[np.random.randint(len(pems_bay))]

# Draw only from sensors that have a metadata row. The training loop skips
# sensors without one, and the .iloc[0] lookup below would raise IndexError
# for such a sensor.
sensor_ids = np.array([int(label) for label in random_row.index[1:]])
known_sensor_ids = sensor_ids[np.isin(sensor_ids, pems_bay_meta['sensor_id'].to_numpy())]
if known_sensor_ids.size == 0:
    raise ValueError("No sensor column in PEMS-BAY has a matching row in PEMS-BAY-META")
random_sensor_id = int(known_sensor_ids[np.random.randint(len(known_sensor_ids))])
random_meta_row = pems_bay_meta[pems_bay_meta['sensor_id'] == random_sensor_id].iloc[0]

# Apply the same encoding, scaling and (1, 1, features) layout used for training.
postulated_test = preprocess_data(random_row, random_meta_row)
postulated_test_scaled = scaler_X.transform(np.array(postulated_test).reshape(1, -1))
postulated_test_reshaped = postulated_test_scaled.reshape((1, 1, postulated_test_scaled.shape[1]))

# Predict in standardised units, then map back to the original target scale.
prediction_scaled = model.predict(postulated_test_reshaped)
prediction = scaler_y.inverse_transform(prediction_scaled)

print("Postulated test data:")
print(postulated_test)
print("\nPrediction:")
print(prediction[0][0])

Preprocessed data saved to PEMS-BAY-preprocessed.csv
Preprocessed X data saved to PEMS-BAY-preprocessed_x.csv


  super().__init__(**kwargs)


Epoch 1/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m517s[0m 2ms/step - loss: 0.9534 - val_loss: 0.9349
Epoch 2/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 2ms/step - loss: 0.9338 - val_loss: 0.9321
Epoch 3/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m558s[0m 2ms/step - loss: 0.9311 - val_loss: 0.9302
Epoch 4/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m552s[0m 2ms/step - loss: 0.9274 - val_loss: 0.9296
Epoch 5/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m529s[0m 2ms/step - loss: 0.9272 - val_loss: 0.9284
Epoch 6/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m523s[0m 2ms/step - loss: 0.9261 - val_loss: 0.9275
Epoch 7/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m565s[0m 2ms/step - loss: 0.9258 - val_loss: 0.9277
Epoch 8/10
[1m211722/211722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 2ms/step - loss: 0.

In [None]:
# Debug check: show the type of the column index of the loaded PEMS-BAY frame.
type(pems_bay.columns)

pandas.core.indexes.base.Index