In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import wandb
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from torchviz import make_dot

# Start a Weights & Biases run. Experiment projects are named t1-t14
# (main project: project-lstm); this run is logged under "t14".
wandb.init(project="t14")



[34m[1mwandb[0m: Currently logged in as: [33mboredxmc[0m ([33mboredxmc-school[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [2]:

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:
# ----------------------------------
# Load & process data
# ----------------------------------
# Window layout: every sample is `seq_len` consecutive rows of the first
# `input_features` columns; its target is the next `output_features`
# column(s) taken from the row immediately after that window.
seq_len = 2000
input_features = 2  # radar_i, radar_q
output_features = 1  # tfm_ecg2
batch_size = 100

file_path = r'train.csv'
df = pd.read_csv(file_path)
df = df.head(len(df) // 16)  # keep only the first sixteenth of the rows

data = df.values.astype(np.float32)

# One window per start offset; the last `seq_len` rows cannot start a window.
X = np.array([
    data[start:start + seq_len, :input_features]
    for start in range(len(data) - seq_len)
])
# Target for window `start` is the row at index `start + seq_len`.
y = data[seq_len:, input_features:input_features + output_features]

X_tensor, y_tensor = torch.tensor(X), torch.tensor(y)

dataset = TensorDataset(X_tensor, y_tensor)
# shuffle=False keeps the batches in the same order as the rows of the file.
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

print(f"Loaded dataset: {len(dataset)} samples")


Loaded dataset: 79175 samples


In [None]:
# ----------------------------------
# Model
# ----------------------------------


class LSTMModel(nn.Module):
    """LSTM stack followed by a fully connected head (LSTM -> FC).

    The linear layer is applied only to the LSTM output of the final time
    step, so each input sequence produces a single ``output_size`` vector.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values predicted per sequence.
        num_layers: Number of stacked LSTM layers (default 1).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a batch-first tensor ``(batch, seq, input_size)`` to ``(batch, output_size)``."""
        sequence_output, _state = self.lstm(x)
        final_step = sequence_output[:, -1, :]  # hidden output at the last time step
        return self.fc(final_step)




# Hyperparameters
hidden_size = 100
num_layers = 4
learning_rate = 1e-4
epochs = 10

# Build the network on the selected device, with an MSE objective and Adam.
model = LSTMModel(
    input_size=input_features,
    hidden_size=hidden_size,
    output_size=output_features,
    num_layers=num_layers,
).to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Register the model with the active W&B run.
wandb.watch(model)


In [5]:
# ----------------------------------
# Training and evaluation
# ----------------------------------
def train_model(model, train_loader, epochs):
    """Train ``model`` on ``train_loader`` for ``epochs`` passes and return it.

    Uses the notebook-level ``optimizer``, ``loss_fn``, ``device`` and
    ``wandb`` objects. Every batch loss is logged as "Batch Loss"; after each
    epoch the sum of batch losses divided by ``len(train_loader)`` is logged
    as "Epoch Loss". Progress is printed every 100th batch and once per epoch.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object after training.
    """
    for epoch_number in range(1, epochs + 1):
        model.train()
        batch_losses = []

        for batch_number, (inputs, targets) in enumerate(train_loader, start=1):
            # Move the batch to the compute device (GPU when available).
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            loss = loss_fn(model(inputs), targets)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            batch_losses.append(batch_loss)
            wandb.log({"Batch Loss": batch_loss})

            # Report the first batch and every 100th batch after it.
            if (batch_number - 1) % 100 == 0:
                print(f"Epoch [{epoch_number}/{epochs}], Batch [{batch_number}/{len(train_loader)}], Loss: {batch_loss:.4f}")

        avg_epoch_loss = sum(batch_losses) / len(train_loader)
        wandb.log({"Epoch Loss": avg_epoch_loss})
        print(f"Epoch [{epoch_number}/{epochs}] Loss: {avg_epoch_loss:.4f}")

    return model

model = train_model(model=model, train_loader=train_loader, epochs=epochs)


Epoch [1/10], Batch [1/792], Loss: 0.1709
Epoch [1/10], Batch [101/792], Loss: 0.0034
Epoch [1/10], Batch [201/792], Loss: 0.0663
Epoch [1/10], Batch [301/792], Loss: 0.0070
Epoch [1/10], Batch [401/792], Loss: 0.0083
Epoch [1/10], Batch [501/792], Loss: 0.0209
Epoch [1/10], Batch [601/792], Loss: 0.0091
Epoch [1/10], Batch [701/792], Loss: 0.0039
Epoch [1/10] Loss: 0.0714
Epoch [2/10], Batch [1/792], Loss: 0.1128
Epoch [2/10], Batch [101/792], Loss: 0.0011
Epoch [2/10], Batch [201/792], Loss: 0.0641
Epoch [2/10], Batch [301/792], Loss: 0.0092
Epoch [2/10], Batch [401/792], Loss: 0.0052
Epoch [2/10], Batch [501/792], Loss: 0.0243
Epoch [2/10], Batch [601/792], Loss: 0.0115
Epoch [2/10], Batch [701/792], Loss: 0.0028
Epoch [2/10] Loss: 0.0572
Epoch [3/10], Batch [1/792], Loss: 0.0155
Epoch [3/10], Batch [101/792], Loss: 0.0021
Epoch [3/10], Batch [201/792], Loss: 0.0347
Epoch [3/10], Batch [301/792], Loss: 0.0067
Epoch [3/10], Batch [401/792], Loss: 0.0037
Epoch [3/10], Batch [501/792],

In [6]:
# ----------------------------------
# Save and evaluate model
# ----------------------------------

# Persist only the learned weights (state dict), not the whole module object.
torch.save(obj=model.state_dict(), f="lstm_model.pth")
print("Model saved.")

def evaluate_model(model, data_loader):
    """Compute, print, log and return the mean per-batch loss of ``model``.

    Runs in eval mode with gradients disabled, using the notebook-level
    ``loss_fn``, ``device`` and ``wandb`` objects. The result is the sum of
    the batch losses divided by ``len(data_loader)``, so every batch carries
    equal weight regardless of its size.

    Args:
        model: Network to evaluate.
        data_loader: Sized iterable yielding ``(inputs, targets)`` batches.

    Returns:
        The average batch loss as a Python float.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            batch_losses.append(loss_fn(model(inputs), targets).item())

    avg_loss = sum(batch_losses) / len(data_loader)
    print(f"Evaluation Loss: {avg_loss:.4f}")
    wandb.log({"Evaluation Loss": avg_loss})
    return avg_loss

# Evaluated on the same loader that was used for training.
evaluate_model(model=model, data_loader=train_loader)


Model saved.
Evaluation Loss: 0.0657


0.06571679511361796

In [7]:
# ----------------------------------
# Save predictions (GDN0009 used)
# ----------------------------------


def evaluate_and_save(model, data_loader, csv_path="predicted_vs_actual.csv"):
    """Run ``model`` over ``data_loader`` and write predictions next to targets.

    Inference happens in eval mode with gradients disabled, on the
    notebook-level ``device``. Predictions and targets from all batches are
    concatenated in iteration order, flattened, and written to ``csv_path``
    as the columns "Predicted" and "Actual" (no index column).

    Args:
        model: Network used to produce predictions.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        csv_path: Destination CSV file.

    Returns:
        The DataFrame that was written to disk.
    """
    model.eval()
    predicted_chunks, actual_chunks = [], []

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            predicted_chunks.append(model(inputs).cpu().numpy())
            actual_chunks.append(targets.cpu().numpy())

    predicted = np.concatenate(predicted_chunks, axis=0).reshape(-1)
    actual = np.concatenate(actual_chunks, axis=0).reshape(-1)

    comparison = pd.DataFrame({"Predicted": predicted, "Actual": actual})
    comparison.to_csv(csv_path, index=False)
    print(f"Saved predictions vs. actuals to {csv_path}")
    return comparison

predicted_vs_actual_df = evaluate_and_save(model=model, data_loader=train_loader)


Saved predictions vs. actuals to predicted_vs_actual.csv


In [8]:
# Trace one forward pass on a random single-sequence batch so torchviz can
# draw the autograd graph of the model.
sample_input = torch.randn(1, seq_len, input_features).to(device)
output = model(sample_input)

dot = make_dot(
    output,
    params={name: parameter for name, parameter in model.named_parameters()},
)
dot.render(filename="lstm_model_architecture", format="png")
print("Model architecture plot saved as lstm_model_architecture.png")


Model architecture plot saved as lstm_model_architecture.png


In [9]:
def replace_and_save_predictions(model, original_csv_path, output_csv_path,
                                 seq_len=100, input_features=2, batch_size=100):
    """Overwrite the target column of a CSV with model predictions and save it.

    The first ``input_features`` columns of ``original_csv_path`` (radar_i,
    radar_q) are cut into sliding windows of ``seq_len`` rows. The prediction
    for the window covering rows ``[i, i + seq_len)`` is stored at row
    ``i + seq_len`` of the column at index ``input_features`` (tfm_ecg2). The
    first ``seq_len`` rows have no preceding window and are set to NaN.

    Windows are built one batch at a time and only that batch is moved to the
    model's device, so memory use is bounded by ``batch_size * seq_len``
    instead of growing with the length of the file. A file with ``seq_len``
    rows or fewer yields an all-NaN target column instead of raising.

    NOTE(review): this notebook builds its training windows with
    ``seq_len = 2000``; the default of 100 is kept here for backward
    compatibility. Pass the training value explicitly if inference should use
    the same window length as training.

    Args:
        model: Trained network mapping ``(batch, seq_len, input_features)``
            to one value per sequence.
        original_csv_path: CSV to read; all columns must be numeric.
        output_csv_path: Where the updated CSV is written (no index column).
        seq_len: Number of rows per input window.
        input_features: Number of leading input columns; also the index of
            the column that receives the predictions.
        batch_size: Number of windows per forward pass.

    Raises:
        ValueError: If ``seq_len`` or ``batch_size`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    model.eval()

    df = pd.read_csv(original_csv_path)
    data = df.values.astype(np.float32)
    features = data[:, :input_features]

    # Run inference wherever the model's weights live; a parameter-free
    # model is assumed to be on the CPU.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    # Rows without a full preceding window stay NaN.
    new_tfm_ecg2 = np.full((len(data),), np.nan, dtype=np.float32)
    n_windows = max(len(data) - seq_len, 0)

    with torch.no_grad():
        for start in range(0, n_windows, batch_size):
            stop = min(start + batch_size, n_windows)
            # Materialise only this batch of windows, never the whole file.
            batch_windows = np.stack(
                [features[i:i + seq_len] for i in range(start, stop)]
            )
            batch_predictions = model(torch.tensor(batch_windows).to(model_device))
            # Window i predicts the row right after it, i.e. row i + seq_len.
            new_tfm_ecg2[seq_len + start:seq_len + stop] = (
                batch_predictions.cpu().numpy().reshape(-1)
            )

    df.iloc[:, input_features] = new_tfm_ecg2

    df.to_csv(output_csv_path, index=False)
    print(f"Updated CSV saved to {output_csv_path}")

# Replace the tfm_ecg2 values of the original CSV file with the model's
# predictions and write the result to a new file.
original_csv_path = r'original.csv'
output_csv_path = r'updated.csv'
replace_and_save_predictions(
    model,
    original_csv_path=original_csv_path,
    output_csv_path=output_csv_path,
)


Updated CSV saved to D:\24phd7039\biomedical-research\ml_files\utils\plot_folder\GDN0003\combined.csv
