In [4]:
import torch
import pandas as pd
from torch.utils.data import DataLoader
import numpy as np
from model import BiGRU, CustomLoss, preprocess_data, CSVDataset, plot_heatmaps, preprocess_for_test

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Scale factor handed to the preprocessing helpers and reused further down
# when the model output is converted back to the CSV's original magnitude.
norm_constant = 1000

# Folder containing the input CSV; the smoothed CSV is written here as well.
save_folder = '/Users/jonathanamichaels/Library/CloudStorage/Dropbox'
csv_file = save_folder + '/data3D.csv'

# Load the raw table into a pandas DataFrame.
data = pd.read_csv(csv_file)

In [6]:
# Build the training array from the raw table.
# NOTE(review): indexing below assumes preprocess_data returns something with
# at least three axes (shape[2] is read) — confirm against model.py.
processed_data = preprocess_data(data, norm_constant=norm_constant)

# Model hyper-parameters. Input and output widths are both read from axis 2
# of the preprocessed data, so the network emits as many values per step as
# it receives.
input_size = processed_data.shape[2]
hidden_size = 512
num_layers = 1
output_size = processed_data.shape[2]
dropout = 0.1

# Pick the compute device once, before anything is placed on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model and loss function.
model = BiGRU(input_size, hidden_size, num_layers, output_size, dropout)
# Every batch is moved to `device` inside the loop, so the parameters must
# live there as well; otherwise the forward pass raises a device-mismatch
# error as soon as CUDA is available.
model.to(device)
criterion = CustomLoss()

# Wrap the preprocessed data in the project's dataset and batch it.
dataset = CSVDataset(processed_data, norm_constant=norm_constant)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Adam optimizer over all model parameters (created after the model has been
# moved to its device).
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Set the number of epochs
epochs = 50

model.train()

# Train the model
for epoch in range(epochs):
    for batch_idx, (inputs, targets, confidence) in enumerate(dataloader):
        # Move tensors to the same device as the model
        inputs, targets, confidence = inputs.to(device), targets.to(device), confidence.to(device)

        # Reset the gradients to zero
        optimizer.zero_grad()

        # Forward pass
        output = model(inputs)

        # Calculate loss; the per-sample confidence is passed to the loss too
        loss = criterion(output, targets, confidence)

        # Backpropagation
        loss.backward()

        # Update the weights
        optimizer.step()

        # Print the loss for the current batch
        print(f"Epoch: {epoch+1}/{epochs}, Batch: {batch_idx+1}/{len(dataloader)}, Loss: {loss.item()}")

Epoch: 1/50, Batch: 1/13, Loss: 0.03115137293934822
Epoch: 1/50, Batch: 2/13, Loss: 0.023346034809947014
Epoch: 1/50, Batch: 3/13, Loss: 0.01905081979930401
Epoch: 1/50, Batch: 4/13, Loss: 0.010391129180788994
Epoch: 1/50, Batch: 5/13, Loss: 0.007071503438055515
Epoch: 1/50, Batch: 6/13, Loss: 0.006996306125074625
Epoch: 1/50, Batch: 7/13, Loss: 0.006300672423094511
Epoch: 1/50, Batch: 8/13, Loss: 0.006515518296509981
Epoch: 1/50, Batch: 9/13, Loss: 0.00537166278809309
Epoch: 1/50, Batch: 10/13, Loss: 0.0045268055982887745
Epoch: 1/50, Batch: 11/13, Loss: 0.00436385115608573
Epoch: 1/50, Batch: 12/13, Loss: 0.003996977582573891
Epoch: 1/50, Batch: 13/13, Loss: 0.005107490345835686


KeyboardInterrupt: 

In [None]:
# Build the evaluation input from the full table.
# NOTE(review): assumes preprocess_for_test returns a torch tensor with a
# leading batch axis (it is indexed as [0, :, :] below) — confirm in model.py.
test_data = preprocess_for_test(data, norm_constant=norm_constant)

# Run inference on whatever device the model currently lives on, so this cell
# works whether or not the model was moved to a GPU for training.
model_device = next(model.parameters()).device

model.eval()
with torch.no_grad():
    # Bring the result back to the CPU: the plotting call below and the export
    # cell convert it to NumPy, which is only possible for CPU tensors.
    output = model(test_data.to(model_device)).cpu()

# Compare the first (only) batch element of the input with the model output.
plot_heatmaps(test_data[0,:,:], output[0,:,:].detach().numpy())

In [None]:
# Work on a copy so the table loaded from the CSV is left untouched.
data_aug = data.copy()

# Undo the normalisation: scale the model output back up by norm_constant and
# write it into every row from index label 1 onwards (row 0 is kept as-is).
rescaled_output = output.detach().numpy() * norm_constant
data_aug.loc[1:, :] = rescaled_output

# Fix the magnitude of the confidence values: every fourth column starting at
# position 3 is divided by norm_constant again, cancelling the scaling above.
confidence_cols = slice(3, None, 4)
data_aug.iloc[1:, confidence_cols] = data_aug.iloc[1:, confidence_cols] / norm_constant

# Disabled by default. Switch this branch on if time points that were missing
# in the input (no centerdetect) should stay empty instead of being filled in
# with the model's interpolated values.
if False:
    missing_mask = data.isna().stack()
    for row, col in missing_mask[missing_mask].index.tolist():
        data_aug.at[row, col] = pd.NA

# Write the smoothed table next to the input file, without the index column.
smooth_csv = save_folder + '/data3D_Smooth.csv'
data_aug.to_csv(smooth_csv, index=False)