# Training VAE for RGB Images

### Imports

In [None]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from utils.loaders import FeaturesExtendedDataset

from models import FC_VAE
from train_vae import train, evaluate


### SETUP

In [None]:
BATCH_SIZE = 32
EPOCHS = 50
LR = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
STEP_SIZE = 10
GAMMA = 0.1

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
    print("------ USING APPLE SILICON GPU ------")

features_file = "saved_features/saved_feat_I3D_25_dense_D1"

### TRAINING

In [None]:
train_dataset = FeaturesExtendedDataset(features_file,'train')
train_loader_rgb = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)


model = FC_VAE(dim_input=1024, nz=64)
model.to(DEVICE)
print(f'Initial model device: {model.device}')

# Create Optimizer & Scheduler objects
optimizer = Adam(model.parameters(), lr=LR, betas=(0.9, 0.98), eps=1e-9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

train(model, optimizer, EPOCHS, DEVICE, train_loader_rgb, train_loader_rgb, BATCH_SIZE, scheduler)

torch.save(model.state_dict(), f'./saved_models/VAE_RGB/final_VAE_RGB_epoch_{EPOCHS}.pth')


### EVALUATION

In [None]:
train_dataset = FeaturesExtendedDataset(features_file,'train')
test_dataset = FeaturesExtendedDataset(features_file,'test')
train_loader_rgb = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_loader_rgb = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

model = FC_VAE(dim_input=1024, nz=64)
model.to(DEVICE)
model.load_state_dict(torch.load(f'./saved_models/VAE_RGB/final_VAE_RGB_epoch_50.pth'))

reconstructed, originals = evaluate(model, DEVICE, train_loader_rgb)
reconstructed2, originals2 = evaluate(model, DEVICE, test_loader_rgb)

# Training VAE for EMG Signals

In [2]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from utils.loaders import FeaturesExtendedEMGDataset

from models import FC_VAE
from train_vae import train_emg, evaluate_emg
import pandas as pd
import numpy as np

  Referenced from: <0EB69795-4559-3C98-9EA1-35B6A988BB99> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torchvision/image.so
  Expected in:     <E4E2FFCA-031E-3974-A7B0-45408D7F4956> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")


### SETUP

In [3]:
BATCH_SIZE = 32
EPOCHS = 50
LR = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
STEP_SIZE = 10
GAMMA = 0.1

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
    print("------ USING APPLE SILICON GPU ------")

LSTM_features_file_train = "saved_features/EMG_Emb_LSTM_25_dense_D1_train.pkl"
LSTM_features_file_test = "saved_features/EMG_Emb_LSTM_25_dense_D1_test.pkl"
STAT_features_file_train = "saved_features/EMG_Emb_Stat_25_dense_D1_train.pkl"
STAT_features_file_test = "saved_features/EMG_Emb_Stat_25_dense_D1_test.pkl"

------ USING APPLE SILICON GPU ------


### TRAINING

In [6]:
train_dataset = FeaturesExtendedEMGDataset(LSTM_features_file_train)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True, multiprocessing_context='fork' if torch.backends.mps.is_available() else None)

for i in train_loader:
    print(i["features"].shape)
    break

torch.Size([32, 64])


In [None]:
for (rgb_batch_idx, x) in enumerate(train_loader):
    print(x)
    break

In [None]:
model = FC_VAE(dim_input=112, nz=64)
model.to(DEVICE)
print(f'Initial model device: {model.device}')

# Create Optimizer & Scheduler objects
optimizer = Adam(model.parameters(), lr=LR, betas=(0.9, 0.98), eps=1e-9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

train_emg(model, optimizer, EPOCHS, DEVICE, train_loader, train_loader, BATCH_SIZE, scheduler)

torch.save(model.state_dict(), f'./saved_models/VAE_EMG_STAT/final_VAE_EMG_STAT_epoch_{EPOCHS}.pth')

### EVALUATION

In [9]:
train_dataset = FeaturesExtendedEMGDataset(STAT_features_file_train)
test_dataset = FeaturesExtendedEMGDataset(STAT_features_file_test)
train_loader_emg = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
test_loader_emg = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

model = FC_VAE(dim_input=112, nz=64)
model.to(DEVICE)
model.load_state_dict(torch.load(f'./saved_models/VAE_EMG_STAT/final_VAE_EMG_STAT_epoch_50.pth'))

reconstructed, originals = evaluate_emg(model, DEVICE, train_loader_emg, train_loader_emg)
reconstructed2, originals2 = evaluate_emg(model, DEVICE, test_loader_emg, test_loader_emg)

280it [00:01, 154.37it/s]


Test Loss: 0.0966


31it [00:00, 39.57it/s]


Test Loss: 0.0955


# Fine Tuning Training [ RGB --> EMG ]

### IMPORTS

In [1]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from utils.loaders import FeaturesExtendedDataset, FeaturesExtendedEMGDataset, ActionNetEmgRgbDataset
from models import I3D
from models import EMG_Feature_Extractor
from utils.args import args
from omegaconf import OmegaConf
import tqdm
import pickle


from models import FC_VAE, LSTM_Emb_Classifier, EMG_Feature_Extractor
from train_vae import train_tuning, evaluate_tuning

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
    print("------ USING APPLE SILICON GPU ------")

  Referenced from: <0EB69795-4559-3C98-9EA1-35B6A988BB99> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torchvision/image.so
  Expected in:     <E4E2FFCA-031E-3974-A7B0-45408D7F4956> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")


------ USING APPLE SILICON GPU ------


In [7]:
# ---------------------- I3D ----------------------
# CONFIGURATION FOR I3D
conf_args = OmegaConf.load('configs/I3D_save_feat.yaml')
args = OmegaConf.merge(args, conf_args)

model_rgb = I3D(20, "RGB", args.models['RGB'], **args.models['RGB'].kwargs)
train_augmentations, test_augmentations = model_rgb.get_augmentation('RGB')
model_rgb.to("cpu")

# ---------------------- LSTM ----------------------
# Parametri del modello
input_dim = 16
hidden_dim = 128
embedding_dim = 64
output_dim = 20  # Definisci il numero di classi

model_emg = LSTM_Emb_Classifier(input_dim=input_dim, hidden_dim=hidden_dim, embedding_dim=embedding_dim, num_class=output_dim)
model_emg.load_state_dict(torch.load(f'./saved_models/LSTM_Emb_Classifier/final_LSTM_Emb_epoch_40.pth'))
model_emg.to(DEVICE)

# ---------------------- DATASET ----------------------
# DATASET 25 FRAMES PER CLIP AND 1 SAMPLE PER BATCH
num_frames = 25
num_clips = 1
batch_size = 1
dataset = ActionNetEmgRgbDataset('train', num_frames, num_clips, True, './action-net', 'action-net/saved_emg', "action-net/saved_RGB", 2, train_augmentations)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)



2024-06-10 16:08:56 LOG INFO Loading Kinetics weights I3D
2024-06-10 16:08:56 LOG INFO  * Skipping Logits weight for 'logits.conv3d.weight'
2024-06-10 16:08:56 LOG INFO  * Skipping Logits weight for 'logits.conv3d.bias'


### FEATURE EXTRACTION ACTION NET s04

In [8]:
embeddings_rgb = []
embeddings_emg = []


model_rgb.train(False)
with torch.no_grad():
    for (idx, (emg,rgb,l)) in enumerate(data_loader):
        e = emg.reshape(batch_size, num_clips, num_frames, -1)  #(num_batch, num_clips, num_frames, num_features)
        # print(e)                  # torch.Size([1, 1, 25, 16])
        emg_input = e[0].float() # torch.Size([25, 16])
        emg_input = emg_input.to(DEVICE)

        batch, _, height, width = rgb.shape
        rgb_reshape = rgb.reshape(1, num_clips, num_frames, -1, height, width)
        rgb_permute = rgb_reshape.permute(1, 0, 3, 2, 4, 5)
        rgb_input = rgb_permute[0].to('cpu')    # CLIP
        # print(rgb_permute.shape)   torch.Size([1, 1, 3 (RGB), 25, 224, 224])
        # print(l)                   tensor([16])
        # ---------------------- RGB EXTRACTION ----------------------
        output_rgb, feat_rgb = model_rgb(rgb_input)
        feat_rgb = feat_rgb["features"]
        sample_rgb = feat_rgb[0]      # torch.Size([1, 1024])
        embeddings_rgb.append(sample_rgb)

        # ---------------------- EMG EXTRACTION ----------------------
        #outputs_emg, feat_emg = model_emg(emg_input)
        #sample_emg = feat_emg[0]      # torch.Size([64])
        #embeddings_emg.append(sample_emg)

        # ---------------------- EMG STAT EXTRACTION ----------------------
        # embeddings = EMG_Feature_Extractor(emg_input[0])
        # embeddings_emg.append(embeddings)

print(len(embeddings_emg))
#print(len(embeddings_emg))

#features_emg = "saved_features/FINE_TUNING_emg_STAT_s04.pkl"
features_rgb = "saved_features/FINE_TUNING_rgb_s04.pkl"

with open(features_rgb, 'wb') as f:
    pickle.dump(embeddings_rgb, f)

#with open(features_emg, 'wb') as f:
    #pickle.dump(embeddings_emg, f)


  Referenced from: <0EB69795-4559-3C98-9EA1-35B6A988BB99> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torchvision/image.so
  Expected in:     <E4E2FFCA-031E-3974-A7B0-45408D7F4956> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")
  Referenced from: <0EB69795-4559-3C98-9EA1-35B6A988BB99> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torchvision/image.so
  Expected in:     <E4E2FFCA-031E-3974-A7B0-45408D7F4956> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Action-Recognition/env/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")
  Referenced from: <0EB69795-4559-3C98-9EA1-35B6A988BB99> /Users/andreavannozzi/GithubProjects/Multimodal-Egocentric-Act

0


### SETUP

In [21]:
BATCH_SIZE = 32
EPOCHS = 50
LR = 0.0001
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
STEP_SIZE = 10
GAMMA = 0.1

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
    print("------ USING APPLE SILICON GPU ------")


------ USING APPLE SILICON GPU ------


In [10]:
import pandas as pd
from utils.loaders import FeaturesTuningDataset

rgb = FeaturesTuningDataset("./saved_features/FINE_TUNING_rgb_s04.pkl")
emg = FeaturesTuningDataset("./saved_features/FINE_TUNING_emg_STAT_s04.pkl")
print(f'RGB dataset size: {len(rgb)}')

loader_rgb = DataLoader(rgb, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True, pin_memory=False)
loader_emg = DataLoader(emg, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True, pin_memory=False)

# print device loaders
for (rgb_batch_idx, x) in enumerate(loader_rgb):
    print(x.device)  # result --> cpu
    break

RGB dataset size: 186
cpu


### TRAINING

In [22]:
from train_vae import loss_function
from utils.logger import setup_logger
from torch.autograd import Variable
from torch.optim import Adam
from train_vae import train_tuning, evaluate_tuning

# LSTM EMG dimension is 64 
# STAT EMG dimension is 112
model_finetune = FC_VAE(dim_input=1024, nz=64, dim_output=112)
model_finetune.to(DEVICE)

# Carica i pesi del modello RGB per l'encoder
checkpoint_rgb = torch.load('./saved_models/VAE_RGB/final_VAE_RGB_epoch_50.pth', map_location=DEVICE)
# Rimuovi il prefisso 'encoder.' dalle chiavi dello state_dict
checkpoint_rgb = {k.replace('encoder.', ''): v for k, v in checkpoint_rgb.items() if 'encoder' in k}
model_finetune.encoder.load_state_dict(checkpoint_rgb)

# Carica i pesi del modello EMG per il decoder
checkpoint_emg = torch.load('./saved_models/VAE_EMG_STAT/final_VAE_EMG_STAT_epoch_50.pth', map_location=DEVICE)
checkpoint_emg = {k.replace('decoder.', ''): v for k, v in checkpoint_emg.items() if 'decoder' in k}
model_finetune.decoder.load_state_dict(checkpoint_emg)

# Create Optimizer & Scheduler objects
optimizer = Adam(model_finetune.parameters(), lr=LR, betas=(0.9, 0.98), eps=1e-9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

train_tuning(model_finetune, optimizer, EPOCHS, DEVICE, loader_rgb, loader_emg, BATCH_SIZE, scheduler)
evaluate_tuning(model_finetune, DEVICE, loader_rgb, loader_emg)

torch.save(model_finetune.state_dict(), f'./saved_models/VAE_Fine_Tuninng/VAE_RGB_to_EMG_STAT_epoch_{EPOCHS}.pth')


2024-06-10 16:13:29 LOG INFO 	Epoch, 1, 	Average Loss: , 3.77432119846344
2024-06-10 16:13:29 LOG INFO 	Epoch, 2, 	Average Loss: , 3.738607108592987
2024-06-10 16:13:29 LOG INFO 	Epoch, 3, 	Average Loss: , 3.5968282222747803
2024-06-10 16:13:29 LOG INFO 	Epoch, 4, 	Average Loss: , 3.693473160266876
2024-06-10 16:13:29 LOG INFO 	Epoch, 5, 	Average Loss: , 3.6377466917037964
2024-06-10 16:13:29 LOG INFO 	Epoch, 6, 	Average Loss: , 3.4457544684410095
2024-06-10 16:13:29 LOG INFO 	Epoch, 7, 	Average Loss: , 3.432223081588745
2024-06-10 16:13:29 LOG INFO 	Epoch, 8, 	Average Loss: , 3.4242323637008667
2024-06-10 16:13:29 LOG INFO 	Epoch, 9, 	Average Loss: , 3.4463340640068054
2024-06-10 16:13:30 LOG INFO 	Epoch, 10, 	Average Loss: , 3.3542668223381042
2024-06-10 16:13:30 LOG INFO 	Epoch, 11, 	Average Loss: , 3.4337971210479736
2024-06-10 16:13:30 LOG INFO 	Epoch, 12, 	Average Loss: , 3.3531784415245056
2024-06-10 16:13:30 LOG INFO 	Epoch, 13, 	Average Loss: , 3.4155396223068237
2024-06-10 16

Test Loss: 0.0716
