In [1]:
import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
# from torchvision.models import resnet18
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
import seaborn as sns
import os
import sys
import argparse
import json
import pandas as pd
import numpy as np
from datetime import datetime
import time
from matplotlib import pyplot as plt
from lopo_dataset import LopoDataset
from image_representations.base_image_representation import BaseImageRepresentation
from models import BaseModel

In [2]:
image_method_type = BaseImageRepresentation.get_by_name("Skeleton-DML")
image_method = image_method_type()

In [3]:
df = pd.read_csv("../00_datasets/dataset_output/libras_ufop/libras_ufop_openpose.csv")

In [4]:
num_features = len(df["category"].unique())

In [5]:
base_model = BaseModel.get_by_name("vit_medium")(num_features)
model = base_model.get_model()

In [6]:
transform = transforms.Compose([
    # transforms.Resize((224, 224)),
    transforms.Resize(base_model.image_size),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [9]:
seed = 1638102311
frames = 60

In [10]:
train_dataset = LopoDataset(df, frames, transform, transform_distance=False, augment=True, person_out=[1], seed=seed, image_method=image_method)
validate_dataset = LopoDataset(df, frames, transform, transform_distance=False, augment=False, person_in=[1], seed=seed, image_method=image_method)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validate_loader = DataLoader(validate_dataset, batch_size=64, shuffle=False)

In [12]:
criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(resnet.parameters(), lr=10e-5)
optimizer_parameters = {
    "lr": 0.0001,
    "weight_decay": 0.0001
}
optimizer = optim.Adam(model.parameters(), **optimizer_parameters)

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 512, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=512, out_features=1536, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=512, out_features=512, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=512, out_features=2048, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identit

In [14]:
num_features

56

In [34]:
model.train()
running_loss = 0.0
correct_train = 0
total_train = 0
for inputs, labels in train_loader:
    if len(inputs) == 1:
        continue
    inputs, labels = inputs.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item() * inputs.size(0)

    _, predicted = torch.max(outputs, 1)
    total_train += labels.size(0)
    correct_train += (predicted == labels).sum().item()

epoch_loss = running_loss / len(train_loader.dataset)
train_accuracy = correct_train / total_train
print(f"Loss: {epoch_loss}")
print(f"Train Accuracy: {train_accuracy}")

Loss: 4.0386378348538425
Train Accuracy: 0.016935150764147047


Loss: 4.040253164649059

Train Accuracy: 0.01486988847583643

In [19]:
outputs.shape

torch.Size([53, 56])

In [20]:
inputs.shape

torch.Size([53, 3, 256, 256])

In [21]:
labels.shape

torch.Size([53])

In [29]:
outputs[0].argmax()

tensor(43, device='cuda:0')

In [30]:
outputs[0][43]

tensor(1., device='cuda:0', grad_fn=<SelectBackward0>)

In [32]:
outputs[0][42]

tensor(4.3818e-17, device='cuda:0', grad_fn=<SelectBackward0>)

In [31]:
outputs

tensor([[5.5101e-18, 2.6741e-17, 5.2376e-16,  ..., 1.3759e-13, 1.1304e-15,
         3.0011e-17],
        [6.2183e-18, 2.9061e-17, 5.8161e-16,  ..., 1.4060e-13, 1.1238e-15,
         3.1449e-17],
        [5.2490e-18, 2.5795e-17, 4.9825e-16,  ..., 1.3753e-13, 1.1392e-15,
         3.0194e-17],
        ...,
        [5.1470e-18, 2.4628e-17, 4.9104e-16,  ..., 1.3858e-13, 1.1332e-15,
         2.9582e-17],
        [5.2126e-18, 2.5506e-17, 4.9321e-16,  ..., 1.3795e-13, 1.1450e-15,
         3.0287e-17],
        [5.3815e-18, 2.6353e-17, 5.0548e-16,  ..., 1.3753e-13, 1.1630e-15,
         3.1239e-17]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [33]:
labels[0]

tensor(21, device='cuda:0')

In [38]:
1e-4 == 0.0001

True