# 임종우 최종 정리
## 데이터 처리
- 1. ViTPose를 통해 keypoint 뽑아내기
- 2. 좌우 반전 데이터 증강 적용
- 3. 각 sample당 x좌표, y좌표의 평균 및 표준편차 구해 standardization 적용

## 모델링

### Without CNN
- 1. RandomForest -> acc 0.969
- 2. Multi Layer Perceptron -> acc 0.94

### With CNN
 : CNN의 경우 (25,2) 형태의 keypoint 데이터를 활용하였음
- 3. CNN with Conv1d -> acc 0.94
- 4. CNN with Conv2d -> acc 0.95
- 5. Pre-trained CNN(MobileNet_v2) -> acc 0.91

# 라이브러리 임포트 및 드라이브 마운트

In [None]:
# Mount Google Drive so files under /content/gdrive can be read by later cells.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image

# Infer keypoints from TaekwonDataset

In [None]:
# Load the saved keypoint and label arrays from the mounted Drive folder.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, which can
# execute code embedded in the file -- only load .npy files from a trusted source.
keypoints = np.load("/content/gdrive/MyDrive/kubig_pose/keypoints.npy", allow_pickle=True)
labels = np.load("/content/gdrive/MyDrive/kubig_pose/labels.npy", allow_pickle=True)

# From keypoint to classification

In [None]:
def keypoint_flatten(keypoints, label, num_keypoints=25):
  """Flatten per-sample keypoints into one DataFrame row per sample.

  Each sample is read from ``keypoint[0]``. For every entry, its first two
  values are written to the ``y{i}`` and ``x{i}`` columns (in that order),
  and the sample's label is appended as the last column.

  Args:
    keypoints: Sequence of samples. ``keypoints[idx][0]`` must be an iterable
      of ``num_keypoints`` entries with at least two values each
      (assumed to be (y, x, ...) -- TODO confirm against the pose model).
    label: Sequence of labels aligned with ``keypoints``.
    num_keypoints: Number of keypoints expected per sample.

  Returns:
    A DataFrame with columns ``y0, x0, ..., label``. Missing or malformed
    samples are skipped; surviving rows keep their position in ``keypoints``
    as their index.
  """
  columns = []
  for i in range(num_keypoints):
    columns.extend([f'y{i}', f'x{i}'])
  columns.append('label')
  expected_length = len(columns)

  rows = []
  kept_indices = []
  for idx, keypoint in enumerate(keypoints):
    try:
      flat = [value for point in keypoint[0] for value in (point[0], point[1])]
      flat.append(label[idx])
    except (TypeError, IndexError, KeyError):
      # No detection / unusable structure for this sample: skip it.
      continue
    if len(flat) != expected_length:
      # Wrong number of keypoints: skip rather than misalign the columns.
      continue
    rows.append(flat)
    kept_indices.append(idx)

  # Build the frame once instead of growing it row by row.
  return pd.DataFrame(rows, columns=columns, index=kept_indices)

In [None]:
# Build the flat keypoint table, then separate the features from the target.
df = keypoint_flatten(keypoints, labels)
x = df.drop(columns = 'label')
y = df['label']

## 데이터 증강 by Flipping Keypoints

In [None]:
# Horizontal-flip augmentation: mirror every x coordinate across the frame.
# NOTE(review): 1920 is taken to be the frame width in pixels -- confirm it
# matches every source video.
# NOTE(review): only the coordinates are mirrored; left/right keypoint slots
# are not swapped -- confirm that is intended for this keypoint layout.
IMAGE_WIDTH = 1920

x_columns = [col for col in df.columns if col.startswith('x')]

# Vectorised replacement for the cell-by-cell loop: y coordinates and the
# label are copied unchanged, x coordinates become IMAGE_WIDTH - x.
augmented_df = df.copy()
augmented_df[x_columns] = IMAGE_WIDTH - augmented_df[x_columns]

# Stack the original and the mirrored samples into one table.
final_df = pd.concat([df, augmented_df], ignore_index=True)

In [None]:
# Features and target of the augmented (original + mirrored) table.
x = final_df.drop(columns = 'label')
y = final_df['label']

# 1. RandomForest
- 0.92
- 0.969 with standardization of coordinates

In [None]:
from sklearn.model_selection import train_test_split
# Stratified hold-out split (15% validation) with a fixed seed.
# NOTE(review): x already contains the mirrored copies, so a sample and its
# flipped twin can land on opposite sides of the split -- validation accuracy
# may be optimistic. Consider splitting before augmenting.
train_x, valid_x, train_y, valid_y = train_test_split(x, y, stratify = y, test_size = 0.15, random_state = 42)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Default hyper-parameters; the seed is fixed so runs are repeatable.
model = RandomForestClassifier(random_state = 42)

In [None]:
# Fit the random forest on the training split.
model.fit(train_x, train_y)

In [None]:
# Mean accuracy on the held-out validation split.
model.score(valid_x, valid_y)

0.9206521739130434

## With Standardization of coordinates
- 이미지 내에서 bbox의 위치도 상관없어지고, 인물의 체형도 상관없어지는 효과 있을 것으로 기대
- 성능 0.92 -> 0.97로 상승 (0.9207 -> 0.9696)

In [None]:
# Work on a copy so final_df keeps the unscaled coordinates.
standardized_df = final_df.copy()

def standardization(value, mean, std):
  """Return the z-score of ``value`` for the given mean and standard deviation."""
  centered = value - mean
  return centered / std

# Per-sample standardization: within each row, the x coordinates are z-scored
# against that row's own x mean/std, and likewise for the y coordinates.
# Vectorised over all rows instead of writing one cell at a time.
# NOTE: a row whose coordinates are all identical has zero spread and yields
# NaN after the division.
x_columns = [col for col in standardized_df.columns if col.startswith('x')]
y_columns = [col for col in standardized_df.columns if col.startswith('y')]

for axis_columns in (x_columns, y_columns):
    if not axis_columns:
        continue
    coords = standardized_df[axis_columns].to_numpy(dtype=float)
    # Population statistics (ddof=0) per row, kept 2-D so they broadcast.
    axis_mean = coords.mean(axis=1, keepdims=True)
    axis_std = coords.std(axis=1, keepdims=True)
    standardized_df[axis_columns] = standardization(coords, axis_mean, axis_std)

# Preview the first five standardized rows.
standardized_df[:5]

Unnamed: 0,y0,x0,y1,x1,y2,x2,y3,x3,y4,x4,...,x20,y21,x21,y22,x22,y23,x23,y24,x24,label
0,-1.21171,-0.084188,-1.273957,0.292913,-1.278465,-0.404937,-1.250055,0.81883,-1.259836,-0.87499,...,0.819882,1.361415,0.474975,1.148089,-0.759188,1.111227,-1.109168,1.014218,-0.625431,0.0
1,-1.229617,-0.876085,-1.280594,-0.789579,-1.283216,-0.837689,-1.249739,-0.39969,-1.266595,-0.083295,...,-0.739517,1.340933,0.114045,1.225158,1.167877,1.196825,1.233221,1.183299,2.159367,0.0
2,-1.22308,-0.091482,-1.283774,0.267598,-1.282121,-0.461548,-1.270756,0.805753,-1.270191,-1.026884,...,0.874417,1.348272,0.543388,1.169636,-0.758669,1.139483,-1.162166,1.03953,-0.803478,0.0
3,-1.244221,-0.738662,-1.294174,-0.586785,-1.299885,-0.698386,-1.261048,-0.019007,-1.26303,-0.087514,...,-1.150055,1.34659,-0.074402,1.202012,0.816008,1.17367,0.879507,1.189782,1.935486,0.0
4,-1.229815,-0.046309,-1.288162,0.28596,-1.285892,-0.375895,-1.26956,0.764366,-1.263482,-0.878845,...,0.800977,1.336187,0.507166,1.168678,-0.920245,1.13877,-1.293106,1.04081,-0.741703,0.0


In [None]:
# Features and target of the standardized table.
x = standardized_df.drop(columns = 'label')
y = standardized_df['label']

In [None]:
from sklearn.model_selection import train_test_split
# Same stratified 15% hold-out split and seed as for the unscaled features.
train_x, valid_x, train_y, valid_y = train_test_split(x, y, stratify = y, test_size = 0.15, random_state = 42)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fresh random forest (same seed) for the standardized features.
model = RandomForestClassifier(random_state = 42)

In [None]:
# Fit on the standardized training split.
model.fit(train_x, train_y)

In [None]:
# Mean accuracy on the standardized validation split.
model.score(valid_x, valid_y)

0.9695652173913043

# 2. Basic MLP
- Adam, 0.001, hidden 레이어 1개, epoch 100, 0.93
- Adam, 0.001, hidden 레이어 2개, epoch 100, 0.93
- Adam, 0.001, hidden 레이어 2개, epoch 150, 0.94

In [None]:
def from_df_to_tensor(df):
  """Split a keypoint table into a feature array and a label array.

  Despite the name, the results are NumPy arrays, not torch tensors.

  Args:
    df: DataFrame whose ``label`` column holds the target; every other
      column is treated as a feature.

  Returns:
    A ``(features, labels)`` tuple: one feature row per DataFrame row (columns
    in their original order) and the matching labels.
  """
  values = df.to_numpy()
  is_label = df.columns == 'label'
  # The tolist() round trip lets NumPy re-infer a numeric dtype when the
  # frame is stored as object dtype.
  features = np.array(values[:, ~is_label].tolist())
  labels = np.array(values[:, is_label].ravel().tolist())
  return features, labels

In [None]:
# Feature matrix and label vector for the MLP, taken from the standardized table.
x, y = from_df_to_tensor(standardized_df)

In [None]:
from sklearn.model_selection import train_test_split
# Stratified 15% hold-out split with a fixed seed.
train_x, valid_x, train_y, valid_y = train_test_split(x, y, stratify = y, test_size = 0.15, random_state = 42)

In [None]:
class CustomKeypointsDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each keypoint sample with its label."""

    def __init__(self, keypoints, labels):
        # Both containers are stored as given and only indexed on demand.
        self.labels = labels
        self.keypoints = keypoints

    def __len__(self):
        # The label container defines the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        # No dtype is forced here; torch infers it from the stored data.
        sample = self.keypoints[idx]
        target = self.labels[idx]
        return torch.tensor(sample), torch.tensor(target)

In [None]:
# Wrap the train and validation splits as torch datasets.
SkeletonTrainDataset = CustomKeypointsDataset(train_x, train_y)
SkeletonValidDataset = CustomKeypointsDataset(valid_x, valid_y)

In [None]:
# Batches of 128; only the training loader is shuffled.
TrainDataloader = torch.utils.data.DataLoader(SkeletonTrainDataset, batch_size = 128, shuffle = True)
ValidDataloader = torch.utils.data.DataLoader(SkeletonValidDataset, batch_size = 128,shuffle = False)

In [None]:
import torch
from torch import nn
import torch.nn.functional as F

class basic_mlp(nn.Module):
  """Fully connected classifier: 50 input features -> 47 class logits.

  Layers are built with ``dtype=float`` (Python float, i.e. float64 in
  PyTorch), so inputs must be double-precision tensors.
  """

  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(in_features=50, out_features=64, dtype=float)
    self.fc2 = nn.Linear(in_features=64, out_features=128, dtype=float)
    self.fc3 = nn.Linear(in_features=128, out_features=1024, dtype=float)
    self.fc4 = nn.Linear(in_features=1024, out_features=47, dtype=float)
    self.dropout = nn.Dropout(p=0.3)

  def forward(self, x):
    hidden = x
    # Three Linear + ReLU stages, dropout only before the output layer.
    for layer in (self.fc1, self.fc2, self.fc3):
      hidden = F.relu(layer(hidden))
    # Raw logits are returned: nn.CrossEntropyLoss applies softmax internally.
    return self.fc4(self.dropout(hidden))

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = basic_mlp()
model.to(device)

basic_mlp(
  (fc1): Linear(in_features=50, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=47, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [None]:
# Cross-entropy on the raw logits, optimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train the MLP for 150 epochs, reporting loss and accuracy after each one.
for epoch in range(150):
  # Training pass. train() keeps dropout active while the weights are updated.
  model.train()
  train_loss = []
  train_correct = 0
  train_seen = 0

  for skeleton, cls in TrainDataloader:
    skeleton, cls = skeleton.to(device), cls.to(device).long()

    optimizer.zero_grad()
    output = model(skeleton)
    loss = criterion(output, cls)
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())
    train_correct += (output.argmax(dim=1) == cls).sum().item()
    train_seen += cls.shape[0]

  # Validation pass. eval() switches dropout off so the metrics reflect the
  # deterministic network; no_grad() alone does not do that.
  model.eval()
  valid_loss = []
  valid_correct = 0
  valid_seen = 0

  with torch.no_grad():
    for skeleton, cls in ValidDataloader:
      skeleton, cls = skeleton.to(device), cls.to(device).long()
      output = model(skeleton)
      valid_loss.append(criterion(output, cls).item())
      valid_correct += (output.argmax(dim=1) == cls).sum().item()
      valid_seen += cls.shape[0]

  # Accuracy is correct / seen samples, so a short final batch is not over-weighted.
  print(
      f'epoch {epoch+1} -- train_loss : {sum(train_loss) / len(TrainDataloader):.5f} '
      f'  train_acc : {train_correct / train_seen:.5f}'
      f'   valid_loss : {sum(valid_loss) / len(ValidDataloader):.5f} '
      f'  valid_acc : {valid_correct / valid_seen:.5f}'
  )

epoch 1 -- train_loss : 3.53028   train_acc : 0.14422   valid_loss : 3.36329   valid_acc : 0.15592
epoch 2 -- train_loss : 3.02039   train_acc : 0.20380   valid_loss : 2.70718   valid_acc : 0.24740
epoch 3 -- train_loss : 2.37480   train_acc : 0.31471   valid_loss : 2.04779   valid_acc : 0.40853
epoch 4 -- train_loss : 1.78559   train_acc : 0.45929   valid_loss : 1.58485   valid_acc : 0.51074
epoch 5 -- train_loss : 1.38622   train_acc : 0.56780   valid_loss : 1.27651   valid_acc : 0.61198
epoch 6 -- train_loss : 1.10935   train_acc : 0.64216   valid_loss : 1.01755   valid_acc : 0.67871
epoch 7 -- train_loss : 0.94741   train_acc : 0.68529   valid_loss : 0.88122   valid_acc : 0.71647
epoch 8 -- train_loss : 0.80125   train_acc : 0.73414   valid_loss : 0.79781   valid_acc : 0.73275
epoch 9 -- train_loss : 0.74019   train_acc : 0.74490   valid_loss : 0.83655   valid_acc : 0.69401
epoch 10 -- train_loss : 0.66211   train_acc : 0.77457   valid_loss : 0.70174   valid_acc : 0.75423
epoch 11 

 # CNN(Conv1d)
 - 여러개의 필터 직렬 적용, epoch 100, 0.94
 - 여러개의 필터 병렬 적용, epoch 100, 0.92

In [None]:
def from_df_to_cnn(df):
  """Convert a keypoint table into per-sample 2-D arrays for the CNNs.

  Args:
    df: DataFrame with a ``label`` column; all other columns are features,
      consumed in consecutive pairs.

  Returns:
    A ``(data, labels)`` tuple. ``data`` has shape
    ``(n_samples, n_features // 2, 2)``: each row of ``df`` becomes one
    array whose rows are consecutive column pairs. ``labels`` is the
    ``label`` column as an array.

  Raises:
    ValueError: If a non-empty table has an odd number of feature columns.
  """
  features = df.drop(columns = 'label').to_numpy()
  n_samples, n_values = features.shape
  # One reshape for the whole table instead of one per row.
  data = features.reshape(n_samples, n_values // 2, 2)
  return data, np.array(df['label'])

In [None]:
# Per-sample keypoint arrays and labels for the CNN models.
x, y = from_df_to_cnn(standardized_df)

In [None]:
from sklearn.model_selection import train_test_split
# Stratified 15% hold-out split with a fixed seed.
train_x, valid_x, train_y, valid_y = train_test_split(x, y, stratify = y, test_size = 0.15, random_state = 42)

In [None]:
class CustomKeypointsDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding transposed float32 keypoints and a float32 label."""

    def __init__(self, keypoints, labels):
        # Both containers are stored as given and only indexed on demand.
        self.labels = labels
        self.keypoints = keypoints

    def __len__(self):
        # The label container defines the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        # Transpose so the coordinate axis comes first (channels for Conv1d).
        sample = self.keypoints[idx].T
        target = self.labels[idx]
        return (
            torch.tensor(sample, dtype=torch.float),
            torch.tensor(target, dtype=torch.float),
        )

In [None]:
# Wrap the CNN-shaped train and validation splits as torch datasets.
SkeletonTrainDataset = CustomKeypointsDataset(train_x, train_y)
SkeletonValidDataset = CustomKeypointsDataset(valid_x, valid_y)

In [None]:
# Batches of 128; only the training loader is shuffled.
TrainDataloader = torch.utils.data.DataLoader(SkeletonTrainDataset, batch_size = 128, shuffle = True)
ValidDataloader = torch.utils.data.DataLoader(SkeletonValidDataset, batch_size = 128,shuffle = False)

In [None]:
# Basic CNN with conv1d

class VanilaCNN(nn.Module):
  """Two Conv1d + max-pool stages followed by a two-layer classifier head.

  Produces 47 logits. The flatten step assumes the second pooling stage
  leaves 4 positions per channel (true for length-25 inputs).
  """

  def __init__(self) :
    super().__init__()
    self.conv1 = nn.Conv1d(in_channels=2, out_channels=16, kernel_size=3)
    self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
    self.maxpool = nn.MaxPool1d(kernel_size=2)
    self.fc1 = nn.Linear(in_features=32*4, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=47)

  def forward(self, x):
    # Convolution stages (no activation between them), each halved by pooling.
    features = self.maxpool(self.conv1(x))
    features = self.maxpool(self.conv2(features))
    # Flatten to (batch, 32*4) for the fully connected head.
    flat = features.view(-1, 32*4)
    return self.fc2(F.relu(self.fc1(flat)))

model = VanilaCNN()
# Sanity-check the output shape on one training batch before moving to the device.
print(model(next(iter(TrainDataloader))[0]).shape)
model.to(device)

torch.Size([128, 47])


VanilaCNN(
  (conv1): Conv1d(2, 16, kernel_size=(3,), stride=(1,))
  (conv2): Conv1d(16, 32, kernel_size=(3,), stride=(1,))
  (maxpool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=47, bias=True)
)

In [None]:
# Cross-entropy on the raw logits, optimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
  # Training pass. train()/eval() make the mode explicit; they only change
  # behaviour for layers such as dropout or batch norm.
  model.train()
  train_loss = []
  train_correct = 0
  train_seen = 0

  for skeleton, cls in TrainDataloader:
    skeleton, cls = skeleton.to(device), cls.to(device).long()

    optimizer.zero_grad()
    output = model(skeleton)
    loss = criterion(output, cls)
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())
    train_correct += (output.argmax(dim=1) == cls).sum().item()
    train_seen += cls.shape[0]

  # Validation pass: no gradients, model in inference mode.
  model.eval()
  valid_loss = []
  valid_correct = 0
  valid_seen = 0

  with torch.no_grad():
    for skeleton, cls in ValidDataloader:
      skeleton, cls = skeleton.to(device), cls.to(device).long()
      output = model(skeleton)
      valid_loss.append(criterion(output, cls).item())
      valid_correct += (output.argmax(dim=1) == cls).sum().item()
      valid_seen += cls.shape[0]

  # Accuracy is correct / seen samples, so a short final batch is not over-weighted.
  print(
      f'epoch {epoch+1} -- train_loss : {sum(train_loss) / len(TrainDataloader):.5f} '
      f'  train_acc : {train_correct / train_seen:.5f}'
      f'   valid_loss : {sum(valid_loss) / len(ValidDataloader):.5f} '
      f'  valid_acc : {valid_correct / valid_seen:.5f}'
  )

epoch 1 -- train_loss : 3.61090   train_acc : 0.15119   valid_loss : 3.50960   valid_acc : 0.16797
epoch 2 -- train_loss : 3.39223   train_acc : 0.17459   valid_loss : 3.29285   valid_acc : 0.17480
epoch 3 -- train_loss : 3.06773   train_acc : 0.22793   valid_loss : 2.88734   valid_acc : 0.25358
epoch 4 -- train_loss : 2.65635   train_acc : 0.28325   valid_loss : 2.48012   valid_acc : 0.30534
epoch 5 -- train_loss : 2.27329   train_acc : 0.35466   valid_loss : 2.14309   valid_acc : 0.38249
epoch 6 -- train_loss : 1.95540   train_acc : 0.42421   valid_loss : 1.82259   valid_acc : 0.46354
epoch 7 -- train_loss : 1.68365   train_acc : 0.49428   valid_loss : 1.57695   valid_acc : 0.52637
epoch 8 -- train_loss : 1.45865   train_acc : 0.55692   valid_loss : 1.39641   valid_acc : 0.59277
epoch 9 -- train_loss : 1.27839   train_acc : 0.60393   valid_loss : 1.19914   valid_acc : 0.62630
epoch 10 -- train_loss : 1.14055   train_acc : 0.63923   valid_loss : 1.08032   valid_acc : 0.66374
epoch 11 

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm

class CNN_network(nn.Module):
    """Three parallel Conv1d branches (kernel sizes 2, 3, 4) feeding an FC head.

    Each branch is convolution followed by stride-1 max pooling with the same
    kernel size. The branch outputs are concatenated along the length axis,
    flattened, and passed through three linear layers to 47 logits. For
    length-25 inputs the concatenated length is 23 + 21 + 19 = 63, which gives
    the 63 * 128 = 8064 inputs of the first linear layer.
    """

    def __init__(self):
        super().__init__()
        self.kernel = [2,3,4]
        self.output_size = 128
        k_small, k_mid, k_large = self.kernel
        width = self.output_size

        # Parallel convolution branches, all reading the 2-channel input.
        self.conv1 = nn.Conv1d(2, width, k_small, stride=1)
        self.conv2 = nn.Conv1d(2, width, k_mid, stride=1)
        self.conv3 = nn.Conv1d(2, width, k_large, stride=1)

        # Matching stride-1 pooling for each branch.
        self.pool1 = nn.MaxPool1d(k_small, stride=1)
        self.pool2 = nn.MaxPool1d(k_mid, stride=1)
        self.pool3 = nn.MaxPool1d(k_large, stride=1)

        # Dropout and fully connected head.
        self.dropout = nn.Dropout(0.25)
        self.linear1 = nn.Linear(8064, 1024)
        self.linear2 = nn.Linear(1024, 128)
        self.linear3 = nn.Linear(128, 47)

    def forward(self, x):
        branches = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        pooled = [pool(conv(x)) for conv, pool in branches]

        # Join along the length axis, then flatten everything but the batch axis.
        merged = torch.cat(pooled, dim=2).flatten(start_dim=1)

        # Linear head without activations; dropout after the first two layers.
        hidden = self.dropout(self.linear1(merged))
        hidden = self.dropout(self.linear2(hidden))
        return self.linear3(hidden)

In [None]:
model = CNN_network()
# Sanity-check the output shape on one training batch before moving to the device.
print(model(next(iter(TrainDataloader))[0]).shape)
model.to(device)

torch.Size([128, 47])


CNN_network(
  (conv1): Conv1d(2, 128, kernel_size=(2,), stride=(1,))
  (conv2): Conv1d(2, 128, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(2, 128, kernel_size=(4,), stride=(1,))
  (pool1): MaxPool1d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool1d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
  (pool3): MaxPool1d(kernel_size=4, stride=1, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.25, inplace=False)
  (linear1): Linear(in_features=8064, out_features=1024, bias=True)
  (linear2): Linear(in_features=1024, out_features=128, bias=True)
  (linear3): Linear(in_features=128, out_features=47, bias=True)
)

In [None]:
# Cross-entropy on the raw logits, optimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
  # Training pass. train() keeps the model's dropout active while learning.
  model.train()
  train_loss = []
  train_correct = 0
  train_seen = 0

  for skeleton, cls in TrainDataloader:
    skeleton, cls = skeleton.to(device), cls.to(device).long()

    optimizer.zero_grad()
    output = model(skeleton)
    loss = criterion(output, cls)
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())
    train_correct += (output.argmax(dim=1) == cls).sum().item()
    train_seen += cls.shape[0]

  # Validation pass. eval() disables dropout so validation metrics are
  # deterministic; no_grad() alone does not do that.
  model.eval()
  valid_loss = []
  valid_correct = 0
  valid_seen = 0

  with torch.no_grad():
    for skeleton, cls in ValidDataloader:
      skeleton, cls = skeleton.to(device), cls.to(device).long()
      output = model(skeleton)
      valid_loss.append(criterion(output, cls).item())
      valid_correct += (output.argmax(dim=1) == cls).sum().item()
      valid_seen += cls.shape[0]

  # Accuracy is correct / seen samples, so a short final batch is not over-weighted.
  print(
      f'epoch {epoch+1} -- train_loss : {sum(train_loss) / len(TrainDataloader):.5f} '
      f'  train_acc : {train_correct / train_seen:.5f}'
      f'   valid_loss : {sum(valid_loss) / len(ValidDataloader):.5f} '
      f'  valid_acc : {valid_correct / valid_seen:.5f}'
  )

epoch 1 -- train_loss : 5.94230   train_acc : 0.13236   valid_loss : 2.86244   valid_acc : 0.26823
epoch 2 -- train_loss : 2.47795   train_acc : 0.32803   valid_loss : 2.19817   valid_acc : 0.37370
epoch 3 -- train_loss : 1.89937   train_acc : 0.44308   valid_loss : 1.59647   valid_acc : 0.50358
epoch 4 -- train_loss : 1.52847   train_acc : 0.52450   valid_loss : 1.38278   valid_acc : 0.55143
epoch 5 -- train_loss : 1.23272   train_acc : 0.60464   valid_loss : 1.16501   valid_acc : 0.59538
epoch 6 -- train_loss : 1.01281   train_acc : 0.66453   valid_loss : 1.03604   valid_acc : 0.65234
epoch 7 -- train_loss : 0.90995   train_acc : 0.69765   valid_loss : 0.83355   valid_acc : 0.73665
epoch 8 -- train_loss : 0.82497   train_acc : 0.72234   valid_loss : 0.80071   valid_acc : 0.75684
epoch 9 -- train_loss : 0.76278   train_acc : 0.74341   valid_loss : 0.86582   valid_acc : 0.73079
epoch 10 -- train_loss : 0.70594   train_acc : 0.75461   valid_loss : 0.79965   valid_acc : 0.73242
epoch 11 

# CNN with conv2d
- epoch 100, 0.95

In [None]:
class CNN2d(nn.Module):
  """Small 2-D CNN over a single-channel keypoint grid, producing 47 logits.

  Input is ``(batch, 1, H, W)``. After two padded 3x3 convolutions, batch
  norm and 2x2 max pooling, the flattened feature size must equal 768
  (``64 * (H // 2) * (W // 2)``), e.g. H=2 and W=25.
  """

  def __init__(self) :
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, padding = 1)
    self.conv2 = nn.Conv2d(32, 64, 3, padding = 1)
    self.bn1 = nn.BatchNorm2d(64)
    self.maxpool = nn.MaxPool2d(2)
    # Only layers that forward() uses are registered, so the optimizer holds
    # no parameters that never receive gradients.
    self.fc1 = nn.Linear(768, 128)
    self.fc2 = nn.Linear(128,256)
    self.fc3 = nn.Linear(256,47)
    self.drop = nn.Dropout(0.2)

  def forward(self, x):
    # Convolutional stem: conv -> conv -> batch norm -> pool -> ReLU.
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.bn1(x)
    x = self.maxpool(x)
    x = F.relu(x)

    # Linear head without activations; dropout after the first two layers.
    x = x.flatten(start_dim = 1)
    x = self.fc1(x)
    x = self.drop(x)
    x = self.fc2(x)
    x = self.drop(x)
    x = self.fc3(x)

    return x

In [None]:
model = CNN2d()
model.to(device)

# Cross-entropy on the raw logits, optimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
  # Training pass. train() enables dropout and batch-statistics updates.
  model.train()
  train_loss = []
  train_correct = 0
  train_seen = 0

  for skeleton, cls in TrainDataloader:
    skeleton, cls = skeleton.to(device), cls.to(device).long()

    optimizer.zero_grad()
    # unsqueeze(1) adds the single input channel expected by Conv2d.
    output = model(skeleton.unsqueeze(1))
    loss = criterion(output, cls)
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())
    train_correct += (output.argmax(dim=1) == cls).sum().item()
    train_seen += cls.shape[0]

  # Validation pass. eval() makes batch norm use its running statistics and
  # disables dropout; no_grad() alone does neither.
  model.eval()
  valid_loss = []
  valid_correct = 0
  valid_seen = 0

  with torch.no_grad():
    for skeleton, cls in ValidDataloader:
      skeleton, cls = skeleton.to(device), cls.to(device).long()
      output = model(skeleton.unsqueeze(1))
      valid_loss.append(criterion(output, cls).item())
      valid_correct += (output.argmax(dim=1) == cls).sum().item()
      valid_seen += cls.shape[0]

  # Accuracy is correct / seen samples, so a short final batch is not over-weighted.
  print(
      f'epoch {epoch+1} -- train_loss : {sum(train_loss) / len(TrainDataloader):.5f} '
      f'  train_acc : {train_correct / train_seen:.5f}'
      f'   valid_loss : {sum(valid_loss) / len(ValidDataloader):.5f} '
      f'  valid_acc : {valid_correct / valid_seen:.5f}'
  )

epoch 1 -- train_loss : 3.31274   train_acc : 0.19105   valid_loss : 2.83573   valid_acc : 0.26530
epoch 2 -- train_loss : 2.37152   train_acc : 0.35911   valid_loss : 1.89673   valid_acc : 0.46322
epoch 3 -- train_loss : 1.55866   train_acc : 0.54163   valid_loss : 1.36447   valid_acc : 0.56901
epoch 4 -- train_loss : 1.07648   train_acc : 0.65700   valid_loss : 0.87632   valid_acc : 0.72266
epoch 5 -- train_loss : 0.78096   train_acc : 0.75239   valid_loss : 0.73884   valid_acc : 0.76400
epoch 6 -- train_loss : 0.67484   train_acc : 0.77791   valid_loss : 0.70109   valid_acc : 0.75423
epoch 7 -- train_loss : 0.58014   train_acc : 0.80678   valid_loss : 0.50901   valid_acc : 0.84635
epoch 8 -- train_loss : 0.47815   train_acc : 0.84282   valid_loss : 0.48043   valid_acc : 0.84342
epoch 9 -- train_loss : 0.46711   train_acc : 0.83958   valid_loss : 0.45843   valid_acc : 0.84701
epoch 10 -- train_loss : 0.42000   train_acc : 0.84805   valid_loss : 0.41661   valid_acc : 0.85547
epoch 11 

# Pre-trained model
- mobilenet_v2, epoch 100, 0.91

In [None]:
import torchvision
# `pretrained=True` is deprecated since torchvision 0.13; request the same
# ImageNet weights explicitly through the weights enum.
model = torchvision.models.mobilenet_v2(weights = torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1)
# Replace the stem with a single-channel convolution; this new layer starts
# from random weights, not the pretrained ones.
model.features[0][0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
# Replace the classifier output layer so it emits 47 class logits.
model.classifier[1] = nn.Linear(in_features=1280, out_features=47, bias=True)
model.to(device)



MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [None]:
# Cross-entropy on the raw logits, optimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
  # Training pass. train() lets the batch-norm layers update their statistics.
  model.train()
  train_loss = []
  train_correct = 0
  train_seen = 0

  for skeleton, cls in TrainDataloader:
    skeleton, cls = skeleton.to(device), cls.to(device).long()

    optimizer.zero_grad()
    # unsqueeze(1) adds the single input channel expected by the first conv.
    output = model(skeleton.unsqueeze(1))
    loss = criterion(output, cls)
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())
    train_correct += (output.argmax(dim=1) == cls).sum().item()
    train_seen += cls.shape[0]

  # Validation pass. eval() makes batch norm use its running statistics
  # instead of per-batch ones; no_grad() alone does not do that.
  model.eval()
  valid_loss = []
  valid_correct = 0
  valid_seen = 0

  with torch.no_grad():
    for skeleton, cls in ValidDataloader:
      skeleton, cls = skeleton.to(device), cls.to(device).long()
      output = model(skeleton.unsqueeze(1))
      valid_loss.append(criterion(output, cls).item())
      valid_correct += (output.argmax(dim=1) == cls).sum().item()
      valid_seen += cls.shape[0]

  # Accuracy is correct / seen samples, so a short final batch is not over-weighted.
  print(
      f'epoch {epoch+1} -- train_loss : {sum(train_loss) / len(TrainDataloader):.5f} '
      f'  train_acc : {train_correct / train_seen:.5f}'
      f'   valid_loss : {sum(valid_loss) / len(ValidDataloader):.5f} '
      f'  valid_acc : {valid_correct / valid_seen:.5f}'
  )

epoch 1 -- train_loss : 2.46935   train_acc : 0.34066   valid_loss : 1.60310   valid_acc : 0.49577
epoch 2 -- train_loss : 1.25396   train_acc : 0.59874   valid_loss : 1.11170   valid_acc : 0.64062
epoch 3 -- train_loss : 0.87850   train_acc : 0.69105   valid_loss : 0.91385   valid_acc : 0.68132
epoch 4 -- train_loss : 0.70783   train_acc : 0.75171   valid_loss : 0.82565   valid_acc : 0.70215
epoch 5 -- train_loss : 0.63448   train_acc : 0.76931   valid_loss : 0.73019   valid_acc : 0.76335
epoch 6 -- train_loss : 0.58162   train_acc : 0.79789   valid_loss : 0.77123   valid_acc : 0.76400
epoch 7 -- train_loss : 0.43288   train_acc : 0.84206   valid_loss : 0.65532   valid_acc : 0.77344
epoch 8 -- train_loss : 0.41944   train_acc : 0.84923   valid_loss : 0.68675   valid_acc : 0.77572
epoch 9 -- train_loss : 0.40475   train_acc : 0.85848   valid_loss : 0.65487   valid_acc : 0.79720
epoch 10 -- train_loss : 0.37769   train_acc : 0.86732   valid_loss : 0.56914   valid_acc : 0.82487
epoch 11 