In [1]:
!pip install kaggle



In [2]:
!mkdir ~/.kaggle

In [3]:
!cp kaggle.json ~/.kaggle/

In [4]:
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
! kaggle competitions download ml-intensive-yandex-academy-autumn-2024

Downloading ml-intensive-yandex-academy-autumn-2024.zip to /content
100% 7.90G/7.90G [00:42<00:00, 242MB/s]
100% 7.90G/7.90G [00:42<00:00, 200MB/s]


In [6]:
!mkdir data

In [7]:
!unzip ml-intensive-yandex-academy-autumn-2024.zip -d data

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
  inflating: data/human_poses_data/img_train/3414.jpg  
  inflating: data/human_poses_data/img_train/3417.jpg  
  inflating: data/human_poses_data/img_train/3418.jpg  
  inflating: data/human_poses_data/img_train/3420.jpg  
  inflating: data/human_poses_data/img_train/3421.jpg  
  inflating: data/human_poses_data/img_train/3422.jpg  
  inflating: data/human_poses_data/img_train/3423.jpg  
  inflating: data/human_poses_data/img_train/3424.jpg  
  inflating: data/human_poses_data/img_train/3425.jpg  
  inflating: data/human_poses_data/img_train/3427.jpg  
  inflating: data/human_poses_data/img_train/3428.jpg  
  inflating: data/human_poses_data/img_train/3429.jpg  
  inflating: data/human_poses_data/img_train/3431.jpg  
  inflating: data/human_poses_data/img_train/3432.jpg  
  inflating: data/human_poses_data/img_train/3434.jpg  
  inflating: data/human_poses_data/img_train/3435.jpg  
  inflating: data/huma

In [10]:
import sys
import os

import torch
import csv
import numpy as np
import matplotlib.pyplot as plt

from torchvision.transforms import v2
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn
from PIL import Image
from torchsummary import summary

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [12]:
PATH = 'data/human_poses_data/'
transforms = v2.Compose([v2.PILToTensor(),
                         v2.Resize(size=(640, 640)),
                         v2.ToDtype(torch.float32),
                         ])

In [13]:
def read_image(path):
    return Image.open(path)


def create_dataset(X_path):
    res = []
    ids = sorted(os.listdir(X_path), key=lambda x: int(x[:-4]))
    for i in ids:
        res.append(read_image(X_path + '/' + i))
    return res

In [14]:
X = create_dataset(PATH + 'img_train')

In [15]:
y = []
with open(PATH + 'train_answers.csv') as f:
    table = csv.reader(f)
    y = sorted([i for i in table][1:], key=lambda x: int(x[0]))

In [16]:
train_y = nn.functional.one_hot(torch.tensor([int(i[1]) for i in y]))

In [17]:
categories = []
with open(PATH + 'activity_categories.csv') as f:
    table = csv.reader(f)
    categories =[i for i in table][1:]

In [18]:
counts = [0] * len(categories)
for i in y:
    counts[int(i[1])] += 1
counts

[2512,
 135,
 530,
 1680,
 752,
 982,
 800,
 0,
 533,
 1223,
 390,
 528,
 469,
 442,
 228,
 0,
 753,
 0,
 410,
 0]

In [19]:
class CustomDataset(Dataset):
    def __init__(self, X, y, transforms):
        self.X = X
        self.y = torch.as_tensor(y, dtype=torch.float32)
        self.transforms = transforms

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.transforms(self.X[idx]), self.y[idx]

In [20]:
train_val_dataset = CustomDataset(X, train_y, transforms)
train, val = random_split(train_val_dataset, [0.8, 0.2])

In [21]:
len(train_val_dataset[1][1])

19

In [22]:
class LinearNeuralNetwork(nn.Module):
    def __init__(self, img_shape=(3, 640, 640)):
        super().__init__()
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(np.prod(img_shape), 500),
            nn.ReLU(),
            nn.Linear(500, 100),
            nn.Linear(100, 19),
        )

    def forward(self, x):
        return self.linear(x)

In [23]:
model = LinearNeuralNetwork().to(device)

summary(model, (3, 640, 640))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 1228800]               0
            Linear-2                  [-1, 500]     614,400,500
              ReLU-3                  [-1, 500]               0
            Linear-4                  [-1, 100]          50,100
            Linear-5                   [-1, 19]           1,919
Total params: 614,452,519
Trainable params: 614,452,519
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 4.69
Forward/backward pass size (MB): 9.38
Params size (MB): 2343.95
Estimated Total Size (MB): 2358.02
----------------------------------------------------------------


In [24]:
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss = nn.CrossEntropyLoss()

In [25]:
from tqdm import tqdm


def run_epoch(model, opt, loss, dataloader, is_train=True):
    model.train(is_train)
    total_loss = 0.0
    with torch.set_grad_enabled(is_train):
        for x, y in tqdm(dataloader):
            x = x.to(device)
            l = loss(model(x), y)
            if (is_train):
                opt.zero_grad()
                l.backward()
                opt.step()
            total_loss += l.item()
    return total_loss / len(dataloader.dataset)

In [26]:
def plot_loss(loss, title, num_epochs):
    plt.title(title)
    plt.plot(loss)
    plt.grid()
    plt.xticks(np.arange(num_epochs))

def plot_losses(train, val, num_epochs):
    plt.figure(figsize=(16, 4))
    plt.subplot(1, 2, 1)
    plot_loss(train, f'Train Loss = {train[-1]}', num_epochs)
    plt.subplot(1, 2, 2)
    plot_loss(val, f'Val Loss = {val[-1]}', num_epochs)
    plt.show()

In [27]:
from IPython.display import clear_output

def run_train_loop(model, opt, loss, train_loader, val_loader, num_epochs):
    train_hist = []
    val_hist = []
    for e in range(num_epochs):
        print("Training...")
        train_loss = run_epoch(model, opt, loss, train_loader)
        train_hist.append(train_loss)
        print("Validating...")
        val_loss = run_epoch(model, opt, loss, val_loader, is_train=False)
        val_hist.append(val_loss)
        clear_output()
        plot_losses(train_hist, val_hist, num_epochs)

In [None]:
batch_size = 64
num_epochs = 4

train_loader = DataLoader(train, batch_size)
val_loader = DataLoader(val, batch_size)

run_train_loop(model, opt, loss, train_loader, val_loader, num_epochs)

Training...


 28%|██▊       | 43/155 [16:52<43:43, 23.42s/it]