In [2]:
import torch
from torch import nn
import matplotlib.pyplot as plt

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

Uzyskiwanie danych:

In [4]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
import os

data_dir = "/content/drive/MyDrive/images"
os.listdir(data_dir)


['serve', 'backhand', 'ready_position', 'forehand']

In [6]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [7]:
from torchvision.datasets import ImageFolder

dataset = ImageFolder(
    root=data_dir,
    transform=transform
)


In [8]:
dataset.class_to_idx


{'backhand': 0, 'forehand': 1, 'ready_position': 2, 'serve': 3}

In [9]:
from torch.utils.data import DataLoader

dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)


Podział na grupy treningowe, walidacyjne i testowe:



In [10]:
from torch.utils.data import random_split

torch.manual_seed(42)

total_size = len(dataset)

train_size = int(0.7 * total_size)
val_size   = int(0.15 * total_size)
test_size  = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size]
)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

Tworzenie modelu:

In [12]:
class TenisVisionCNN (nn.Module):
  def __init__(self, output_shape: int=4):
    super().__init__()

    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(in_channels=3,
                  out_channels=32,
                  kernel_size=3,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(in_channels=32,
                  out_channels=64,
                  kernel_size=3,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.conv_block_3 = nn.Sequential(
        nn.Conv2d(in_channels=64,
                  out_channels=128,
                  kernel_size=3,
                  padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=128*28*28, # policzone używając dummy torch (kod poniżej)
                  out_features=output_shape)
    )

  def forward(self, x: torch.Tensor):
    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.conv_block_3(x)
    x = self.classifier(x)
    return x


Sprawdzanie wartości na wyjściu trzeciego bloku, aby sprawdzić wejści do classifier:

In [13]:
model = TenisVisionCNN()

In [14]:
dummy = torch.rand(size=(1, 3, 224, 224))
x = dummy
x = model.conv_block_1(x)
x = model.conv_block_2(x)
x = model.conv_block_3(x)
x.shape

torch.Size([1, 128, 28, 28])

Trenowanie modelu:

In [15]:
model = TenisVisionCNN().to(device)

In [16]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=0.001)

In [17]:
def accuracy_fn(y_true, y_pred):
    correct = (y_true == y_pred).sum().item()
    acc = correct / len(y_true) * 100
    return acc


Wprowadzony zostaje early stopping:

In [18]:
patience = 10        # ile epok czekać bez poprawy
best_val_loss = float("inf")
epochs_no_improve = 0


In [19]:
epochs = 100

for epoch in range(epochs):
    print(f"Epoch: {epoch}\n-------")

    # === TRAIN ===
    model.train()
    train_loss = 0

    for X, y in train_loader:
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= len(train_loader)

    # === VALIDATION ===
    model.eval()
    val_loss, val_acc = 0, 0

    with torch.inference_mode():
        for X_val, y_val in val_loader:
            X_val, y_val = X_val.to(device), y_val.to(device)

            val_pred = model(X_val)
            val_loss += loss_fn(val_pred, y_val).item()
            val_acc += accuracy_fn(
                y_true=y_val,
                y_pred=val_pred.argmax(dim=1)
            )

    val_loss /= len(val_loader)
    val_acc  /= len(val_loader)


    # dane na bierząco
    print(
            f"Train loss: {train_loss:.4f} | "
            f"Val loss: {val_loss:.4f} | "
            f"Val acc: {val_acc:.2f}%"
        )

    # === EARLY STOPPING ===
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch}")
        break



Epoch: 0
-------
Train loss: 1.3641 | Val loss: 1.1834 | Val acc: 46.48%
Epoch: 1
-------
Train loss: 1.0812 | Val loss: 0.8823 | Val acc: 66.02%
Epoch: 2
-------
Train loss: 0.8214 | Val loss: 0.8191 | Val acc: 70.31%
Epoch: 3
-------
Train loss: 0.7067 | Val loss: 0.6404 | Val acc: 73.44%
Epoch: 4
-------
Train loss: 0.5456 | Val loss: 0.5335 | Val acc: 77.73%
Epoch: 5
-------
Train loss: 0.4715 | Val loss: 0.5066 | Val acc: 80.86%
Epoch: 6
-------
Train loss: 0.4241 | Val loss: 0.4640 | Val acc: 80.47%
Epoch: 7
-------
Train loss: 0.3248 | Val loss: 0.4270 | Val acc: 80.86%
Epoch: 8
-------
Train loss: 0.2854 | Val loss: 0.4395 | Val acc: 83.59%
Epoch: 9
-------
Train loss: 0.2269 | Val loss: 0.4885 | Val acc: 84.38%
Epoch: 10
-------
Train loss: 0.2136 | Val loss: 0.4464 | Val acc: 83.59%
Epoch: 11
-------
Train loss: 0.1663 | Val loss: 0.5660 | Val acc: 83.98%
Epoch: 12
-------
Train loss: 0.1410 | Val loss: 0.4975 | Val acc: 83.59%
Epoch: 13
-------
Train loss: 0.0900 | Val loss:

Test

In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

model.eval()

all_preds = []
all_labels = []

with torch.inference_mode():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)

        logits = model(X)
        preds = torch.argmax(logits, dim=1)

        all_preds.append(preds.cpu())
        all_labels.append(y.cpu())

# tensor → numpy
all_preds = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

# metryki
acc = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average="macro")
recall = recall_score(all_labels, all_preds, average="macro")
f1 = f1_score(all_labels, all_preds, average="macro")

print(f"Accuracy : {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-score : {f1:.4f}")


Accuracy : 0.8792
Precision: 0.8788
Recall   : 0.8813
F1-score : 0.8798
