In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch

# Проверка доступности GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Используемое устройство: {device}")


Используемое устройство: cuda


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from transformers import ViTForImageClassification, ViTFeatureExtractor
from sklearn.metrics import f1_score
from PIL import Image


In [None]:
import zipfile
z = zipfile.ZipFile("/content/drive/MyDrive/12 pfl/dataset.zip", "r")
z.extractall()

In [None]:
# Укажите путь к вашему датасету
data_dir = './dataset/train'

# Определим трансформации для изображений
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # изменяем размер изображений
    transforms.ToTensor(),  # преобразуем в тензор
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # нормализация
])

# Загружаем обучающий набор данных
train_dataset = ImageFolder(root=data_dir, transform=transform)

# Делим на обучающую и валидационную выборки
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Загружаем данные в DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [None]:
# Загружаем предобученную модель ViT
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=5)

# Перемещаем модель на GPU
model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [None]:
def train_model(model, train_loader, optimizer, criterion, device, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)  # Перемещение на GPU
            optimizer.zero_grad()
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Обучаем модель
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
criterion = torch.nn.CrossEntropyLoss()
train_model(model, train_loader, optimizer, criterion, device, num_epochs=5)


Epoch [1/5], Loss: 0.8461
Epoch [2/5], Loss: 0.2119
Epoch [3/5], Loss: 0.1232
Epoch [4/5], Loss: 0.0698
Epoch [5/5], Loss: 0.0479


In [None]:
def evaluate_model(model, val_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)  # Перемещение на GPU
            outputs = model(images).logits
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return f1_score(all_labels, all_preds, average='macro')

# Оценка модели
f1 = evaluate_model(model, val_loader, device)
print(f'F1 Score (macro): {f1:.4f}')


F1 Score (macro): 0.9560


In [None]:
# Загрузка тестовых данных
test_data_dir = './dataset/test'
test_images = os.listdir(test_data_dir)

# Преобразования для тестовых изображений
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Подготовка для предсказаний
model.eval()
submission = []

for img_name in test_images:
    img_path = os.path.join(test_data_dir, img_name)
    image = Image.open(img_path).convert("RGB")  # Убедимся, что изображение в формате RGB
    image = test_transform(image).unsqueeze(0).to(device)  # Перемещение на GPU

    with torch.no_grad():
        # Получаем предсказания
        output = model(image).logits
        _, predicted = torch.max(output, 1)

        # Получаем метку класса по индексу
        class_labels = train_dataset.dataset.classes  # Получаем список классов
        predicted_label = class_labels[predicted.item()]  # Извлекаем метку класса

        # Добавляем в список предсказаний
        submission.append({"name": img_name, "label": predicted_label})

# Создание DataFrame для сохранения в CSV
submission_df = pd.DataFrame(submission)

# Сохранение в файл submission.csv
submission_df.to_csv('submission.csv', index=False)
print("Предсказания сохранены в файл submission.csv")


Предсказания сохранены в файл submission.csv
