In [84]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
from torch.utils.data import Dataset, DataLoader
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from transformers import ViTModel, ViTFeatureExtractor
from transformers import ViTImageProcessor, ViTForImageClassification
import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [85]:
%pip install pycocotools -q

Note: you may need to restart the kernel to use updated packages.


In [86]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torch import nn, optim
import json

In [87]:
# Параметры
data_dir = '/kaggle/input/animalscropped'
train_dir = f'{data_dir}/train'
val_dir = f'{data_dir}/valid'
batch_size = 8
num_epochs = 150
num_classes = 1  # Установите количество классов в вашем датасете

In [88]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [89]:
class CocoDataset(Dataset):
    def __init__(self, img_dir, json_file, transform=None):
        with open(json_file) as f:
            self.coco_data = json.load(f)
        self.img_dir = img_dir
        self.transform = transform

        # Создаем индекс аннотаций по id изображения для быстрого доступа
        self.annotations_index = {}
        for annotation in self.coco_data['annotations']:
            image_id = annotation['image_id']
            if image_id not in self.annotations_index:
                self.annotations_index[image_id] = []
            self.annotations_index[image_id].append(annotation)

    def __len__(self):
        return len(self.coco_data['images'])

    def __getitem__(self, idx):
        img_info = self.coco_data['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        image = Image.open(img_path).convert('RGB')

        label = None
        # Получаем аннотации для текущего изображения
        if img_info['id'] in self.annotations_index:
            annotations = self.annotations_index[img_info['id']]
            for annotation in annotations:
                label = 0 if annotation["category_id"] == 1 else 1
        if self.transform:
            # image = self.transform(np.array(image))
            # mask = self.transform(np.array(mask_image))
            image = self.transform(image)
        return image, torch.tensor(label).float().unsqueeze(0)

In [90]:
class ViTBinaryClassifier(nn.Module):
    def __init__(self):
        super(ViTBinaryClassifier, self).__init__()
        # Загружаем предобученную модель ViT
        self.vit = ViTModel.from_pretrained('google/vit-base-patch16-224')
        self.processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')

        # Добавляем линейный слой для бинарной классификации
        self.classifier = nn.Sequential(
            nn.Linear(self.vit.config.hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        # Пропускаем входные данные через ViT
        outputs = self.vit(x)
        # Берем выходные данные из последнего скрытого состояния [CLS]
        cls_output = outputs.last_hidden_state[:, 0, :]
        # Пропускаем через классификатор
        logits = self.classifier(cls_output)
        return logits

In [91]:
# Загрузка данных COCO
train_dataset = CocoDataset(train_dir, f'{train_dir}/_annotations.coco.json', transform=transform)
val_dataset = CocoDataset(val_dir, f'{val_dir}/_annotations.coco.json', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [92]:
model = ViTBinaryClassifier()


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.BCELoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [93]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    epoch_loss = running_loss 
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')
    return running_loss/len(train_loader)

# Функция валидации
def validate(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, targets in val_loader:
            targets = targets.to(device)

            images = images.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            total += len(targets)
            correct += (predicted == targets).sum().item()

    epoch_loss = running_loss / len(val_loader) 
    accuracy = correct / total
    return epoch_loss, accuracy

In [94]:
# Цикл обучения
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer)
    val_loss, val_accuracy = validate(model, val_loader, criterion)

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

print("Training complete.")

Epoch [1/150], Loss: 0.6888
Epoch 1/150, Train Loss: 0.6888, Val Loss: 0.6907, Val Accuracy: 0.6107
Epoch [2/150], Loss: 0.6876
Epoch 2/150, Train Loss: 0.6876, Val Loss: 0.6604, Val Accuracy: 0.6219
Epoch [3/150], Loss: 0.6715
Epoch 3/150, Train Loss: 0.6715, Val Loss: 0.6756, Val Accuracy: 0.6242
Epoch [4/150], Loss: 0.6738
Epoch 4/150, Train Loss: 0.6738, Val Loss: 0.6663, Val Accuracy: 0.6264
Epoch [5/150], Loss: 0.6697
Epoch 5/150, Train Loss: 0.6697, Val Loss: 0.6799, Val Accuracy: 0.6219
Epoch [6/150], Loss: 0.6666
Epoch 6/150, Train Loss: 0.6666, Val Loss: 0.6629, Val Accuracy: 0.6242
Epoch [7/150], Loss: 0.6681
Epoch 7/150, Train Loss: 0.6681, Val Loss: 0.6592, Val Accuracy: 0.6242
Epoch [8/150], Loss: 0.6656
Epoch 8/150, Train Loss: 0.6656, Val Loss: 0.6631, Val Accuracy: 0.6264
Epoch [9/150], Loss: 0.6656
Epoch 9/150, Train Loss: 0.6656, Val Loss: 0.6706, Val Accuracy: 0.6152
Epoch [10/150], Loss: 0.6716
Epoch 10/150, Train Loss: 0.6716, Val Loss: 0.6637, Val Accuracy: 0.624

In [95]:
torch.save(model, "/kaggle/working/saved.pth")

In [96]:
# import os
# import pandas as pd
# import torch
# from transformers import ViTModel, ViTFeatureExtractor
# from PIL import Image
# from tqdm import tqdm

# feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
# model = ViTModel.from_pretrained('google/vit-base-patch16-224')
# # Устройство для вычислений
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# model.eval()

# def process_images_in_directory(directory_path, csv_file_path):
#     # Чтение CSV файла
#     df = pd.read_csv(csv_file_path)
#     df.columns = df.columns.str.strip()
#     # Колонки для нового DataFrame
#     all_embeddings = []
#     labels = []

#     for index, row in tqdm(df.iterrows(), total=df.shape[0], desc=f"Processing {directory_path}"):
#         image_filename = row['filename']
#         label = row['good']  # Предполагается, что 'good' является целевым лейблом

#         image_path = os.path.join(directory_path, image_filename)
        
#         # Открытие и предобработка изображения
#         image = Image.open(image_path).convert('RGB')
#         inputs = feature_extractor(images=image, return_tensors="pt")
        
#         # Перемещение тензоров на устройство
#         inputs = {k: v.to(device) for k, v in inputs.items()}
        
#         # Извлечение эмбеддинга
#         with torch.no_grad():
#             outputs = model(**inputs)
#             embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()  # Среднее по всем токенам

#         all_embeddings.append(embedding)
#         labels.append(label)

#     # Преобразование списка эмбеддингов в DataFrame
#     embeddings_df = pd.DataFrame(all_embeddings)
    
#     # Добавление меток
#     embeddings_df['label'] = labels
    
#     return embeddings_df


# # Пути к директориям и CSV файлам
# directories_and_csvs = [
#     ('/kaggle/input/animalsqualitycropped/train', '/kaggle/input/animalsqualitycropped/train/_classes.csv'),
# ]

# # Обработка всех директорий и объединение результатов
# all_dataframes = []

# for directory, csv_file in directories_and_csvs:
#     df = process_images_in_directory(directory, csv_file)
#     all_dataframes.append(df)

# # Объединение всех данных в один DataFrame
# final_df = pd.concat(all_dataframes, ignore_index=True)

# # Сохранение в CSV файл
# final_df.to_csv('/kaggle/working/embeddings_and_labels.csv', index=False)

In [97]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from catboost import CatBoostClassifier
# from sklearn.metrics import accuracy_score

# # Предположим, что у вас есть DataFrame df, содержащий эмбеддинги и целевую переменную 'target'
# # Например, колонки 'embedding_1', 'embedding_2', ..., 'embedding_n' содержат эмбеддинги
# # df = pd.read_csv('your_dataset_with_embeddings.csv')

# # Разделяем данные на признаки (включая эмбеддинги) и целевую переменную
# X = df.drop(columns='label')
# y = df['label']

# # Разделяем на тренировочный и тестовый наборы
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Создаем модель CatBoost
# model = CatBoostClassifier(iterations=1000, learning_rate=0.1, depth=4, verbose=100)

# # Обучаем модель
# model.fit(X_train, y_train)

# # Делаем предсказания
# y_pred = model.predict(X_test)

# # Оцениваем точность модели
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy:.2f}")

In [98]:
model.save_model('/kaggle/working/catboost_model.cbm')

AttributeError: 'ViTBinaryClassifier' object has no attribute 'save_model'