In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel
from tqdm import tqdm
from PIL import ImageFile

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

In [3]:
# Путь к данным animals
train_path = 'D:\\ProgPrj\\dsProjects\\gazprom-media\\ml\\train'
test_path = 'D:\\ProgPrj\\dsProjects\\gazprom-media\\ml\\test'

In [4]:
# Преобразования для тренировочного и валидационного наборов данных
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomApply(torch.nn.ModuleList([transforms.ColorJitter()]), p=0.25),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomRotation(degrees=(-10, 10)),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.1, value='random')
])

In [5]:
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [6]:
# Определение модели
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model.to(DEVICE)

  return self.fget.__get__(instance, owner)()


CLIPModel(
  (text_model): CLIPTextTransformer(
    (embeddings): CLIPTextEmbeddings(
      (token_embedding): Embedding(49408, 512)
      (position_embedding): Embedding(77, 512)
    )
    (encoder): CLIPEncoder(
      (layers): ModuleList(
        (0-11): 12 x CLIPEncoderLayer(
          (self_attn): CLIPAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (mlp): CLIPMLP(
            (activation_fn): QuickGELUActivation()
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
          )
          (layer_norm2): LayerNorm((512,), eps=1e-05,

In [7]:
# Оптимизатор и функция потерь
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Scheduler для оптимизации
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

In [8]:
# Загрузка данных
train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
val_dataset = datasets.ImageFolder(test_path, transform=val_transform)

In [9]:
print(len(train_dataset.class_to_idx))

101


In [10]:
batch_size = 96

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)

val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

In [11]:
# Скалер для смешанной точности
scaler = torch.cuda.amp.GradScaler()

# Обучение модели
num_epochs = 50

for epoch in range(num_epochs):
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    model.train()
    running_loss = 0.0
    train_loader_tqdm = tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}")

    for images, labels in train_loader_tqdm:
        images, labels = images.to(DEVICE, non_blocking=True), labels.to(DEVICE, non_blocking=True)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            outputs = model.get_image_features(images)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * images.size(0)
        train_loader_tqdm.set_postfix(loss=loss.item())

    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{num_epochs} Train Loss: {epoch_loss:.4f}")

Training Epoch 1/50: 100%|██████████| 790/790 [01:32<00:00,  8.52it/s, loss=3.24]


Epoch 1/50 Train Loss: 4.0288


Training Epoch 2/50: 100%|██████████| 790/790 [01:35<00:00,  8.28it/s, loss=2.38]


Epoch 2/50 Train Loss: 3.0986


Training Epoch 3/50: 100%|██████████| 790/790 [01:44<00:00,  7.58it/s, loss=2.52]


Epoch 3/50 Train Loss: 2.6483


Training Epoch 4/50: 100%|██████████| 790/790 [01:45<00:00,  7.50it/s, loss=0.79]


Epoch 4/50 Train Loss: 2.3582


Training Epoch 5/50: 100%|██████████| 790/790 [01:52<00:00,  7.02it/s, loss=1.26]


Epoch 5/50 Train Loss: 2.1273


Training Epoch 6/50: 100%|██████████| 790/790 [01:43<00:00,  7.67it/s, loss=0.774]


Epoch 6/50 Train Loss: 1.9397


Training Epoch 7/50: 100%|██████████| 790/790 [01:38<00:00,  8.04it/s, loss=2.78]


Epoch 7/50 Train Loss: 1.7669


Training Epoch 8/50: 100%|██████████| 790/790 [01:33<00:00,  8.41it/s, loss=1.66]


Epoch 8/50 Train Loss: 1.6268


Training Epoch 9/50: 100%|██████████| 790/790 [01:30<00:00,  8.72it/s, loss=1.9]  


Epoch 9/50 Train Loss: 1.4710


Training Epoch 10/50: 100%|██████████| 790/790 [01:33<00:00,  8.46it/s, loss=1.48] 


Epoch 10/50 Train Loss: 1.3398


Training Epoch 11/50: 100%|██████████| 790/790 [01:31<00:00,  8.64it/s, loss=0.881]


Epoch 11/50 Train Loss: 1.2330


Training Epoch 12/50: 100%|██████████| 790/790 [01:34<00:00,  8.40it/s, loss=2.51] 


Epoch 12/50 Train Loss: 1.1058


Training Epoch 13/50: 100%|██████████| 790/790 [01:32<00:00,  8.56it/s, loss=1.69] 


Epoch 13/50 Train Loss: 1.0156


Training Epoch 14/50: 100%|██████████| 790/790 [01:40<00:00,  7.86it/s, loss=0.787]


Epoch 14/50 Train Loss: 0.9079


Training Epoch 15/50: 100%|██████████| 790/790 [01:33<00:00,  8.41it/s, loss=1.85] 


Epoch 15/50 Train Loss: 0.8298


Training Epoch 16/50: 100%|██████████| 790/790 [01:30<00:00,  8.71it/s, loss=1.45] 


Epoch 16/50 Train Loss: 0.7632


Training Epoch 17/50: 100%|██████████| 790/790 [01:33<00:00,  8.45it/s, loss=0.124]


Epoch 17/50 Train Loss: 0.7030


Training Epoch 18/50: 100%|██████████| 790/790 [01:33<00:00,  8.48it/s, loss=0.0397]


Epoch 18/50 Train Loss: 0.6644


Training Epoch 19/50: 100%|██████████| 790/790 [01:36<00:00,  8.15it/s, loss=0.457]


Epoch 19/50 Train Loss: 0.6180


Training Epoch 20/50: 100%|██████████| 790/790 [01:29<00:00,  8.85it/s, loss=0.836]


Epoch 20/50 Train Loss: 0.5885


Training Epoch 21/50: 100%|██████████| 790/790 [01:32<00:00,  8.54it/s, loss=0.757]


Epoch 21/50 Train Loss: 0.5665


Training Epoch 22/50: 100%|██████████| 790/790 [01:49<00:00,  7.22it/s, loss=1.47] 


Epoch 22/50 Train Loss: 0.5434


Training Epoch 23/50: 100%|██████████| 790/790 [01:38<00:00,  8.01it/s, loss=1.01] 


Epoch 23/50 Train Loss: 0.5215


Training Epoch 24/50: 100%|██████████| 790/790 [01:39<00:00,  7.95it/s, loss=0.785]


Epoch 24/50 Train Loss: 0.4940


Training Epoch 25/50: 100%|██████████| 790/790 [01:34<00:00,  8.40it/s, loss=0.796]


Epoch 25/50 Train Loss: 0.4806


Training Epoch 26/50: 100%|██████████| 790/790 [01:40<00:00,  7.85it/s, loss=0.596]


Epoch 26/50 Train Loss: 0.4607


Training Epoch 27/50: 100%|██████████| 790/790 [01:35<00:00,  8.27it/s, loss=0.921]


Epoch 27/50 Train Loss: 0.4559


Training Epoch 28/50: 100%|██████████| 790/790 [01:32<00:00,  8.56it/s, loss=0.498]


Epoch 28/50 Train Loss: 0.4307


Training Epoch 29/50: 100%|██████████| 790/790 [01:36<00:00,  8.18it/s, loss=0.265]


Epoch 29/50 Train Loss: 0.4157


Training Epoch 30/50: 100%|██████████| 790/790 [01:43<00:00,  7.65it/s, loss=0.386]


Epoch 30/50 Train Loss: 0.4130


Training Epoch 31/50: 100%|██████████| 790/790 [01:39<00:00,  7.94it/s, loss=0.474]


Epoch 31/50 Train Loss: 0.3972


Training Epoch 32/50: 100%|██████████| 790/790 [01:40<00:00,  7.88it/s, loss=0.519] 


Epoch 32/50 Train Loss: 0.3744


Training Epoch 33/50: 100%|██████████| 790/790 [01:37<00:00,  8.13it/s, loss=0.039]


Epoch 33/50 Train Loss: 0.3743


Training Epoch 34/50: 100%|██████████| 790/790 [01:36<00:00,  8.23it/s, loss=0.086]


Epoch 34/50 Train Loss: 0.3544


Training Epoch 35/50: 100%|██████████| 790/790 [01:32<00:00,  8.55it/s, loss=0.212]


Epoch 35/50 Train Loss: 0.3450


Training Epoch 36/50: 100%|██████████| 790/790 [01:46<00:00,  7.41it/s, loss=0.334]


Epoch 36/50 Train Loss: 0.3368


Training Epoch 37/50: 100%|██████████| 790/790 [02:01<00:00,  6.51it/s, loss=0.0158]


Epoch 37/50 Train Loss: 0.3363


Training Epoch 38/50: 100%|██████████| 790/790 [01:45<00:00,  7.46it/s, loss=0.194] 


Epoch 38/50 Train Loss: 0.3155


Training Epoch 39/50: 100%|██████████| 790/790 [01:42<00:00,  7.69it/s, loss=0.000479]


Epoch 39/50 Train Loss: 0.3114


Training Epoch 40/50: 100%|██████████| 790/790 [01:37<00:00,  8.07it/s, loss=0.00833]


Epoch 40/50 Train Loss: 0.3012


Training Epoch 41/50: 100%|██████████| 790/790 [01:33<00:00,  8.41it/s, loss=0.532] 


Epoch 41/50 Train Loss: 0.2925


Training Epoch 42/50: 100%|██████████| 790/790 [01:44<00:00,  7.59it/s, loss=0.00836]


Epoch 42/50 Train Loss: 0.2849


Training Epoch 43/50: 100%|██████████| 790/790 [01:32<00:00,  8.50it/s, loss=0.00131]


Epoch 43/50 Train Loss: 0.2781


Training Epoch 44/50: 100%|██████████| 790/790 [01:32<00:00,  8.54it/s, loss=1.43]  


Epoch 44/50 Train Loss: 0.2689


Training Epoch 45/50: 100%|██████████| 790/790 [01:33<00:00,  8.45it/s, loss=0.119] 


Epoch 45/50 Train Loss: 0.2580


Training Epoch 46/50: 100%|██████████| 790/790 [01:35<00:00,  8.24it/s, loss=0.00494]


Epoch 46/50 Train Loss: 0.2621


Training Epoch 47/50: 100%|██████████| 790/790 [01:32<00:00,  8.55it/s, loss=0.708] 


Epoch 47/50 Train Loss: 0.2422


Training Epoch 48/50: 100%|██████████| 790/790 [01:32<00:00,  8.54it/s, loss=0.106] 


Epoch 48/50 Train Loss: 0.2367


Training Epoch 49/50: 100%|██████████| 790/790 [01:33<00:00,  8.40it/s, loss=0.802] 


Epoch 49/50 Train Loss: 0.2370


Training Epoch 50/50: 100%|██████████| 790/790 [01:38<00:00,  7.99it/s, loss=0.17]  

Epoch 50/50 Train Loss: 0.2401





In [12]:
# Validation loop
model.eval()
val_loss = 0
val_preds = []
val_labels = []
with torch.no_grad():
    for images, labels in tqdm(val_loader, desc="Validation"):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = model.get_image_features(images)
        loss = criterion(outputs, labels)
        val_loss += loss.item()

        preds = outputs.argmax(dim=1)
        val_preds.extend(preds.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())

val_loss /= len(val_loader)
val_accuracy = accuracy_score(val_labels, val_preds)
val_f1 = f1_score(val_labels, val_preds, average='weighted')

print("Validation Results")
print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}, Val F1: {val_f1:.4f}")

# Step scheduler based on validation loss
scheduler.step(val_loss)

Validation: 100%|██████████| 264/264 [00:32<00:00,  8.01it/s]

Validation Results
Val Loss: 2.0775, Val Accuracy: 0.6199, Val F1: 0.6185



