In [54]:
import os
import pandas as pd
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments
from datasets import Dataset
import evaluate
import torch
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# Пути к директориям с данными
train_dir = "train/simpsons_dataset"
test_dir = "testset/testset"

In [3]:
# Получение списка классов из директории обучения
characters = sorted(os.listdir(train_dir))
num_classes = len(characters)
character_labels = {character: idx for idx, character in enumerate(characters)}

In [4]:
num_classes

42

In [5]:
character_labels

{'abraham_grampa_simpson': 0,
 'agnes_skinner': 1,
 'apu_nahasapeemapetilon': 2,
 'barney_gumble': 3,
 'bart_simpson': 4,
 'carl_carlson': 5,
 'charles_montgomery_burns': 6,
 'chief_wiggum': 7,
 'cletus_spuckler': 8,
 'comic_book_guy': 9,
 'disco_stu': 10,
 'edna_krabappel': 11,
 'fat_tony': 12,
 'gil': 13,
 'groundskeeper_willie': 14,
 'homer_simpson': 15,
 'kent_brockman': 16,
 'krusty_the_clown': 17,
 'lenny_leonard': 18,
 'lionel_hutz': 19,
 'lisa_simpson': 20,
 'maggie_simpson': 21,
 'marge_simpson': 22,
 'martin_prince': 23,
 'mayor_quimby': 24,
 'milhouse_van_houten': 25,
 'miss_hoover': 26,
 'moe_szyslak': 27,
 'ned_flanders': 28,
 'nelson_muntz': 29,
 'otto_mann': 30,
 'patty_bouvier': 31,
 'principal_skinner': 32,
 'professor_john_frink': 33,
 'rainier_wolfcastle': 34,
 'ralph_wiggum': 35,
 'selma_bouvier': 36,
 'sideshow_bob': 37,
 'sideshow_mel': 38,
 'snake_jailbird': 39,
 'troy_mcclure': 40,
 'waylon_smithers': 41}

In [6]:
# Подготовка данных для тренировочного набора
train_data = []
for character in characters:
    character_folder = os.path.join(train_dir, character)
    for img_name in os.listdir(character_folder):
        img_path = os.path.join(character_folder, img_name)
        train_data.append({"image_path": img_path, "label": character_labels[character]})

# Подготовка данных для тестового набора
test_data = []
for img_name in os.listdir(test_dir):
    img_path = os.path.join(test_dir, img_name)
    image_id = os.path.splitext(img_name)[0]
    test_data.append({"image_path": img_path, "image_id": image_id})

In [7]:
train_data

[{'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0000.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0001.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0002.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0003.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0004.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0005.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0006.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0007.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0008.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_grampa_simpson\\pic_0009.jpg',
  'label': 0},
 {'image_path': 'train/simpsons_dataset\\abraham_g

In [8]:
test_data

[{'image_path': 'testset/testset\\img0.jpg', 'image_id': 'img0'},
 {'image_path': 'testset/testset\\img1.jpg', 'image_id': 'img1'},
 {'image_path': 'testset/testset\\img10.jpg', 'image_id': 'img10'},
 {'image_path': 'testset/testset\\img100.jpg', 'image_id': 'img100'},
 {'image_path': 'testset/testset\\img101.jpg', 'image_id': 'img101'},
 {'image_path': 'testset/testset\\img102.jpg', 'image_id': 'img102'},
 {'image_path': 'testset/testset\\img103.jpg', 'image_id': 'img103'},
 {'image_path': 'testset/testset\\img104.jpg', 'image_id': 'img104'},
 {'image_path': 'testset/testset\\img105.jpg', 'image_id': 'img105'},
 {'image_path': 'testset/testset\\img106.jpg', 'image_id': 'img106'},
 {'image_path': 'testset/testset\\img107.jpg', 'image_id': 'img107'},
 {'image_path': 'testset/testset\\img108.jpg', 'image_id': 'img108'},
 {'image_path': 'testset/testset\\img109.jpg', 'image_id': 'img109'},
 {'image_path': 'testset/testset\\img11.jpg', 'image_id': 'img11'},
 {'image_path': 'testset/testset

In [9]:
# Разделение тренировочного набора на тренировочную и валидационную части
train_df, val_df = train_test_split(train_data, test_size=0.1, stratify=[d['label'] for d in train_data])

In [10]:
# Создание объектов Dataset из pandas DataFrame
train_dataset = Dataset.from_pandas(pd.DataFrame(train_df))
val_dataset = Dataset.from_pandas(pd.DataFrame(val_df))
test_df = pd.DataFrame(test_data)
test_dataset = Dataset.from_pandas(test_df)

In [11]:
train_dataset

Dataset({
    features: ['image_path', 'label'],
    num_rows: 18839
})

In [12]:
test_dataset

Dataset({
    features: ['image_path', 'image_id'],
    num_rows: 991
})

In [13]:
# Загрузка процессора и модели ResNet от Microsoft
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = AutoModelForImageClassification.from_pretrained("microsoft/resnet-50", num_labels=num_classes, ignore_mismatched_sizes=True)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([42]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([42, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Функция предобработки изображений
def preprocess_function(examples):
    images = [Image.open(image_path).convert("RGB") for image_path in examples["image_path"]]
    inputs = processor(images=images, return_tensors="pt")
    examples["pixel_values"] = inputs["pixel_values"]
    return examples

In [15]:
# Применение предобработки к тренировочному, валидационному и тестовому датасетам
train_dataset = train_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/18839 [00:00<?, ? examples/s]

In [16]:
train_dataset

Dataset({
    features: ['image_path', 'label', 'pixel_values'],
    num_rows: 18839
})

In [17]:
val_dataset = val_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/2094 [00:00<?, ? examples/s]

In [18]:
val_dataset

Dataset({
    features: ['image_path', 'label', 'pixel_values'],
    num_rows: 2094
})

In [19]:
test_dataset = test_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/991 [00:00<?, ? examples/s]

In [20]:
test_dataset

Dataset({
    features: ['image_path', 'image_id', 'pixel_values'],
    num_rows: 991
})

In [22]:
train_dataset = train_dataset.remove_columns(["image_path"])
val_dataset = val_dataset.remove_columns(["image_path"])
test_dataset = test_dataset.remove_columns(["image_path"])

In [23]:
train_dataset

Dataset({
    features: ['label', 'pixel_values'],
    num_rows: 18839
})

In [24]:
# Загрузка метрики F1 из библиотеки evaluate
f1_metric = evaluate.load("f1")

# Функция для расчёта метрик
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    return f1_metric.compute(predictions=predictions, references=labels, average="weighted")

In [29]:
# Настройка аргументов тренировки с тюнингом гиперпараметров
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=32,  # Попробуйте различные значения batch_size
    per_device_eval_batch_size=32,
    num_train_epochs=1,  # Попробуйте разные значения для выбора оптимального
    learning_rate=5e-5,  # Начните с этого значения и экспериментируйте
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    save_total_limit=2,
    remove_unused_columns=False
)

In [30]:
# Оптимизация использования GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [31]:
# Инициализация Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # Используем валидационный набор
    compute_metrics=compute_metrics,
    tokenizer=processor,
)

  trainer = Trainer(


In [32]:
# Обучение модели
trainer.train()

  0%|          | 0/589 [00:00<?, ?it/s]

{'loss': 3.6509, 'grad_norm': 1.8901402950286865, 'learning_rate': 4.915110356536503e-05, 'epoch': 0.02}
{'loss': 3.6383, 'grad_norm': 4.453996658325195, 'learning_rate': 4.8302207130730054e-05, 'epoch': 0.03}
{'loss': 3.5765, 'grad_norm': 2.43491530418396, 'learning_rate': 4.7453310696095074e-05, 'epoch': 0.05}
{'loss': 3.5535, 'grad_norm': 4.248633861541748, 'learning_rate': 4.6604414261460106e-05, 'epoch': 0.07}
{'loss': 3.5344, 'grad_norm': 3.6437602043151855, 'learning_rate': 4.5755517826825125e-05, 'epoch': 0.08}
{'loss': 3.5025, 'grad_norm': 10.050701141357422, 'learning_rate': 4.490662139219016e-05, 'epoch': 0.1}
{'loss': 3.4643, 'grad_norm': 2.9539356231689453, 'learning_rate': 4.405772495755518e-05, 'epoch': 0.12}
{'loss': 3.4341, 'grad_norm': 3.23858642578125, 'learning_rate': 4.320882852292021e-05, 'epoch': 0.14}
{'loss': 3.3921, 'grad_norm': 4.959323406219482, 'learning_rate': 4.235993208828523e-05, 'epoch': 0.15}
{'loss': 3.3647, 'grad_norm': 3.0395267009735107, 'learning

  0%|          | 0/66 [00:00<?, ?it/s]

{'eval_loss': 2.925139904022217, 'eval_f1': 0.03691579141028085, 'eval_runtime': 151.7695, 'eval_samples_per_second': 13.797, 'eval_steps_per_second': 0.435, 'epoch': 1.0}
{'train_runtime': 1597.8009, 'train_samples_per_second': 11.791, 'train_steps_per_second': 0.369, 'train_loss': 3.1259124412601387, 'epoch': 1.0}


TrainOutput(global_step=589, training_loss=3.1259124412601387, metrics={'train_runtime': 1597.8009, 'train_samples_per_second': 11.791, 'train_steps_per_second': 0.369, 'total_flos': 4.014482967058637e+17, 'train_loss': 3.1259124412601387, 'epoch': 1.0})

In [50]:
# Предсказания на тестовом наборе
predictions = trainer.predict(test_dataset)
predicted_labels = predictions.predictions.argmax(axis=-1)

  0%|          | 0/31 [00:00<?, ?it/s]

In [51]:
predictions

PredictionOutput(predictions=array([[ 0.46198332, -1.5716109 , -0.08818911, ..., -1.5768465 ,
        -1.9783835 , -0.865121  ],
       [ 0.18649301, -0.99858266,  0.0463847 , ..., -0.8866917 ,
        -1.2029325 , -0.47244984],
       [ 0.5569199 , -2.226527  ,  0.10149281, ..., -2.3168194 ,
        -2.7985673 , -1.2513454 ],
       ...,
       [ 0.45123282, -1.2973677 ,  0.13329583, ..., -1.2690095 ,
        -1.659847  , -0.79832125],
       [ 0.25707057, -1.2821813 ,  0.06671086, ..., -1.1640714 ,
        -1.2387038 , -0.6421496 ],
       [ 0.39317867, -1.1985978 ,  0.09774639, ..., -1.1788081 ,
        -1.3793693 , -0.6515255 ]], dtype=float32), label_ids=None, metrics={'test_runtime': 69.5207, 'test_samples_per_second': 14.255, 'test_steps_per_second': 0.446})

In [52]:
predicted_labels

array([15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 27, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 27, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 20, 15, 15, 15, 27, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 20, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15

In [58]:
predicted_labels

array([15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 27, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 20, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 27, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 20, 15, 15, 15, 27, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 20, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15

In [36]:
# Преобразование предсказанных меток в имена классов
label_to_character = {v: k for k, v in character_labels.items()}

In [37]:
label_to_character

{0: 'abraham_grampa_simpson',
 1: 'agnes_skinner',
 2: 'apu_nahasapeemapetilon',
 3: 'barney_gumble',
 4: 'bart_simpson',
 5: 'carl_carlson',
 6: 'charles_montgomery_burns',
 7: 'chief_wiggum',
 8: 'cletus_spuckler',
 9: 'comic_book_guy',
 10: 'disco_stu',
 11: 'edna_krabappel',
 12: 'fat_tony',
 13: 'gil',
 14: 'groundskeeper_willie',
 15: 'homer_simpson',
 16: 'kent_brockman',
 17: 'krusty_the_clown',
 18: 'lenny_leonard',
 19: 'lionel_hutz',
 20: 'lisa_simpson',
 21: 'maggie_simpson',
 22: 'marge_simpson',
 23: 'martin_prince',
 24: 'mayor_quimby',
 25: 'milhouse_van_houten',
 26: 'miss_hoover',
 27: 'moe_szyslak',
 28: 'ned_flanders',
 29: 'nelson_muntz',
 30: 'otto_mann',
 31: 'patty_bouvier',
 32: 'principal_skinner',
 33: 'professor_john_frink',
 34: 'rainier_wolfcastle',
 35: 'ralph_wiggum',
 36: 'selma_bouvier',
 37: 'sideshow_bob',
 38: 'sideshow_mel',
 39: 'snake_jailbird',
 40: 'troy_mcclure',
 41: 'waylon_smithers'}

In [38]:
predicted_characters = [label_to_character[label] for label in predicted_labels]
test_img_ids = test_df["image_id"].tolist()

In [40]:
predicted_characters

['homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_simpson',
 'homer_si

In [39]:
test_img_ids

['img0',
 'img1',
 'img10',
 'img100',
 'img101',
 'img102',
 'img103',
 'img104',
 'img105',
 'img106',
 'img107',
 'img108',
 'img109',
 'img11',
 'img110',
 'img111',
 'img112',
 'img113',
 'img114',
 'img115',
 'img116',
 'img117',
 'img118',
 'img119',
 'img12',
 'img120',
 'img121',
 'img122',
 'img123',
 'img124',
 'img125',
 'img126',
 'img127',
 'img128',
 'img129',
 'img13',
 'img130',
 'img131',
 'img132',
 'img133',
 'img134',
 'img135',
 'img136',
 'img137',
 'img138',
 'img139',
 'img14',
 'img140',
 'img141',
 'img142',
 'img143',
 'img144',
 'img145',
 'img146',
 'img147',
 'img148',
 'img149',
 'img15',
 'img150',
 'img151',
 'img152',
 'img153',
 'img154',
 'img155',
 'img156',
 'img157',
 'img158',
 'img159',
 'img16',
 'img160',
 'img161',
 'img162',
 'img163',
 'img164',
 'img165',
 'img166',
 'img167',
 'img168',
 'img169',
 'img17',
 'img170',
 'img171',
 'img172',
 'img173',
 'img174',
 'img175',
 'img176',
 'img177',
 'img178',
 'img179',
 'img18',
 'img180',
 

In [59]:
# Создание файла для отправки
submission= pd.DataFrame({"Id": test_img_ids, "Expected": predicted_characters})

In [60]:
submission

Unnamed: 0,Id,Expected
0,img0,homer_simpson
1,img1,homer_simpson
2,img10,homer_simpson
3,img100,homer_simpson
4,img101,homer_simpson
...,...,...
986,img987,homer_simpson
987,img988,homer_simpson
988,img989,homer_simpson
989,img99,homer_simpson


In [61]:
submission['Id'] = submission['Id'].apply(lambda x: f"{x}.jpg" if not x.endswith('.jpg') else x)

In [62]:
submission

Unnamed: 0,Id,Expected
0,img0.jpg,homer_simpson
1,img1.jpg,homer_simpson
2,img10.jpg,homer_simpson
3,img100.jpg,homer_simpson
4,img101.jpg,homer_simpson
...,...,...
986,img987.jpg,homer_simpson
987,img988.jpg,homer_simpson
988,img989.jpg,homer_simpson
989,img99.jpg,homer_simpson


In [65]:
submission.to_csv("submission.csv", index=False)