In [2]:
import os
import torch
import datasets
import numpy as np
import pandas as pd
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torchvision.transforms import Compose, Normalize, ToTensor, Resize
from transformers import AutoFeatureExtractor, AutoModelForImageClassification, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split

checkpoint = "microsoft/resnet-18"
folder_path = './ECGs/'
csv_path = './scp_codes.csv'

In [3]:
df = pd.read_csv(csv_path, delimiter=";")
images, labels = [], []

for _, row in df.iterrows():
    image_name = row['filename_hr']
    image_path = os.path.join(folder_path, f"{image_name}.jpg")
    if os.path.exists(image_path):
        image = Image.open(image_path)
        images.append(image)
        labels.append(row['normal'])
        image.close()

In [4]:
len(images), len(labels)

(21798, 21798)

In [5]:
train_X, hold_X, train_y, hold_y = train_test_split(images, labels, test_size=0.2)
eval_X, test_X, eval_y, test_y = train_test_split(hold_X, hold_y, test_size=0.5)

train_dataset = datasets.Dataset.from_dict({"image": train_X, "label": train_y})
eval_dataset = datasets.Dataset.from_dict({"image": eval_X, "label": eval_y})
test_dataset = datasets.Dataset.from_dict({"image": test_X, "label": test_y})

In [6]:
sum(train_dataset['label'])/len(train_dataset['label']),\
sum(eval_dataset['label'])/len(eval_dataset['label']),\
sum(test_dataset['label'])/len(test_dataset['label'])

(0.4074435141644684, 0.4270642201834862, 0.4114678899082569)

In [7]:
extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
normalize = Normalize(mean=extractor.image_mean, std=extractor.image_std)
resize = Resize((extractor.size['shortest_edge'], extractor.size['shortest_edge']))
transform = Compose([resize, ToTensor(), normalize])

def preprocess(example):
    example["pixel_values"] = [transform(image.convert('RGB')) for image in example["image"]]
    return example



In [8]:
train_dataset.set_transform(preprocess)
eval_dataset.set_transform(preprocess)
test_dataset.set_transform(preprocess)

In [9]:
train_dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=2000x1200>,
 'label': 1,
 'pixel_values': tensor([[[1.3584, 1.6495, 1.6495,  ..., 1.6495, 1.6495, 1.6495],
          [1.8893, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2147],
          [1.9064, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2147],
          ...,
          [1.9064, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2147],
          [1.9064, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2147],
          [1.8893, 2.2147, 2.2147,  ..., 2.2147, 2.2147, 2.1975]],
 
         [[1.5182, 1.8158, 1.8158,  ..., 1.8158, 1.8158, 1.8158],
          [2.0609, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.3936],
          [2.0784, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.3936],
          ...,
          [2.0784, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.3936],
          [2.0784, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.3936],
          [2.0609, 2.3936, 2.3936,  ..., 2.3936, 2.3936, 2.3761]],
 
         [[1.7337, 2.0300, 2.0300,  ..., 2.0300, 2.0300, 2.0300],


In [10]:
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    label2id = {'Normal': 1, 'Abnormal': 0},
    id2label = {'1': 'Normal', '0': 'Abnormal'},
    ignore_mismatched_sizes = True,
    )

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-18 and are newly initialized because the shapes did not match:
- classifier.1.weight: found shape torch.Size([1000, 512]) in the checkpoint and torch.Size([2, 512]) in the model instantiated
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
training_args = TrainingArguments(
    output_dir="./output",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=10,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)

def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

metric = datasets.load_metric("accuracy")

def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions"""
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=extractor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)


In [13]:
trainer.train()



  0%|          | 0/5450 [00:00<?, ?it/s]

{'loss': 0.591, 'learning_rate': 9.174311926605506e-05, 'epoch': 0.02}
{'loss': 0.5438, 'learning_rate': 0.00018348623853211012, 'epoch': 0.04}
{'loss': 0.4967, 'learning_rate': 0.00027522935779816516, 'epoch': 0.06}
{'loss': 0.4336, 'learning_rate': 0.00036697247706422024, 'epoch': 0.07}
{'loss': 0.4888, 'learning_rate': 0.00045871559633027525, 'epoch': 0.09}
{'loss': 0.5323, 'learning_rate': 0.0005504587155963303, 'epoch': 0.11}
{'loss': 0.5061, 'learning_rate': 0.0006422018348623854, 'epoch': 0.13}
{'loss': 0.4262, 'learning_rate': 0.0007339449541284405, 'epoch': 0.15}
{'loss': 0.4328, 'learning_rate': 0.0008256880733944954, 'epoch': 0.17}
{'loss': 0.4161, 'learning_rate': 0.0009174311926605505, 'epoch': 0.18}
{'loss': 0.4432, 'learning_rate': 0.0010091743119266055, 'epoch': 0.2}
{'loss': 0.563, 'learning_rate': 0.0011009174311926607, 'epoch': 0.22}
{'loss': 0.4849, 'learning_rate': 0.0011926605504587156, 'epoch': 0.24}
{'loss': 0.4611, 'learning_rate': 0.0012844036697247708, 'epoch

In [None]:
trainer.evaluate(test_dataset)