In [8]:
from datasets import load_dataset
from transformers import AutoFeatureExtractor  , AutoModelForImageClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset
from torchvision import transforms
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from PIL import Image
import torch

In [9]:
print(torch.cuda.is_available())

True


In [19]:
#Load  dataset

dataset = load_dataset("Piro17/dataset-affecthqnet-fer2013")
#dataset = load_dataset("AutumnQiu/fer2013")

sample_train = 24000
sample_test = 3000
print(dataset)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

dataset['test'] = dataset['train'].shuffle(seed=96).select(range(sample_test))
dataset['train'] = dataset['train'].shuffle(seed=23).select(range(sample_train))

test_valid_split = dataset['test'].train_test_split(test_size=0.65, seed=45)
dataset['test'] = test_valid_split['train']
dataset['validation'] = test_valid_split['test']

#Import Moel from HuggingFace

#model = "google/mobilenet_v2_1.0_224"
#model = "microsoft/resnet-26"
model = "microsoft/resnet-50"
feature_extractor = AutoFeatureExtractor.from_pretrained(model)
model = AutoModelForImageClassification.from_pretrained(model)


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 56532
    })
})
cuda




In [20]:
print(model)

ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [21]:
# Define the transform function
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([img.convert("RGB") for img in example_batch['image']], return_tensors='pt')
    inputs['labels'] = example_batch['label']
    return inputs

# Apply the transform to the datasets
dataset['train'] = dataset['train'].map(transform, batched=True)
dataset['validation'] = dataset['validation'].map(transform, batched=True)

# Remove the 'image' column as it's now transformed
dataset['train'] = dataset['train'].remove_columns(['image'])
dataset['validation'] = dataset['validation'].remove_columns(['image'])

# Set the format for PyTorch
dataset.set_format(type='torch')

In [22]:
from transformers import Trainer

In [23]:
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [24]:
from transformers import EarlyStoppingCallback

In [25]:
#Training Args
training_args = TrainingArguments(
    output_dir='./huggingface_fer_model/results',          # output directory
    num_train_epochs=25,              # total number of training epochs
    per_device_train_batch_size=24,  # batch size for training
    per_device_eval_batch_size=24,   # batch size for evaluation
    evaluation_strategy="epoch",     # evaluation strategy to use at the end of each epoch
    save_strategy="epoch",           # save strategy to use at the end of each epoch
    logging_dir='./huggingface_fer_model/logs',            # directory for storing logs
    logging_steps=25,
    warmup_steps=5000,                 # number of warmup steps for learning rate scheduler
    report_to=[],                    # disable reporting to any integration
    learning_rate=7e-5,
    weight_decay=0.055,
    fp16=True,                     # use mixed precision training
    load_best_model_at_end=True,     # load the best model when finished training (default metric is loss)
    metric_for_best_model="eval_loss",
    greater_is_better=False,          # lower loss is better
    save_total_limit=2,               # limit the total amount of checkpoints, delete the older checkpoints in the output_dir    
)

#Trainer
trainer = Trainer(
    model=model,                         
    args=training_args,                  
    train_dataset=dataset['train'],      
    eval_dataset=dataset['validation'],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5) ]  # Stop training if no improvement
)



In [26]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,4.8346,4.817335,0.07641,0.19479,0.07641,0.104305
2,1.7432,1.691896,0.387692,0.358103,0.387692,0.360819
3,1.3272,1.284405,0.505641,0.494202,0.505641,0.496704
4,1.1204,1.120015,0.584615,0.593229,0.584615,0.579575
5,1.0855,1.004841,0.628718,0.622072,0.628718,0.623834
6,0.9206,0.907191,0.674872,0.674375,0.674872,0.67274
7,0.7432,0.855457,0.699487,0.702619,0.699487,0.699495
8,0.707,0.813227,0.72359,0.721056,0.72359,0.721355
9,0.5399,0.791244,0.732821,0.735509,0.732821,0.73315
10,0.5054,0.777031,0.742564,0.746607,0.742564,0.743487


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TrainOutput(global_step=15000, training_loss=1.2201182902177174, metrics={'train_runtime': 3046.1827, 'train_samples_per_second': 196.968, 'train_steps_per_second': 8.207, 'total_flos': 8.30962565185536e+18, 'train_loss': 1.2201182902177174, 'epoch': 15.0})

In [29]:
eval_results = trainer.evaluate()
print(f"Validation Loss: {eval_results['eval_loss']:.4f}")
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"Validation Precision: {eval_results['eval_precision']:.4f}")
print(f"Validation Recall: {eval_results['eval_recall']:.4f}")
print(f"Validation F1 Score: {eval_results['eval_f1']:.4f}")

Validation Loss: 0.7770
Validation Accuracy: 0.7426
Validation Precision: 0.7466
Validation Recall: 0.7426
Validation F1 Score: 0.7435


In [32]:
model.save_pretrained('./resnet_50_affectnethq-fer2013_model')
feature_extractor.save_pretrained('./resnet_50 _affectnethq-fer2013_model')

['./resnet_50 _affectnethq-fer2013_model/preprocessor_config.json']

In [34]:
print(dataset['test']['label'][:5])

tensor([3, 1, 0, 2, 4])


In [None]:
label2id = {
    'Angry': 0,
    'Disgust': 1,
    'Fear': 2,
    'Happy': 3,
    'Neutral': 4,
    'Sad': 5,
    'Surprise': 6
}