In [1]:
from datasets import load_dataset
from transformers import AutoFeatureExtractor  , AutoModelForImageClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset
from torchvision import transforms
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from PIL import Image
import torch

  from .autonotebook import tqdm as notebook_tqdm
2025-03-05 08:03:14.833497: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-05 08:03:14.846400: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-05 08:03:14.972318: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-05 08:03:15.084377: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741179795.172163  188744 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has

In [4]:
print(torch.cuda.is_available())

True


In [31]:
#Load  dataset

#dataset = load_dataset("Piro17/dataset-affecthqnet-fer2013")
dataset = load_dataset("AutumnQiu/fer2013")

sample_train = 24000
sample_test = 3000
print(dataset)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

dataset['test'] = dataset['train'].shuffle(seed=96).select(range(sample_test))
dataset['train'] = dataset['train'].shuffle(seed=23).select(range(sample_train))

test_valid_split = dataset['test'].train_test_split(test_size=0.65, seed=45)
dataset['test'] = test_valid_split['train']
dataset['validation'] = test_valid_split['test']

#Import Moel from HuggingFace

model = "google/mobilenet_v2_1.0_224"
#model = "microsoft/resnet-26"
feature_extractor = AutoFeatureExtractor.from_pretrained(model)
model = AutoModelForImageClassification.from_pretrained(model)


DatasetDict({
    train: Dataset({
        features: ['label', 'image'],
        num_rows: 28709
    })
    valid: Dataset({
        features: ['label', 'image'],
        num_rows: 3589
    })
    test: Dataset({
        features: ['label', 'image'],
        num_rows: 3589
    })
})
cuda




In [32]:
print(dataset['train'][0])

{'label': 4, 'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=48x48 at 0x7F81F77D6980>}


In [33]:
# Define the transform function
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([img.convert("RGB") for img in example_batch['image']], return_tensors='pt')
    inputs['labels'] = example_batch['label']
    return inputs

# Apply the transform to the datasets
dataset = dataset.map(transform, batched=True)

# Remove the 'image' column as it's now transformed
dataset = dataset.remove_columns(['image'])

# Set the format for PyTorch
dataset.set_format(type='torch')

Map: 100%|██████████| 24000/24000 [02:19<00:00, 171.59 examples/s]
Map: 100%|██████████| 3589/3589 [00:14<00:00, 254.37 examples/s]
Map: 100%|██████████| 1050/1050 [00:02<00:00, 365.38 examples/s]
Map: 100%|██████████| 1950/1950 [00:07<00:00, 261.12 examples/s]


In [34]:
from transformers import Trainer

In [35]:
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [36]:
from transformers import EarlyStoppingCallback

In [37]:
#Training Args
training_args = TrainingArguments(
    output_dir='./huggingface_fer_model/results',          # output directory
    num_train_epochs=25,              # total number of training epochs
    per_device_train_batch_size=24,  # batch size for training
    per_device_eval_batch_size=24,   # batch size for evaluation
    evaluation_strategy="epoch",     # evaluation strategy to use at the end of each epoch
    save_strategy="epoch",           # save strategy to use at the end of each epoch
    logging_dir='./huggingface_fer_model/logs',            # directory for storing logs
    logging_steps=25,
    warmup_steps=5000,                 # number of warmup steps for learning rate scheduler
    report_to=[],                    # disable reporting to any integration
    learning_rate=7e-5,
    weight_decay=0.055,
    fp16=True,                     # use mixed precision training
    load_best_model_at_end=True,     # load the best model when finished training (default metric is loss)
    metric_for_best_model="eval_loss",
    greater_is_better=False,          # lower loss is better
    save_total_limit=2,               # limit the total amount of checkpoints, delete the older checkpoints in the output_dir    
)

#Trainer
trainer = Trainer(
    model=model,                         
    args=training_args,                  
    train_dataset=dataset['train'],      
    eval_dataset=dataset['validation'],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5) ]  # Stop training if no improvement
)



In [38]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1596,2.131518,0.429744,0.366782,0.429744,0.391825
2,1.2807,1.296232,0.528718,0.554651,0.528718,0.524495
3,1.0547,0.951088,0.661538,0.678078,0.661538,0.657863
4,0.893,0.793977,0.72,0.73113,0.72,0.71266
5,0.874,0.623072,0.785641,0.796738,0.785641,0.785173
6,0.6413,0.529034,0.825641,0.826642,0.825641,0.825064
7,0.3639,0.423692,0.884615,0.88818,0.884615,0.884418
8,0.2835,0.484553,0.876923,0.882806,0.876923,0.876681
9,0.1977,0.420343,0.899487,0.900888,0.899487,0.900002
10,0.1249,0.46276,0.909744,0.911242,0.909744,0.909857


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TrainOutput(global_step=14000, training_loss=0.7779143714691912, metrics={'train_runtime': 2794.8439, 'train_samples_per_second': 214.681, 'train_steps_per_second': 8.945, 'total_flos': 1.063992784748544e+18, 'train_loss': 0.7779143714691912, 'epoch': 14.0})

In [39]:
eval_results = trainer.evaluate()
print(f"Validation Loss: {eval_results['eval_loss']:.4f}")
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"Validation Precision: {eval_results['eval_precision']:.4f}")
print(f"Validation Recall: {eval_results['eval_recall']:.4f}")
print(f"Validation F1 Score: {eval_results['eval_f1']:.4f}")

Validation Loss: 0.4203
Validation Accuracy: 0.8995
Validation Precision: 0.9009
Validation Recall: 0.8995
Validation F1 Score: 0.9000


In [None]:
model.save_pretrained('./resnet26_affectnethq-fer2013_model')
feature_extractor.save_pretrained('./resnet26_affectnethq-fer2013_model')

['./mobilenet_v2_affectnet_model/preprocessor_config.json']

In [None]:
model.save_pretrained('./resnet26_fer2013_model')
feature_extractor.save_pretrained('./resnet26_fer2013_model')
