In [2]:
from datasets import load_dataset
from transformers import AutoFeatureExtractor  , AutoModelForImageClassification, TrainingArguments, Trainer, MobileNetV2ForImageClassification
from torch.utils.data import Dataset
from torchvision import transforms
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from PIL import Image
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm





In [4]:
#Load  dataset
dataset = load_dataset("Piro17/dataset-affecthqnet-fer2013")
#dataset = load_dataset("AutumnQiu/fer2013")

sample_train = 24000
sample_test = 3000

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset['test'] = dataset['train'].shuffle(seed=96).select(range(sample_test))
dataset['train'] = dataset['train'].shuffle(seed=23).select(range(sample_train))

test_valid_split = dataset['test'].train_test_split(test_size=0.65, seed=45)
dataset['test'] = test_valid_split['train']
dataset['validation'] = test_valid_split['test']


#Import Model from HuggingFace
model = "google/mobilenet_v2_1.0_224"
#model = "microsoft/resnet-26"
#model = "microsoft/resnet-50"
feature_extractor = AutoFeatureExtractor.from_pretrained(model)
model = MobileNetV2ForImageClassification.from_pretrained(model)

label2id = {
    "angry": 0,
    "disgust": 1,
    "fear": 2,
    "happy": 3,
    "sad": 4,
    "surprise": 5,
    "neutral": 6,
}

id2label = {v: k for k, v in label2id.items()}

#Fix up the id2label and label2id configurations (Ensure it is consistent with the dataset)
model.config.label2id = label2id
model.config.id2label = id2label


In [5]:
print("Model label2id:", model.config.label2id)
print("Model id2label:", model.config.id2label)

Model label2id: {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'sad': 4, 'surprise': 5, 'neutral': 6}
Model id2label: {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sad', 5: 'surprise', 6: 'neutral'}


In [6]:
# Define the transform function
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([img.convert("RGB") for img in example_batch['image']], return_tensors='pt')
    inputs['labels'] = example_batch['label']
    return inputs

# Apply the transform to the datasets
dataset['train'] = dataset['train'].map(transform, batched=True)
dataset['validation'] = dataset['validation'].map(transform, batched=True)

# Remove the 'image' column as it's now transformed
dataset['train'] = dataset['train'].remove_columns(['image'])
dataset['validation'] = dataset['validation'].remove_columns(['image'])

# Set the format for PyTorch
dataset.set_format(type='torch')


In [7]:
print(torch.cuda.is_available())

True


In [8]:
from transformers import Trainer

In [9]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    # Convert numeric labels to string labels
    predicted_labels = [id2label[p] for p in predictions]
    true_labels = [id2label[l] for l in labels]

    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels, average='weighted')  # Use 'weighted' for multi-class

    return {"accuracy": accuracy, "f1": f1}

In [10]:
from transformers import EarlyStoppingCallback

In [11]:
#Training Args
training_args = TrainingArguments(
    output_dir='./huggingface_fer_model/results',
    num_train_epochs=25,
    per_device_train_batch_size=24,
    per_device_eval_batch_size=24,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./huggingface_fer_model/logs',
    logging_steps=25,
    warmup_steps=5000,
    report_to=[],
    learning_rate=7e-5,
    weight_decay=0.055,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=2,
)

#Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
)



In [12]:
trainer.train()

  0%|          | 25/25000 [00:33<10:44:51,  1.55s/it]

{'loss': 7.6016, 'grad_norm': 24.104385375976562, 'learning_rate': 3.5e-07, 'epoch': 0.03}


  0%|          | 50/25000 [01:13<10:56:01,  1.58s/it]

{'loss': 7.5544, 'grad_norm': 22.815431594848633, 'learning_rate': 7e-07, 'epoch': 0.05}


  0%|          | 51/25000 [01:15<11:31:08,  1.66s/it]

KeyboardInterrupt: 

In [None]:
eval_results = trainer.evaluate()
print(f"Validation Loss: {eval_results['eval_loss']:.4f}")
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"Validation Precision: {eval_results['eval_precision']:.4f}")
print(f"Validation Recall: {eval_results['eval_recall']:.4f}")
print(f"Validation F1 Score: {eval_results['eval_f1']:.4f}")

Validation Loss: 0.7770
Validation Accuracy: 0.7426
Validation Precision: 0.7466
Validation Recall: 0.7426
Validation F1 Score: 0.7435


In [None]:
model.save_pretrained('./mobilenet_v2_affectnethq-fer2013_model')
feature_extractor.save_pretrained('./mobilenet_v2_affectnethq-fer2013_model')

['./resnet_50 _affectnethq-fer2013_model/preprocessor_config.json']

In [None]:
from transformers import pipeline

In [None]:
with torch.device("cuda"):
    model = AutoModelForImageClassification.from_pretrained('./mobilenet_v2_affectnethq-fer2013_model',
                                            torch_dtype=torch.float16,)
    feature_extractor = AutoFeatureExtractor.from_pretrained('./mobilenet_v2_affectnethq-fer2013_model', torch_dtype=torch.float16)

    # Create the pipeline
    classifier = pipeline('image-classification', model=model, feature_extractor=feature_extractor, device=0)

    # Perform predictions
    predicted_labels = classifier(dataset['validation']['image'][:5])
    actual_labels = [id2label[label] for label in dataset['validation']['label'][:5]]

    # Print results
    print(predicted_labels)
    print(actual_labels)

KeyError: 7