In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from mlxtend.plotting import plot_confusion_matrix
from sklearn.model_selection import train_test_split
import os
from torchvision import transforms

In [2]:
data = pd.read_csv('../Khaairi/Data/fer2013_clean.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33325 entries, 0 to 33324
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   pixels   33325 non-null  object
 1   emotion  33325 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 520.8+ KB


In [3]:
# Pertama, pisahkan data train (90%) dan validation (10%)
data_train, data_test = train_test_split(data, test_size=0.1, stratify=data['emotion'], random_state=42)
data_train, data_val = train_test_split(data_train, test_size=0.1, stratify=data_train['emotion'], random_state=42)

# Cek ukuran masing-masing set untuk memastikan proporsi
print(f'Train set size: {len(data_train)}')
print(f'Validation set size: {len(data_val)}')
print(f'Test set size: {len(data_test)}')

Train set size: 26992
Validation set size: 3000
Test set size: 3333


In [4]:
from datasets import Dataset, DatasetDict

train_dataset = Dataset.from_pandas(data_train)
val_dataset = Dataset.from_pandas(data_val)
test_dataset = Dataset.from_pandas(data_test)

# Membuat DatasetDict
dataset = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})

print(dataset)

  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['pixels', 'emotion', '__index_level_0__'],
        num_rows: 26992
    })
    validation: Dataset({
        features: ['pixels', 'emotion', '__index_level_0__'],
        num_rows: 3000
    })
    test: Dataset({
        features: ['pixels', 'emotion', '__index_level_0__'],
        num_rows: 3333
    })
})


In [5]:
labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Mapping dari label ke ID (label2id)
label2id = {c: idx for idx, c in enumerate(labels)}

# Mapping dari ID ke label (id2label)
id2label = {idx: c for idx, c in enumerate(labels)}

print("label2id:", label2id)
print("id2label:", id2label)

label2id: {'Angry': 0, 'Disgust': 1, 'Fear': 2, 'Happy': 3, 'Sad': 4, 'Surprise': 5, 'Neutral': 6}
id2label: {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}


In [6]:
from transformers import AutoImageProcessor, ViTForImageClassification

processor = AutoImageProcessor.from_pretrained('google/vit-base-patch16-224')
processor

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


ViTImageProcessor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "ViTImageProcessor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

In [7]:
data_augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Randomly flip horizontally
    transforms.RandomRotation(10),     # Randomly rotate by 10 degrees
    transforms.RandomResizedCrop(
        size=224,  # Output size
        scale=(0.8, 1.0)  # Range of the random crop size relative to the input size
    ),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Adjust brightness
]) 

In [8]:
from PIL import Image

def string_to_image(image_pixels):
    # Ubah string piksel menjadi array numpy
    pixels = np.array([int(pixel) for pixel in image_pixels.split()], dtype=np.uint8)
    # Bentuk ulang array menjadi gambar 48x48 (sesuaikan dengan resolusi gambar Anda)
    image = pixels.reshape(48, 48)
    # Ubah menjadi gambar RGB
    image = Image.fromarray(image).convert('RGB')
    return image

In [9]:
def transforms(batch):
    # Ubah string piksel menjadi gambar RGB
    batch['pixels'] = [string_to_image(x) for x in batch['pixels']]
    # Apply data augmentation
    batch['pixels'] = [data_augmentation(image) for image in batch['pixels']]
    # Proses gambar dengan tokenizer/processor
    inputs = processor(batch['pixels'], return_tensors='pt')
    # Buat label yang sesuai dengan ID label
    inputs['labels'] = batch['emotion']
    return inputs

In [10]:
processed_dataset = dataset.with_transform(transforms)

In [11]:
import torch

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [12]:
import evaluate

# Memuat metrik
accuracy = evaluate.load('accuracy')
f1 = evaluate.load('f1')

# Fungsi untuk menghitung metrik
def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=1)

    # Menghitung accuracy
    accuracy_score = accuracy.compute(predictions=predictions, references=labels)

    # Menghitung F1 score
    f1_score = f1.compute(predictions=predictions, references=labels, average='weighted')

    # Menggabungkan kedua metrik
    metrics = {
        'accuracy': accuracy_score['accuracy'],
        'f1': f1_score['f1']
    }

    return metrics

In [13]:
vit = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels = len(labels),
    id2label = id2label,
    label2id = label2id,
    ignore_mismatched_sizes = True
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([7]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
vit

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [15]:
# for name,p in model.named_parameters():
#     if not name.startswith('classifier'):
#         p.requires_grad = False

In [16]:
num_params = sum([p.numel() for p in vit.parameters()])
trainable_params = sum([p.numel() for p in vit.parameters() if p.requires_grad])

print(f"{num_params = :,} | {trainable_params = :,}")

num_params = 85,804,039 | trainable_params = 85,804,039


In [17]:
from transformers import TrainerCallback, TrainingArguments, Trainer

# Callback untuk menyimpan model terbaik dan loss
class SaveBestModelCallback(TrainerCallback):
    def __init__(self, save_path, metric_name='eval_accuracy'):
        super().__init__()
        self.best_metric = -float('inf')  # Menyimpan metrik terbaik
        self.metric_name = metric_name
        self.save_path = save_path
        self.train_losses = []  # Menyimpan training loss per epoch
        self.eval_losses = []   # Menyimpan validation loss per epoch
        self.accuracies = []  # Menyimpan accuracy per epoch
        os.makedirs(self.save_path, exist_ok=True)
    
    def on_evaluate(self, args, state, control, metrics, **kwargs):
        if 'eval_loss' in metrics:
            self.eval_losses.append(metrics['eval_loss'])
        if 'eval_accuracy' in metrics:
            self.accuracies.append(metrics['eval_accuracy'])
        # Periksa apakah metrik saat ini lebih baik dari yang terbaik
        if self.metric_name in metrics and metrics[self.metric_name] > self.best_metric:
            self.best_metric = metrics[self.metric_name]
            model_path = os.path.join(self.save_path, "pretrained_best_model.pt")
            torch.save(kwargs['model'].state_dict(), model_path)
            print(f"Model terbaik disimpan di {model_path} dengan {self.metric_name}: {self.best_metric:.4f}")
    
    def on_epoch_end(self, args, state, control, **kwargs):
        # Ambil training loss dan accuracy dari log_history
        if state.log_history:
            if "loss" in state.log_history[-1]:
                self.train_losses.append(state.log_history[-1]["loss"])

        # Simpan grafik loss
        loss_path = os.path.join(self.save_path, "pretrained_loss_plot.png")
        plt.figure(figsize=(10, 5))
        plt.plot(range(1, len(self.train_losses) + 1), self.train_losses, label="Training Loss", marker='o')
        if self.eval_losses:
            plt.plot(range(1, len(self.eval_losses) + 1), self.eval_losses, label="Validation Loss", marker='o')
        plt.title("Loss per Epoch")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)
        plt.savefig(loss_path)
        plt.close()
        print(f"Grafik loss disimpan untuk epoch {state.epoch}")
        
        # Simpan grafik accuracy
        accuracy_path = os.path.join(self.save_path, "pretrained_accuracy_plot.png")
        plt.figure(figsize=(10, 5))
        plt.plot(range(1, len(self.accuracies) + 1), self.accuracies, label="Accuracy", marker='o')
        plt.title("Accuracy per Epoch")
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.legend()
        plt.grid(True)
        plt.savefig(accuracy_path)
        plt.close()
        print(f"Grafik accuracy disimpan untuk epoch {state.epoch}")

In [18]:
training_args = TrainingArguments(
    output_dir="../Khaairi/Hasil",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    evaluation_strategy="epoch",
    save_strategy="no",
    logging_steps=100,
    num_train_epochs=1000,
    learning_rate=3e-4,
    remove_unused_columns=False,
)



In [19]:
trainer = Trainer(
    model=vit,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    tokenizer=processor,
    callbacks=[SaveBestModelCallback(save_path = '../Khaairi/Hasil')]
)

  trainer = Trainer(


In [20]:
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0735,1.089837,0.592667,0.567496
2,0.9693,1.073842,0.589333,0.579468
3,0.8692,0.936562,0.646667,0.645927
4,0.8041,0.910439,0.655667,0.651385
5,0.7457,0.957419,0.650667,0.639485
6,0.6734,0.920236,0.676333,0.673238
7,0.6001,0.950477,0.668667,0.664142
8,0.5286,0.916908,0.678,0.67583
9,0.4638,1.113695,0.658333,0.653353
10,0.4268,1.071978,0.658,0.654745


Grafik loss disimpan untuk epoch 1.0
Grafik accuracy disimpan untuk epoch 1.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.5927




Grafik loss disimpan untuk epoch 2.0
Grafik accuracy disimpan untuk epoch 2.0




Grafik loss disimpan untuk epoch 3.0
Grafik accuracy disimpan untuk epoch 3.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.6467




Grafik loss disimpan untuk epoch 4.0
Grafik accuracy disimpan untuk epoch 4.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.6557




Grafik loss disimpan untuk epoch 5.0
Grafik accuracy disimpan untuk epoch 5.0




Grafik loss disimpan untuk epoch 6.0
Grafik accuracy disimpan untuk epoch 6.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.6763




Grafik loss disimpan untuk epoch 7.0
Grafik accuracy disimpan untuk epoch 7.0




Grafik loss disimpan untuk epoch 8.0
Grafik accuracy disimpan untuk epoch 8.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.6780




Grafik loss disimpan untuk epoch 9.0
Grafik accuracy disimpan untuk epoch 9.0




Grafik loss disimpan untuk epoch 10.0
Grafik accuracy disimpan untuk epoch 10.0




Grafik loss disimpan untuk epoch 11.0
Grafik accuracy disimpan untuk epoch 11.0




Grafik loss disimpan untuk epoch 12.0
Grafik accuracy disimpan untuk epoch 12.0




Grafik loss disimpan untuk epoch 13.0
Grafik accuracy disimpan untuk epoch 13.0




Grafik loss disimpan untuk epoch 14.0
Grafik accuracy disimpan untuk epoch 14.0




Grafik loss disimpan untuk epoch 15.0
Grafik accuracy disimpan untuk epoch 15.0
Model terbaik disimpan di ../Khaairi/Hasil/pretrained_best_model.pt dengan eval_accuracy: 0.6813




Grafik loss disimpan untuk epoch 16.0
Grafik accuracy disimpan untuk epoch 16.0




Grafik loss disimpan untuk epoch 17.0
Grafik accuracy disimpan untuk epoch 17.0




Grafik loss disimpan untuk epoch 18.0
Grafik accuracy disimpan untuk epoch 18.0




Grafik loss disimpan untuk epoch 19.0
Grafik accuracy disimpan untuk epoch 19.0




KeyboardInterrupt: 

In [None]:
trainer.evaluate(processed_dataset["test"])

In [None]:
# Melakukan prediksi pada dataset validasi
predictions = trainer.predict(processed_dataset["test"])

# Mendapatkan logits dan label sebenarnya
logits = predictions.predictions
labels = predictions.label_ids

# Mendapatkan prediksi akhir dengan argmax
y_pred = np.argmax(logits, axis=1)
y_true = labels

In [None]:
# Menghitung Confusion Matrix
cm = confusion_matrix(y_true, y_pred)

# Mengatur ukuran gambar
plt.figure(figsize=(12, 10))

# Menampilkan Confusion Matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(id2label.values()))
disp.plot(cmap=plt.cm.Blues, values_format='d')

# Mengatur judul dan ukuran font
plt.title("Confusion Matrix", fontsize=18)
plt.xticks(rotation=45, ha='right', fontsize=8)  # Ukuran font lebih kecil untuk sumbu x
plt.yticks(fontsize=8)  # Ukuran font lebih kecil untuk sumbu y
plt.xlabel('Predicted Labels', fontsize=12)
plt.ylabel('True Labels', fontsize=12)

# Tampilkan plot
plt.show()

In [None]:
vit2 = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels = len(labels),
    id2label = id2label,
    label2id = label2id,
    ignore_mismatched_sizes = True
)

In [None]:
# Load state_dict
model_path = "../Khaairi/Result/pretrained_best_model.pt"
vit2.load_state_dict(torch.load(model_path))

In [None]:
trainer = Trainer(
    model=vit2,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    tokenizer=processor,
    callbacks=[SaveBestModelCallback(save_path = '../Khaairi/Result')]
)

In [None]:
trainer.evaluate(processed_dataset["test"])

In [None]:
# Melakukan prediksi pada dataset validasi
predictions = trainer.predict(processed_dataset["test"])

# Mendapatkan logits dan label sebenarnya
logits = predictions.predictions
labels = predictions.label_ids

# Mendapatkan prediksi akhir dengan argmax
y_pred = np.argmax(logits, axis=1)
y_true = labels

In [None]:
# Menghitung Confusion Matrix
cm = confusion_matrix(y_true, y_pred)

# Mengatur ukuran gambar
plt.figure(figsize=(12, 10))

# Menampilkan Confusion Matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(id2label.values()))
disp.plot(cmap=plt.cm.Blues, values_format='d')

# Mengatur judul dan ukuran font
plt.title("Confusion Matrix", fontsize=18)
plt.xticks(rotation=45, ha='right', fontsize=8)  # Ukuran font lebih kecil untuk sumbu x
plt.yticks(fontsize=8)  # Ukuran font lebih kecil untuk sumbu y
plt.xlabel('Predicted Labels', fontsize=12)
plt.ylabel('True Labels', fontsize=12)

# Tampilkan plot
plt.show()