In [None]:
# Mount Google Drive at /content/drive so files stored under MyDrive
# (the image dataset used below) are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data_dir = "/content/drive/MyDrive/Colab Notebooks/MultiClass_m-20240806T134043Z-001/MultiClass_m"
!pip install datasets transformers torch torchvision scikit-learn


Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [None]:
import os
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import numpy as np
from datasets import load_metric

class CustomDataset(Dataset):
    """Image-classification dataset that reads images lazily from disk.

    Each item is opened from ``img_paths[idx]``, converted to RGB, optionally
    passed through ``transform`` and finally through ``feature_extractor``.

    Args:
        img_paths: Sequence of image file paths.
        labels: Sequence of integer class ids aligned with ``img_paths``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; its result
            must expose a ``'pixel_values'`` tensor (expected to carry a leading
            batch axis of size 1).
        transform: Optional callable applied to the RGB image before the
            feature extractor.

    Attributes:
        class_weights: Float tensor of 'balanced' class weights derived from
            ``labels`` when the dataset is constructed.
    """

    def __init__(self, img_paths, labels, feature_extractor, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.feature_extractor = feature_extractor
        self.transform = transform
        self.class_weights = self.compute_class_weights()

    def compute_class_weights(self):
        """Return 'balanced' class weights for ``self.labels`` as a float tensor.

        There is one weight per distinct value found in ``self.labels``
        (in the sorted order produced by ``np.unique``).
        """
        class_weights = compute_class_weight('balanced', classes=np.unique(self.labels), y=self.labels)
        return torch.tensor(class_weights, dtype=torch.float)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``{"pixel_values": ..., "labels": ...}``."""
        img_path = self.img_paths[idx]
        label = self.labels[idx]
        # The context manager releases the file handle as soon as convert()
        # has produced an independent in-memory copy of the image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # drop any other axis that happens to have size 1.
        pixel_values = inputs['pixel_values'].squeeze(0)
        # Explicit int64: the classification loss requires long targets.
        return {"pixel_values": pixel_values, "labels": torch.tensor(label, dtype=torch.long)}


# Data augmentation and transformation
from transformers import ViTFeatureExtractor
from sklearn.model_selection import train_test_split

# Data augmentation, applied to the PIL image before the ViT feature extractor.
# There is deliberately no ToTensor() at the end: ToTensor() scales pixels to
# [0, 1], and the feature extractor then rescales by 1/255 a second time, which
# squashes every image to an almost constant input. Keeping the image as PIL
# lets the feature extractor do the resize / rescale / normalise exactly once.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.RandomPerspective(distortion_scale=0.2),
])


# Load dataset paths and labels
data_dir = "/content/drive/MyDrive/Colab Notebooks/MultiClass_m-20240806T134043Z-001/MultiClass_m"
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

img_paths = []
labels = []
# Every sub-directory of data_dir is one class. Plain files that may sit next
# to the class folders are skipped so they neither become a bogus class nor
# crash the os.listdir() call below.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)}

for label in classes:
    class_dir = os.path.join(data_dir, label)
    # os.listdir() order is arbitrary; sorting keeps the sample order (and
    # therefore any seeded split built from it) stable between runs.
    for img_name in sorted(os.listdir(class_dir)):
        img_paths.append(os.path.join(class_dir, img_name))
        labels.append(class_to_idx[label])

# Split dataset into train, validation, and test sets (80%, 10%, 10%).
# random_state pins both splits so reported metrics refer to the same images on every run.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    img_paths, labels, test_size=0.2, stratify=labels, random_state=42
)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    test_paths, test_labels, test_size=0.5, stratify=test_labels, random_state=42
)

# Only the training set is augmented. Validation and test images go straight to
# the feature extractor so evaluation is deterministic and measures the model,
# not the random crops / colour jitter.
train_dataset = CustomDataset(train_paths, train_labels, feature_extractor, transform=transform)
val_dataset = CustomDataset(val_paths, val_labels, feature_extractor)
test_dataset = CustomDataset(test_paths, test_labels, feature_extractor)





In [None]:
import torch.nn as nn

class CustomDataset(Dataset):
    """Image-classification dataset that reads images lazily from disk.

    Each item is opened from ``img_paths[idx]``, converted to RGB, optionally
    passed through ``transform`` and finally through ``feature_extractor``.

    Args:
        img_paths: Sequence of image file paths.
        labels: Sequence of integer class ids aligned with ``img_paths``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; its result
            must expose a ``'pixel_values'`` tensor (expected to carry a leading
            batch axis of size 1).
        transform: Optional callable applied to the RGB image before the
            feature extractor.

    Attributes:
        class_weights: Float tensor of 'balanced' class weights derived from
            ``labels`` when the dataset is constructed.
    """

    def __init__(self, img_paths, labels, feature_extractor, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.feature_extractor = feature_extractor
        self.transform = transform
        self.class_weights = self.compute_class_weights()

    def compute_class_weights(self):
        """Return 'balanced' class weights for ``self.labels`` as a float tensor.

        There is one weight per distinct value found in ``self.labels``
        (in the sorted order produced by ``np.unique``).
        """
        class_weights = compute_class_weight('balanced', classes=np.unique(self.labels), y=self.labels)
        return torch.tensor(class_weights, dtype=torch.float)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``{"pixel_values": ..., "labels": ...}``."""
        img_path = self.img_paths[idx]
        label = self.labels[idx]
        # The context manager releases the file handle as soon as convert()
        # has produced an independent in-memory copy of the image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # drop any other axis that happens to have size 1.
        pixel_values = inputs['pixel_values'].squeeze(0)
        # Explicit int64: the classification loss requires long targets.
        return {"pixel_values": pixel_values, "labels": torch.tensor(label, dtype=torch.long)}


In [None]:
from transformers import ViTForImageClassification, ViTConfig
class WeightedViTForImageClassification(ViTForImageClassification):
    """ViT classifier whose training loss is a class-weighted focal loss.

    Args:
        config: Model configuration forwarded to ``ViTForImageClassification``.
        class_weights: Tensor of per-class weights handed to ``FocalLoss``.
    """

    def __init__(self, config, class_weights):
        super().__init__(config)
        self.class_weights = class_weights

    def forward(self, pixel_values, labels=None, **kwargs):
        """Run the encoder and classify from the first token of its output.

        Returns:
            ``logits`` alone when ``labels`` is None, otherwise the tuple
            ``(loss, logits)``.
        """
        encoder_out = self.vit(pixel_values, **kwargs)
        first_token = encoder_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_token)

        # Inference path: nothing to compare against, so no loss is produced.
        if labels is None:
            return logits

        # Weights are moved to wherever the logits live before building the loss.
        criterion = FocalLoss(weight=self.class_weights.to(logits.device), gamma=2)
        flat_logits = logits.view(-1, self.config.num_labels)
        loss = criterion(flat_logits, labels.view(-1))
        return (loss, logits)

class FocalLoss(nn.Module):
    """Multi-class focal loss with optional per-class weights.

    For every sample ``i`` with target class ``y_i`` and predicted probability
    ``p_i`` of that class::

        loss_i = weight[y_i] * (1 - p_i) ** gamma * (-log p_i)

    and the returned value is the mean of ``loss_i`` over the batch.

    Args:
        weight: Optional 1-D tensor with one weight per class.
        gamma: Focusing exponent; ``gamma=0`` with no weights reduces to plain
            mean cross-entropy.
    """

    def __init__(self, weight=None, gamma=2):
        super(FocalLoss, self).__init__()
        self.weight = weight
        self.gamma = gamma

    def forward(self, input, target):
        """Compute the focal loss for ``input`` logits (N, C) and ``target`` ids (N,)."""
        # reduction='none' keeps one value per sample. With the default 'mean'
        # the focal factor would be applied to a single batch-average scalar
        # and could not down-weight individual easy examples.
        # The cross-entropy is left unweighted here so that exp(-ce) is the
        # true probability of the target class.
        ce_loss = nn.functional.cross_entropy(input, target, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss
        if self.weight is not None:
            class_weight = self.weight.to(device=input.device, dtype=focal_loss.dtype)
            focal_loss = class_weight[target] * focal_loss
        return focal_loss.mean()


In [None]:
from transformers import TrainingArguments, Trainer
import numpy as np
from datasets import load_metric
from sklearn.metrics import precision_recall_fscore_support

# Trainer hyper-parameters: 20 epochs, batches of 16 for both training and
# evaluation, one evaluation pass at the end of every epoch.
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    evaluation_strategy="epoch",
    num_train_epochs=20,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)


# Define Trainer with evaluation metrics
def compute_metrics(p):
    """Turn a Trainer evaluation result into accuracy / precision / recall / F1.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            sample) and ``label_ids`` (the true class ids).

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall`` and ``f1``;
        precision, recall and F1 are support-weighted averages over classes.
    """
    preds = np.argmax(p.predictions, axis=1)
    # zero_division=0 keeps the score at 0 for classes that were never
    # predicted, without emitting an UndefinedMetricWarning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        p.label_ids, preds, average='weighted', zero_division=0
    )
    # Accuracy is computed directly: no metric script has to be re-loaded on
    # each evaluation, and the result is the same fraction of correct predictions.
    acc = float(np.mean(preds == p.label_ids))
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# `model` must exist before the Trainer is built; nothing earlier in the
# notebook defines it, so a fresh Restart & Run All would stop here with a
# NameError. It is created from the pretrained checkpoint so training is a real
# fine-tune; ignore_mismatched_sizes lets the pretrained classification head be
# replaced by a freshly initialised one with len(class_to_idx) outputs.
config = ViTConfig.from_pretrained('google/vit-base-patch16-224', num_labels=len(class_to_idx))
model = WeightedViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    config=config,
    class_weights=train_dataset.class_weights,
    ignore_mismatched_sizes=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train and evaluate
trainer.train()
trainer.evaluate()

# Evaluate on the test set
test_results = trainer.evaluate(eval_dataset=test_dataset)
print("Test Results:")
print(f"Accuracy: {test_results['eval_accuracy']}")
print(f"Precision: {test_results['eval_precision']}")
print(f"Recall: {test_results['eval_recall']}")
print(f"F1 Score: {test_results['eval_f1']}")




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,2.335924,0.127907,0.01636,0.127907,0.02901
2,No log,2.297208,0.052326,0.002738,0.052326,0.005204
3,No log,2.232949,0.238372,0.056821,0.238372,0.091768
4,No log,2.242438,0.215116,0.046275,0.215116,0.076166
5,No log,2.22052,0.215116,0.046275,0.215116,0.076166


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Results:
Accuracy: 0.21511627906976744
Precision: 0.04627501352082206
Recall: 0.21511627906976744
F1 Score: 0.07616557249360188


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
from transformers import ViTForImageClassification, ViTFeatureExtractor, ViTConfig, TrainingArguments, Trainer
import os
!pip install datasets
from PIL import Image
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import numpy as np
from datasets import load_metric
from collections import Counter
import random

# Training-time augmentation, applied to the PIL image before the ViT feature
# extractor. There is deliberately no ToTensor() at the end: ToTensor() scales
# pixels to [0, 1], and the feature extractor then rescales by 1/255 a second
# time, which squashes every image to an almost constant input. Keeping the
# image as PIL lets the feature extractor resize / rescale / normalise once.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomGrayscale(p=0.2),
])

class CustomDataset(Dataset):
    """Image-classification dataset that reads images lazily from disk.

    Each item is opened from ``img_paths[idx]``, converted to RGB, optionally
    passed through ``transform`` and finally through ``feature_extractor``.

    Args:
        img_paths: Sequence of image file paths.
        labels: Sequence of integer class ids aligned with ``img_paths``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=image, return_tensors="pt")``; its result
            must expose a ``'pixel_values'`` tensor (expected to carry a leading
            batch axis of size 1).
        transform: Optional callable applied to the RGB image before the
            feature extractor.

    Attributes:
        class_weights: Float tensor of 'balanced' class weights derived from
            ``labels`` when the dataset is constructed.
    """

    def __init__(self, img_paths, labels, feature_extractor, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.feature_extractor = feature_extractor
        self.transform = transform
        self.class_weights = self.compute_class_weights()

    def compute_class_weights(self):
        """Return 'balanced' class weights for ``self.labels`` as a float tensor.

        There is one weight per distinct value found in ``self.labels``
        (in the sorted order produced by ``np.unique``).
        """
        class_weights = compute_class_weight('balanced', classes=np.unique(self.labels), y=self.labels)
        return torch.tensor(class_weights, dtype=torch.float)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``{"pixel_values": ..., "labels": ...}``."""
        img_path = self.img_paths[idx]
        label = self.labels[idx]
        # The context manager releases the file handle as soon as convert()
        # has produced an independent in-memory copy of the image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # drop any other axis that happens to have size 1.
        pixel_values = inputs['pixel_values'].squeeze(0)
        # Explicit int64: the classification loss requires long targets.
        return {"pixel_values": pixel_values, "labels": torch.tensor(label, dtype=torch.long)}

# Mount Drive and point at the dataset root (one sub-folder per class).
from google.colab import drive
drive.mount('/content/drive')
data_dir = "/content/drive/MyDrive/Colab Notebooks/MultiClass_m-20240806T134043Z-001/MultiClass_m"
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

img_paths = []
labels = []
# Every sub-directory of data_dir is one class. Plain files that may sit next
# to the class folders are skipped so they neither become a bogus class nor
# crash the os.listdir() call below.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)}

for label in classes:
    class_dir = os.path.join(data_dir, label)
    # os.listdir() order is arbitrary; sorting keeps the sample order (and
    # therefore any seeded split built from it) stable between runs.
    for img_name in sorted(os.listdir(class_dir)):
        img_paths.append(os.path.join(class_dir, img_name))
        labels.append(class_to_idx[label])

def oversample_dataset(img_paths, labels, seed=None):
    """Balance classes by randomly duplicating samples of smaller classes.

    Args:
        img_paths: List of image paths.
        labels: List of class labels aligned with ``img_paths``.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the result is reproducible and the global generator is not
            touched. When None, the module-level ``random`` generator is used.

    Returns:
        ``(new_img_paths, new_labels)``: new lists that start with every
        original sample, followed by randomly chosen duplicates so that each
        class has as many samples as the largest class. The input lists are
        not modified. Empty input yields two empty lists.
    """
    new_img_paths = list(img_paths)
    new_labels = list(labels)
    if len(new_labels) == 0:
        # Nothing to balance; max() below would fail on an empty collection.
        return new_img_paths, new_labels

    rng = random if seed is None else random.Random(seed)

    # Group sample positions by class in one pass instead of rescanning the
    # whole label list once per class. Insertion order follows first
    # appearance of each class.
    indices_by_class = {}
    for idx, label in enumerate(labels):
        indices_by_class.setdefault(label, []).append(idx)

    max_count = max(len(indices) for indices in indices_by_class.values())

    for class_indices in indices_by_class.values():
        for _ in range(max_count - len(class_indices)):
            idx = rng.choice(class_indices)
            new_img_paths.append(img_paths[idx])
            new_labels.append(labels[idx])

    return new_img_paths, new_labels

# Split FIRST, then oversample only the training portion. Oversampling before
# the split puts copies of the same image on both sides of it, so validation
# and test scores would partly be measured on images the model was trained on.
random.seed(42)  # makes the random duplication below repeatable

train_paths, holdout_paths, train_labels, holdout_labels = train_test_split(
    img_paths, labels, test_size=0.2, stratify=labels, random_state=42
)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    holdout_paths, holdout_labels, test_size=0.5, stratify=holdout_labels, random_state=42
)

# Oversample the training set
oversampled_img_paths, oversampled_labels = oversample_dataset(train_paths, train_labels)
train_paths, train_labels = oversampled_img_paths, oversampled_labels

# Only the training set is augmented. Validation and test images go straight to
# the feature extractor so evaluation is deterministic.
train_dataset = CustomDataset(train_paths, train_labels, feature_extractor, transform=transform)
val_dataset = CustomDataset(val_paths, val_labels, feature_extractor)
test_dataset = CustomDataset(test_paths, test_labels, feature_extractor)

class WeightedViTForImageClassification(ViTForImageClassification):
    """ViT classifier trained with class-weighted cross-entropy.

    Args:
        config: Model configuration forwarded to ``ViTForImageClassification``.
        class_weights: Tensor of per-class weights used by the loss.
    """

    def __init__(self, config, class_weights):
        super().__init__(config)
        self.class_weights = class_weights
        # Mark every encoder parameter as trainable.
        self.vit.requires_grad_(True)

    def forward(self, pixel_values, labels=None, **kwargs):
        """Run the encoder and classify from the first token of its output.

        Returns:
            ``logits`` alone when ``labels`` is None, otherwise the tuple
            ``(loss, logits)``.
        """
        encoder_out = self.vit(pixel_values, **kwargs)
        first_token = encoder_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_token)

        # Inference path: nothing to compare against, so no loss is produced.
        if labels is None:
            return logits

        # Weights are moved to wherever the logits live before building the loss.
        criterion = nn.CrossEntropyLoss(weight=self.class_weights.to(logits.device))
        flat_logits = logits.view(-1, self.config.num_labels)
        loss = criterion(flat_logits, labels.view(-1))
        return (loss, logits)

config = ViTConfig.from_pretrained('google/vit-base-patch16-224', num_labels=len(class_to_idx))
# Instantiating the class directly from `config` would give a randomly
# initialised network. from_pretrained() loads the checkpoint weights so that
# training is an actual fine-tune. ignore_mismatched_sizes lets the pretrained
# classification head be replaced by a freshly initialised one with
# len(class_to_idx) outputs; class_weights is forwarded to __init__.
model = WeightedViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    config=config,
    class_weights=train_dataset.class_weights,
    ignore_mismatched_sizes=True,
)



Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

In [None]:
from transformers import ViTForImageClassification, ViTFeatureExtractor, ViTConfig, TrainingArguments, Trainer
# Trainer hyper-parameters: 10 epochs, batches of 16 for both training and
# evaluation, one evaluation pass at the end of every epoch.
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    evaluation_strategy="epoch",
    num_train_epochs=10,
    # Original note (translated): fine-tuned 3-4 times — presumably this value
    # was settled on after those runs; TODO confirm.
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)
from transformers import TrainingArguments, Trainer
import numpy as np
from datasets import load_metric
from sklearn.metrics import precision_recall_fscore_support
# Define Trainer with evaluation metrics
def compute_metrics(p):
    """Turn a Trainer evaluation result into accuracy / precision / recall / F1.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            sample) and ``label_ids`` (the true class ids).

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall`` and ``f1``;
        precision, recall and F1 are support-weighted averages over classes.
    """
    preds = np.argmax(p.predictions, axis=1)
    # zero_division=0 keeps the score at 0 for classes that were never
    # predicted, without emitting an UndefinedMetricWarning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        p.label_ids, preds, average='weighted', zero_division=0
    )
    # Accuracy is computed directly: no metric script has to be re-loaded on
    # each evaluation, and the result is the same fraction of correct predictions.
    acc = float(np.mean(preds == p.label_ids))
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Wire the model, the hyper-parameters, both datasets and the metric function
# into a Trainer.
trainer = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
    args=training_args,
    model=model,
)

# Fine-tune, then score once more on the validation set; the final expression
# is left bare so the notebook displays the returned metrics.
trainer.train()
trainer.evaluate()

In [None]:
# Score the trained model on the held-out test set and print the metrics.
test_results = trainer.evaluate(eval_dataset=test_dataset)
# The header previously read "Training Results per Epoch:", but the values
# printed below are a single evaluation on the test set.
print("Test Results:")
print(f"Accuracy: {test_results['eval_accuracy']}")
print(f"Precision: {test_results['eval_precision']}")
print(f"Recall: {test_results['eval_recall']}")
print(f"F1 Score: {test_results['eval_f1']}")


Training Results per Epoch:

 Epoch  Accuracy  Precision   Recall       F1
     1  0.557907   0.526360 0.527465 0.529010
     2  0.582326   0.552738 0.552326 0.495202
     3  0.528372   0.563821 0.538390 0.531790
     4  0.515116   0.546275 0.521511 0.576134
     5  0.575116   0.526275 0.521511 0.576134
     6  0.597907   0.566539 0.574650 0.589051
     7  0.612326   0.582348 0.594524 0.590253
     8  0.628372   0.633845 0.632317 0.631763
     9  0.645116   0.641753 0.622567 0.668139
    10  0.670020   0.626275 0.671446 0.652187

Test Results:
Accuracy : 0.670020
Precision: 0.626275
Recall   : 0.671446
F1 Score : 0.652187
