In [None]:
!pip install datasets



In [None]:
# Colab only: mount Google Drive at /content/drive/ so the project files stored there can be read.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import glob
# Project root on the mounted Drive; the image files are expected under `data/`.
path = '/content/drive/MyDrive/ML_final/'

# Sanity check: show a handful of files from the data folder.
sample_files = glob.glob(path + 'data/*')
print(sample_files[:5])

['/content/drive/MyDrive/ML_final/data/cat_1284.png', '/content/drive/MyDrive/ML_final/data/cat_1285.png', '/content/drive/MyDrive/ML_final/data/cat_1287.png', '/content/drive/MyDrive/ML_final/data/cat_1286.png', '/content/drive/MyDrive/ML_final/data/cat_1290.png']


In [None]:
from transformers import TrainingArguments, Trainer, ViTFeatureExtractor, ViTForImageClassification, TrainerCallback
import torch
from PIL import Image
import pandas as pd
import numpy as np
import datasets
from datasets import Dataset

In [None]:
# CSV file (relative to `path`) that is read by `preprocess`.
fname = 'dataset.csv'

# Integer class id for every label string; ids follow the order listed here.
label_map = {name: idx for idx, name in enumerate(('neg', 'neu', 'pos'))}

def preprocess(fname):
    """Load the labelled image index and prepare it for dataset creation.

    Reads the CSV at ``path + fname``. The file must provide an ``image``
    column (file names relative to ``path + 'data/'``) and a ``label`` column
    whose values are keys of the module-level ``label_map``.

    Args:
        fname: CSV file name, relative to the module-level ``path``.

    Returns:
        pandas.DataFrame: the CSV contents with ``image`` rewritten to a full
        path and ``label`` replaced by its integer class id.

    Raises:
        ValueError: if any ``label`` value (including a missing one) is not a
            key of ``label_map``.
    """
    df = pd.read_csv(path + fname)
    df['image'] = df['image'].apply(lambda name: path + 'data/' + name)

    # `.map` turns every label that is not in `label_map` into NaN. Check for
    # that explicitly so the failure names the offending values instead of
    # surfacing as an opaque integer-cast error.
    class_ids = df['label'].map(label_map)
    unknown = df.loc[class_ids.isna(), 'label']
    if not unknown.empty:
        raise ValueError(
            f"Unknown label(s) in {fname}: {sorted(set(map(str, unknown)))}; "
            f"expected one of {list(label_map)}"
        )

    df['label'] = class_ids.astype(int)
    return df

def gen(df):
  """Yield one ``{'image': ..., 'labels': ...}`` example per row of ``df``.

  The file named in the ``image`` column is opened and converted to RGB; the
  value of the ``label`` column is emitted under the key ``'labels'``.
  """
  for image_path, label in zip(df['image'], df['label']):
    rgb_image = Image.open(image_path).convert('RGB')
    yield {'image': rgb_image, 'labels': label}

# Build the labelled frame, then materialize it as a Hugging Face Dataset.
# The frame is handed to the generator through `gen_kwargs` rather than a closure.
df = preprocess(fname)
dataset = Dataset.from_generator(gen, gen_kwargs={'df': df})

In [None]:
# Show the dataset summary followed by its first example.
print(dataset, dataset[0], sep='\n')

Dataset({
    features: ['image', 'labels'],
    num_rows: 500
})
{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1446x1660 at 0x7BEB27689410>, 'labels': 2}


In [None]:
# Preprocessor belonging to the pretrained ViT checkpoint; `transform` uses it
# to turn PIL images into pixel-value tensors.
VIT_CHECKPOINT = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)

def transform(example_batch):
    """Convert a batch of PIL images into model inputs, keeping the labels.

    Args:
        example_batch: mapping with an ``'image'`` sequence of PIL images and
            a matching ``'labels'`` sequence.

    Returns:
        The feature extractor's output (PyTorch tensors) with the batch's
        ``'labels'`` attached under the same key.
    """
    images = list(example_batch['image'])
    encoded = feature_extractor(images, return_tensors='pt')

    # The labels must travel with the pixel values so the collator can find them.
    encoded['labels'] = example_batch['labels']
    return encoded

def shuffle_split(dataset):
    """Attach the on-the-fly ``transform`` and split the data 80/20.

    The split uses a fixed seed (23), so it is the same on every run.

    Returns:
        tuple: ``(train_split, test_split)``.
    """
    transformed = dataset.with_transform(transform)
    splits = transformed.train_test_split(test_size=0.2, seed=23)
    return splits['train'], splits['test']

def load_model(num_labels=3, checkpoint='google/vit-base-patch16-224-in21k'):
    """Load a pretrained ViT image classifier with ``num_labels`` output classes.

    Args:
        num_labels: number of target classes for the classification head
            (defaults to the three classes used in this notebook).
        checkpoint: name or path of the pretrained ViT checkpoint to start from.

    Returns:
        ViTForImageClassification: the model built from ``checkpoint``. As the
        loading message in this notebook shows, the classifier weights are not
        part of the checkpoint and are newly initialized, so the model must be
        trained before its predictions are meaningful.
    """
    return ViTForImageClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels,
    )

def collate_fn(batch):
    """Stack individual examples into one batch of tensors.

    ``batch`` is traversed exactly once, so it may be any iterable of examples:
    a list, a one-shot iterator, or a dataset whose examples are transformed
    lazily on access (which is then not transformed twice).

    Args:
        batch: iterable of mappings, each with a ``'pixel_values'`` tensor and
            a ``'labels'`` value.

    Returns:
        dict: ``'pixel_values'`` stacked along a new leading dimension and
        ``'labels'`` as a 1-D tensor, both in input order.
    """
    pixel_values, labels = [], []
    for example in batch:
        pixel_values.append(example['pixel_values'])
        labels.append(example['labels'])

    return {
        'pixel_values': torch.stack(pixel_values),
        'labels': torch.tensor(labels),
    }


class EpochReportCallback(TrainerCallback):
    """Collect and print the mean training loss of every epoch.

    The loss log of an epoch can reach ``on_log`` only *after* ``on_epoch_end``
    has fired (this is what happens with ``logging_strategy="epoch"``). If the
    report were produced in ``on_epoch_end``, every loss would be attributed
    to the following epoch and the last epoch would never be reported. The
    report is therefore emitted when the next epoch begins and once more when
    training ends, using the epoch number remembered in ``on_epoch_end``.

    Attributes:
        epoch_losses: mean loss of every reported epoch, in order.
        current_epoch_losses: losses logged since the last report.
    """

    def __init__(self):
        self.epoch_losses = []
        self.current_epoch_losses = []
        # Epoch number (``state.epoch``) of the most recently finished epoch;
        # used as the label of the next report.
        self._last_finished_epoch = None

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Remember every logged training loss until the next report."""
        if logs is not None and "loss" in logs:
            self.current_epoch_losses.append(logs["loss"])

    def on_epoch_end(self, args, state, control, **kwargs):
        """Record which epoch just finished; its loss may not be logged yet."""
        self._last_finished_epoch = state.epoch

    def on_epoch_begin(self, args, state, control, **kwargs):
        """Report the previous epoch, whose losses have all been logged by now."""
        self._report()

    def on_train_end(self, args, state, control, **kwargs):
        """Report the final epoch, which has no following ``on_epoch_begin``."""
        self._report()

    def _report(self):
        """Average, store and print the pending losses; no-op if there are none."""
        if not self.current_epoch_losses:
            return
        avg_loss = float(np.mean(self.current_epoch_losses))
        self.epoch_losses.append(avg_loss)
        print(f"Epoch {self._last_finished_epoch}: Average loss = {avg_loss:.4f}")
        self.current_epoch_losses = []

def train_model(model, train_data, num_train_epochs=7, batch_size=32,
                learning_rate=2e-4, output_dir="./vit-base-cat-demo"):
    """Fine-tune the image classification model and return its epoch losses.

    Args:
        model: the model to train; it is updated in place by the Trainer.
        train_data: training split whose examples provide ``'pixel_values'``
            and ``'labels'`` (the fields read by ``collate_fn``).
        num_train_epochs: number of passes over ``train_data``.
        batch_size: per-device training batch size.
        learning_rate: initial learning rate handed to the Trainer.
        output_dir: directory in which the Trainer stores checkpoints.

    Returns:
        list: the per-epoch average losses collected by ``EpochReportCallback``.
    """
    # Local import: only needed for the Trainer keyword compatibility check below.
    import inspect

    feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=batch_size,
        report_to="none",
        num_train_epochs=num_train_epochs,
        save_steps=100,
        # Losses are logged once per epoch; a step-based `logging_steps` value
        # would be ignored under this strategy, so none is passed.
        logging_strategy="epoch",
        learning_rate=learning_rate,
        save_total_limit=2,
        # NOTE(review): presumably required so the Trainer does not drop the
        # raw 'image' column that the lazy transform reads -- confirm.
        remove_unused_columns=False,
        push_to_hub=False,
    )

    epoch_reporter = EpochReportCallback()

    # Newer transformers releases rename Trainer's `tokenizer` argument to
    # `processing_class` and warn about the old name; use whichever keyword
    # the installed version accepts.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    processor_kwarg = "processing_class" if "processing_class" in trainer_params else "tokenizer"

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=collate_fn,
        train_dataset=train_data,
        callbacks=[epoch_reporter],
        **{processor_kwarg: feature_extractor},
    )

    trainer.train()

    return epoch_reporter.epoch_losses

@torch.no_grad()
def get_predictions(m, data, batch_size=20):
    """Predict a class for every example in ``data``.

    The model is switched to eval mode and is run on whatever device it
    currently lives on (it is no longer moved to the CPU as a side effect).
    Examples are read in a single pass and batched on the fly, so the whole
    dataset is never held in memory as one tensor.

    Args:
        m: ``torch.nn.Module`` whose call result exposes ``.logits`` with the
            class scores in the last dimension.
        data: iterable of examples, each with a ``'pixel_values'`` tensor and
            a ``'labels'`` value.
        batch_size: number of examples per forward pass.

    Returns:
        tuple: ``(predictions, labels)`` as 1-D CPU tensors in input order;
        both are empty when ``data`` is empty.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    m.eval()
    first_param = next(m.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    def _predict(pixel_list):
        # One forward pass; results are brought back to the CPU for the caller.
        pixels = torch.stack(pixel_list).to(device)
        return torch.argmax(m(pixels).logits, dim=-1).cpu()

    predictions, labels, pending = [], [], []
    for example in data:
        pending.append(example['pixel_values'])
        labels.append(example['labels'])
        if len(pending) == batch_size:
            predictions.append(_predict(pending))
            pending = []
    if pending:
        # Final, smaller batch.
        predictions.append(_predict(pending))

    if not predictions:
        empty = torch.empty(0, dtype=torch.long)
        return empty, empty.clone()

    return torch.cat(predictions), torch.tensor(labels)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
from sklearn.metrics import classification_report

# Baseline: score the model before any fine-tuning (its classifier weights are newly initialized).
train_data, test_data = shuffle_split(dataset)
model = load_model()

preds, labels = get_predictions(model, test_data)
accuracy = (preds == labels).float().mean().item()
print(f'initial accuracy: {accuracy * 100:.2f}%')

y_pred = preds.tolist()
y_true = labels.tolist()

label_names = ['neg', 'neu', 'pos']

# Pass the class ids explicitly: without `labels`, the report is built from the
# classes that happen to occur in y_true/y_pred and raises if that count does
# not match `target_names` (e.g. when a class is absent from the test split).
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(label_names))),
    target_names=label_names,
))

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


initial accuracy: 40.00%
              precision    recall  f1-score   support

         neg       0.45      0.39      0.42        36
         neu       0.40      0.64      0.50        39
         pos       0.14      0.04      0.06        25

    accuracy                           0.40       100
   macro avg       0.33      0.36      0.33       100
weighted avg       0.36      0.40      0.36       100



In [None]:
# Peek at one transformed training example: tensor shape first, then its label.
example = train_data[0]
for field in (example['pixel_values'].shape, example['labels']):
    print(field)

torch.Size([3, 224, 224])
2


In [None]:
# Fine-tune, keeping the per-epoch losses for later inspection, then re-score on the held-out split.
epoch_losses = train_model(model, train_data)

preds, labels = get_predictions(model, test_data)
accuracy = (preds == labels).float().mean().item()
print(f'final accuracy: {accuracy * 100:.2f}%')

y_pred = preds.tolist()
y_true = labels.tolist()

label_names = ['neg', 'neu', 'pos']

# Pass the class ids explicitly: without `labels`, the report is built from the
# classes that happen to occur in y_true/y_pred and raises if that count does
# not match `target_names` (e.g. when the model never predicts one class that
# is also absent from the test split).
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(label_names))),
    target_names=label_names,
))

  trainer = Trainer(


Step,Training Loss
13,1.0888
26,0.9011
39,0.5955
52,0.2584
65,0.1129
78,0.0635
91,0.0514


Epoch 2.0: Average loss = 1.0888
Epoch 3.0: Average loss = 0.9011
Epoch 4.0: Average loss = 0.5955
Epoch 5.0: Average loss = 0.2584
Epoch 6.0: Average loss = 0.1129
Epoch 7.0: Average loss = 0.0635
final accuracy: 55.00%
              precision    recall  f1-score   support

         neg       0.57      0.67      0.62        36
         neu       0.55      0.62      0.58        39
         pos       0.50      0.28      0.36        25

    accuracy                           0.55       100
   macro avg       0.54      0.52      0.52       100
weighted avg       0.54      0.55      0.54       100

