In [None]:
!pip install datasets

In [4]:
from google.colab import drive
# Attach Google Drive to the runtime's filesystem at /content/drive/.
drive.mount(mountpoint='/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [5]:
import glob
# Project root on the mounted Drive; the code below builds file paths by
# string concatenation, so the trailing slash is required.
path = '/content/drive/MyDrive/ML_final/'

# Peek at a handful of entries to confirm the data folder is readable.
sample_files = glob.glob(f'{path}data/*')
print(sample_files[:5])

['/content/drive/MyDrive/ML_final/data/cat_1284.png', '/content/drive/MyDrive/ML_final/data/cat_1285.png', '/content/drive/MyDrive/ML_final/data/cat_1287.png', '/content/drive/MyDrive/ML_final/data/cat_1286.png', '/content/drive/MyDrive/ML_final/data/cat_1290.png']


In [7]:
from transformers import TrainingArguments, Trainer, ViTFeatureExtractor, ViTForImageClassification
import torch
from PIL import Image
import pandas as pd
import datasets
from datasets import Dataset

In [27]:
# Name of the CSV manifest; it is read from `path + fname`.
fname = 'dataset.csv'

# Integer class id assigned to each label string in the manifest.
label_map = {
    'neg': 0,
    'neu': 1,
    'pos': 2,
}

def preprocess(fname):
  """Load the label manifest and prepare it for dataset generation.

  Reads the CSV at ``path + fname``, rewrites each entry of the ``image``
  column to ``path + 'data/' + <entry>``, and replaces each ``label`` string
  with its integer id from ``label_map``.

  Args:
    fname: Manifest file name, appended to the module-level ``path``.

  Returns:
    The loaded DataFrame with converted ``image`` and ``label`` columns.

  Raises:
    ValueError: If any ``label`` value (including a missing one) has no entry
      in ``label_map``.
  """
  df = pd.read_csv(path + fname)
  df['image'] = df['image'].apply(lambda x: path + 'data/' + x)

  # Series.map() yields NaN for keys that are not in the dict, which would
  # silently turn the label column into floats; fail loudly instead.
  labels = df['label'].map(label_map)
  unmapped = labels.isna()
  if unmapped.any():
    bad_values = sorted({str(v) for v in df.loc[unmapped, 'label']})
    raise ValueError(f'labels not present in label_map: {bad_values}')

  df['label'] = labels
  return df

def gen(df):
  """Yield one ``{'image', 'label'}`` example per row of ``df``.

  Args:
    df: DataFrame whose ``image`` column holds image file paths and whose
      ``label`` column holds the label to emit with each image.

  Yields:
    A dict with the image opened via PIL and converted to RGB under
    ``'image'``, and the row's label under ``'label'``.
  """
  for _, row in df.iterrows():
    # convert() returns an independent image, so the source file can be
    # closed deterministically before the example is handed out.
    with Image.open(row['image']) as img:
      rgb = img.convert('RGB')
    yield {'image': rgb, 'label': row['label']}

# Load the manifest, then materialise a Dataset by streaming rows through gen().
df = preprocess(fname)


def _examples():
  """Zero-argument generator factory, as Dataset.from_generator expects."""
  return gen(df)


dataset = Dataset.from_generator(_examples)

Generating train split: 0 examples [00:00, ? examples/s]

In [28]:
# Show the generated dataset's feature names and row count as a sanity check.
print(dataset)

Dataset({
    features: ['image', 'label'],
    num_rows: 100
})


In [25]:
# Image preprocessor loaded from the same checkpoint name that load_model() uses.
feature_extractor = ViTFeatureExtractor.from_pretrained(
    'google/vit-base-patch16-224-in21k'
)

def transform(example_batch):
    """Turn a batch of images into feature-extractor output plus labels.

    Args:
        example_batch: Mapping with an ``'image'`` sequence and a ``'label'``
            sequence.

    Returns:
        Whatever ``feature_extractor`` returns for the images (requested with
        ``return_tensors='pt'``), with the batch's labels stored under
        ``'label'``.
    """
    images = list(example_batch['image'])
    encoded = feature_extractor(images, return_tensors='pt')

    # Carry the labels along so they travel with the encoded images.
    encoded['label'] = example_batch['label']
    return encoded

def shuffle(dataset, seed=42):
    """Attach the on-the-fly ``transform`` to ``dataset`` and shuffle it.

    Args:
        dataset: Object exposing ``with_transform`` and ``shuffle`` (a
            ``datasets.Dataset`` in this notebook).
        seed: Seed forwarded to ``shuffle``; the default of 42 keeps the
            previous fixed ordering.

    Returns:
        The result of ``dataset.with_transform(transform).shuffle(seed=seed)``.
    """
    transformed = dataset.with_transform(transform)
    return transformed.shuffle(seed=seed)

def load_model(num_labels=3, checkpoint='google/vit-base-patch16-224-in21k'):
    """Load a ViT image-classification model with ``num_labels`` output classes.

    Args:
        num_labels: Number of classes passed to ``from_pretrained``; defaults
            to 3.
        checkpoint: Checkpoint name or path passed to ``from_pretrained``.

    Returns:
        The ``ViTForImageClassification`` instance created by
        ``from_pretrained``.
    """
    return ViTForImageClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels,
    )

def collate_fn(batch):
    """Combine per-example dicts into one batched dict of tensors.

    Args:
        batch: Iterable of dicts, each with a ``'pixel_values'`` tensor and a
            ``'label'`` value.

    Returns:
        A dict with the ``'pixel_values'`` tensors stacked along a new leading
        dimension and the ``'label'`` values gathered into one tensor.
    """
    # Walk the batch exactly once: iterating a lazily transformed dataset
    # re-runs its transform on every pass, and one-shot iterables cannot be
    # traversed twice.
    pixel_values = []
    labels = []
    for example in batch:
        pixel_values.append(example['pixel_values'])
        labels.append(example['label'])

    return {
        'pixel_values': torch.stack(pixel_values),
        'label': torch.tensor(labels),
    }

@torch.no_grad()
def get_predictions(m, data, batch_size=20):
    """Run ``m`` over ``data`` in mini-batches and return predicted class ids.

    Calls ``m.eval()`` and ``m.to('cpu')`` before inference. ``data`` is
    first collated into a single batch with ``collate_fn`` and then sliced
    into chunks for the forward passes.

    Args:
        m: Model called with a slice of pixel values; its result must expose
            ``.logits``.
        data: Iterable of examples accepted by ``collate_fn``.
        batch_size: Number of examples per forward pass; defaults to 20.

    Returns:
        A ``(predictions, labels)`` tuple: the argmax over the last logits
        dimension for every example, and the collated ``'label'`` tensor.
    """
    m.eval()
    m.to('cpu')

    collated = collate_fn(data)
    pixel_values = collated['pixel_values']

    # Keep the per-chunk results as tensors and concatenate once at the end.
    chunks = []
    for start in range(0, len(pixel_values), batch_size):
        logits = m(pixel_values[start:start + batch_size]).logits
        chunks.append(torch.argmax(logits, dim=-1))

    predictions = torch.cat(chunks) if chunks else torch.tensor([])
    return predictions, collated['label']


In [29]:
# Attach the lazy image transform and shuffle, then load the model.
dataset = shuffle(dataset)
model = load_model()

# Baseline before any training: fraction of predictions that match the labels.
preds, labels = get_predictions(model, dataset)
accuracy = preds.eq(labels).float().mean().item()
print(f'initial accuracy: {accuracy * 100:.2f}%')

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


initial accuracy: 43.00%


In [31]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the predictions computed above.
y_pred = preds.tolist()
y_true = labels.tolist()

# Display names, listed in class-id order (0, 1, 2).
label_names = ['neg', 'neu', 'pos']

# Pin the class ids explicitly: otherwise the report infers them from the
# data and fails when a class is absent from both y_true and y_pred, because
# target_names would no longer match the number of classes found.
label_ids = list(range(len(label_names)))

print(classification_report(y_true, y_pred, labels=label_ids, target_names=label_names))

              precision    recall  f1-score   support

         neg       0.61      0.41      0.49        46
         neu       0.33      0.04      0.07        24
         pos       0.35      0.77      0.48        30

    accuracy                           0.43       100
   macro avg       0.43      0.41      0.35       100
weighted avg       0.47      0.43      0.39       100

