In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data.dataset import random_split 
from transformers import ViTFeatureExtractor,  ViTForImageClassification
from torch.utils.data import TensorDataset
from datasets import load_dataset, load_from_disk, Dataset
import torch.optim as optim
import torch.nn as nn
import json
import pandas as pd
import numpy as np
import transformers
import evaluate



In [2]:
#Initialise Cuda and check that Cuda is available
device = torch.device("cuda")
print(device)
print(torch.cuda.is_available())

cuda
False


In [5]:
#Set model path and create feature_extractor from pretrained Google Model

model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)



In [6]:
#Load Dataset
ds = load_dataset('data/', data_dir='val')


Resolving data files:   0%|          | 0/100000 [00:00<?, ?it/s]

Found cached dataset imagefolder (C:/Users/felix/.cache/huggingface/datasets/imagefolder/data-042cfdaaf3375786/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f)


  0%|          | 0/1 [00:00<?, ?it/s]

In [178]:
#Open Json File used to remap annotation label to Class
with open('data/val.json') as json_file:
    json_data = json.load(json_file)


In [179]:
#Iterates through Json File returning the new annotation of class for each index in dataset
cat_ids = list([i['category_id'] for i in json_data['annotations'][:]])

labels = []


for i in cat_ids:
    labels.append(json_data['categories'][i]['class'])
    
# cat_labels = list([json_data['categories'][i]['class'] for i in cats_ids])
cats = pd.Series(labels, dtype='category').cat.codes
print(cats[0:10])
print(labels[0:10])

0     8
1     8
2     1
3     8
4    21
5     8
6    30
7     1
8    30
9    32
dtype: int8
['Aves', 'Aves', 'Agaricomycetes', 'Aves', 'Gastropoda', 'Aves', 'Liliopsida', 'Agaricomycetes', 'Liliopsida', 'Magnoliopsida']


In [182]:
#Remove old label and add new label then reshuffle dataset for sampling
ds['train'] = ds['train'].remove_columns('labels')
ds['train'] = ds['train'].add_column(column=cats, name='labels')



In [183]:
x = list(cats.value_counts().loc[lambda x : x>1000].keys())
include_index = [i for i, j in enumerate(cats) if j in x]
valid_images = ds['train'].select(include_index)

In [184]:
include = [j for i, j in enumerate(cats) if j in x]
print(len(include))
print(len(set(include)))

95530
12


In [185]:
print(set(include))

{32, 1, 34, 2, 0, 4, 8, 42, 45, 21, 26, 30}


In [186]:
def select_indexes(lst, lst_index):
    idx_dict = {}
    for i, val in enumerate(lst):
        if val not in idx_dict:
            idx_dict[val] = []
        elif len(idx_dict[val]) < 1000:
            idx_dict[val].append(lst_index[i])
        elif all(len(v) == 1000 for v in idx_dict.values()):
            break
    return idx_dict

sample = []
for key, value in select_indexes(include, include_index).items():
    print(key, len(value))
    sample += value

print(len(sample))

8 1000
1 1000
21 1000
30 1000
32 1000
34 1000
2 1000
45 1000
26 1000
0 1000
42 1000
4 1000
12000


In [238]:
model_ds = ds['train'].select(sample)


In [241]:
s = pd.Series(include)
st = s.iloc[sample].astype('category').cat.codes

# model_ds = model_ds.remove_columns('labels')
model_ds = model_ds.add_column(column=st, name='labels')

Flattening the indices:   0%|          | 0/12000 [00:00<?, ? examples/s]

In [246]:
print(model_ds[0])

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x281 at 0x208EFB7A770>, 'labels': 4}


In [247]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs



In [248]:
model_ds = model_ds.with_transform(transform)

In [249]:
model_ds = model_ds.train_test_split(test_size=0.2, shuffle=True)
ds_valid = model_ds['test'].train_test_split(test_size=0.5)
model_ds['valid'] = ds_valid['test']
model_ds['test'] = ds_valid['train']

In [None]:
print(model_ds['test'][0:50]['labels'])
train_counter = pd.DataFrame(model_ds['train'])
test_counter = pd.DataFrame(model_ds['test'])
valid_counter = pd.DataFrame(model_ds['valid'])

print(train_counter['labels'].value_counts())
print(test_counter['labels'].value_counts())
print(valid_counter['labels'].value_counts())


# plt.bar(list(set(train_counter['label'])), height=train_counter['label'].value_counts(), log=True)
# plt.bar(list(set(test_counter['label'].value_counts())), height=test_counter['label'].value_counts(), log=True)
# plt.bar(list(set(valid_counter['label'].value_counts())), height=valid_counter['label'].value_counts(), log=True)

# print(max(cats))

In [250]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch]),
    }

In [251]:
metric = evaluate.load("accuracy")

def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


In [252]:
model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=12
)


loading configuration file config.json from cache at C:\Users\felix/.cache\huggingface\hub\models--google--vit-base-patch16-224-in21k\snapshots\7cbdb7ee3a6bcdf99dae654893f66519c480a0f8\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,

In [253]:
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="./vit-base",
  per_device_train_batch_size=16,
  evaluation_strategy="steps",
  num_train_epochs=4,
  fp16=False,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)


PyTorch: setting up devices


In [254]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,  
    compute_metrics=compute_metrics,
    train_dataset=model_ds["train"],
    eval_dataset=model_ds["valid"],
    tokenizer=feature_extractor,
)


In [255]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

***** Running training *****
  Num examples = 9600
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2400
  Number of trainable parameters = 85807884


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
evaluator = trainer.evaluate(model_ds['valid'])
predictor = trainer.predict(model_ds['test'])

predictions = [np.argmax(i) for i in predictor.predictions]
print(evaluator)
print(predictions)


# trainer.log_metrics("train", metrics)
# trainer.save_metrics("train", metrics)
# trainer.save_metrics("eval", metrics["eval_f1", "eval_accuracy"])



In [None]:
print(model_ds['test'][:10]['label'])