## Run pre-trained model on a sample image

In [14]:
from transformers import ViTImageProcessor, TFViTForImageClassification
from PIL import Image
import tensorflow as tf

img_path = r'TRAIN_IMAGES\activity_category\train\Out of Home Media\212345_00102046-002_164547-PT-Xmas-D15-KV-1276x1790-px.jpg'
image = Image.open(img_path)

feature_extractor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224', 
                                                        proxies={'https': 'proxy-ir.intel.com:912'})
model = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224', 
                                                  proxies={'https': 'proxy-ir.intel.com:912'})

inputs = feature_extractor(images=image, return_tensors="tf")
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = tf.math.argmax(logits, -1).numpy()[0]
print("Predicted class:", model.config.id2label[predicted_class_idx])


All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at google/vit-base-patch16-224.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


Predicted class: web site, website, internet site, site


## Fine tune pretrained model

Initialise the model and feature extractor

In [10]:
from transformers import ViTFeatureExtractor

model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path, proxies={'https': 'proxy-ir.intel.com:912'})



In [2]:
from transformers import ViTImageProcessor, TFViTForImageClassification
from PIL import Image
import tensorflow as tf
import pandas as pd
import os
import numpy as np

stat_df = pd.read_csv("data/TRAIN_images_metadata.csv")

stat_df = stat_df.head(20)

def process_image(image_file):
    img_pil = Image.open(os.path.join("I:/TRAIN_IMAGES/", image_file)).convert("RGB")
    # print(os.path.join("I:/TRAIN_IMAGES/", image_file))
    inp_img_enc = feature_extractor(img_pil, return_tensors='tf')
    return inp_img_enc['pixel_values']

stat_df['pixel_values'] = stat_df['image_name'].map(process_image)


In [3]:
import evaluate
import numpy as np

label_col = 'POA_attribution'

train_data = stat_df[['pixel_values', label_col]].loc[:14]

valid_data = stat_df[['pixel_values', label_col]].loc[15:]

def collate_fn(batch):
    return {
        'pixel_values': tf.stack([x['pixel_values'] for x in batch]),
        'labels': tf.tensor([x['labels'] for x in batch])
    }



In [4]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy_score(y_true = labels, y_pred = predictions)

In [11]:
from transformers import TFViTForImageClassification
from src.util import get_label_map

poa_labels = get_label_map()[label_col]

model = TFViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(poa_labels),
    id2label={v : k for k,v in poa_labels.items()},
    label2id = poa_labels,
    proxies={'https': 'proxy-ir.intel.com:912'}
)


from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="./vit-base-AIE-sample",
  per_device_train_batch_size=16,
  evaluation_strategy="steps",
  num_train_epochs=10,
  fp16=True,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=1e-6,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)


from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=valid_data,
    tokenizer=feature_extractor,
)

Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/kernel:0', 'vit/pooler/dense/bias:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ImportError: 
Trainer requires the PyTorch library but it was not found in your environment.
However, we were able to find a TensorFlow installation. TensorFlow classes begin
with "TF", but are otherwise identically named to our PyTorch classes. This
means that the TF equivalent of the class you tried to import would be "TFTrainer".
If you want to use TensorFlow, please use TF classes instead!

If you really do want to use PyTorch please go to
https://pytorch.org/get-started/locally/ and follow the instructions that
match your environment.


In [None]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

In [None]:
metrics = trainer.evaluate(valid_data)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

In [4]:
import torch
from pynvml import *


def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")
torch.ones((1, 1)).to("cuda")
print_gpu_utilization()
torch.cuda.empty_cache()
print_gpu_utilization()

GPU memory occupied: 5631 MB.
GPU memory occupied: 5629 MB.


In [5]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))



True
1
0
<torch.cuda.device object at 0x7fa7ffa0a310>
Tesla V100-SXM2-16GB


In [5]:
from transformers import ViTImageProcessor, ViTForImageClassification, ViTConfig
from PIL import Image
import torch

img_path = r'../TRAIN_IMAGES/POA_attribution/validation/Yes/101727_00113894-001_POA - 06022022.png'

model_path = "../vit-base-aie-15k/POA_attribution/checkpoint-800"

image = Image.open(img_path).convert("RGB")
# fe_config = ViTConfig.from_pretrained('../test-vit-base/POA_attribution/preprocessor_config.json')
feature_extractor = ViTImageProcessor.from_pretrained(model_path)

# m_config = ViTConfig.from_pretrained('../test-vit-base/POA_attribution/')
model = ViTForImageClassification.from_pretrained(model_path)
model = model.to('cuda')
inputs = feature_extractor(images=image, return_tensors="pt")
inputs.to('cuda')
outputs = model(**inputs)
logits = outputs.logits
logits = logits.to('cpu')
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = torch.argmax(logits, -1).numpy()[0]
print("Predicted class:", predicted_class_idx)

Predicted class: 0


In [22]:
from torchvision import datasets
import torch


image_dir = "../TRAIN_IMAGES/POA_attribution"
def transform_image(self, image_files):
    inputs = self.feature_ext(
        [x.convert("RGB") for x in image_files["image"]], return_tensors="pt"
    )
    return inputs

def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    return {"pixel_values": pixel_values}

prediction_ds = datasets.ImageFolder(image_dir)
pred_ds = prediction_ds.with_transform(transform_image)
pred_loader = torch.utils.data.DataLoader(prediction_ds, batch_size = 32, shuffle=False, num_workers=4, collate_fn=collate_fn)

for inputs in pred_loader:
    print(inputs)

AttributeError: 'ImageFolder' object has no attribute 'with_transform'

In [2]:
import os
os.getcwd()

'/home/jovyan/team3/MSOAInternGang/experiments'

In [1]:
os.chdir("/home/jovyan/team3/MSOAInternGang/")
from src.train import TrainModel

tm = TrainModel(
        model_name=model_name, label_col=label_col, output_dir="vit-base-aie-15k"
    )
trm = tm.train()
tstm = tm.test()
return (trm, tstm)

ImportError: attempted relative import with no known parent package