In [1]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn import model_selection, metrics
import cv2
from PIL import Image
import random

In [2]:
# Preprocessing pipeline: resize to 384x384, convert to a tensor, then
# normalize every channel with mean 0.5 and std 0.5.
transform_train = transforms.Compose(
    [
        transforms.Resize((384, 384)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [3]:
# Training images are read with ImageFolder from this directory and passed
# through the preprocessing pipeline defined above.
train_data_path = '../input/d/datasets/mostovik71/vagonetkidata/vagonetkinew/train'
train_data = datasets.ImageFolder(
    root=train_data_path,
    transform=transform_train,
)


In [4]:
def seed_everything(seed = 1234):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), and configures cuDNN for deterministic
    behavior.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)

    # Exported for child processes; the hash seed of the already running
    # interpreter is fixed at start-up and is not affected by this line.
    os.environ['PYTHONHASHSEED'] = str(seed)

    np.random.seed(seed)

    torch.manual_seed(seed)

    # Seed every visible GPU, not just the current one. This call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different kernels between runs, so it must be
    # disabled for the `deterministic` flag to give repeatable results.
    torch.backends.cudnn.benchmark = False

In [5]:
# Fix all RNG seeds before any loader or model is created; the seed is passed
# explicitly and equals the function's default value.
seed_everything(seed=1234)

In [10]:
# Mini-batch size used by the data loaders in this notebook.
batch_size = 8

# Shuffled training loader; batches are produced in the main process
# (num_workers=0) with pinned-memory staging enabled.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)

In [15]:
def train_model(model, loss, optimizer, scheduler, num_epochs, dataloader=None):
    """Train ``model`` for ``num_epochs`` epochs and return it.

    Args:
        model: Module whose forward output exposes a ``.logits`` attribute.
        loss: Callable invoked as ``loss(logits, labels)`` that returns a
            scalar tensor supporting ``.backward()``.
        optimizer: Optimizer that updates the trainable parameters.
        scheduler: Learning-rate scheduler; stepped once after each epoch.
        num_epochs: Number of passes over the data.
        dataloader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``train_dataloader`` when omitted.

    Returns:
        The same ``model`` object, after training.
    """
    if dataloader is None:
        dataloader = train_dataloader

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level `device` variable.
    device = next(model.parameters()).device

    for epoch in range(num_epochs):
        print('Epoch {}/{}:'.format(epoch, num_epochs - 1), flush=True)

        # Make sure layers such as dropout run in training mode, even if the
        # model was previously switched to eval mode.
        model.train()

        running_loss = 0.
        n_batches = 0
        n_correct = 0
        n_samples = 0

        for inputs, labels in tqdm(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            preds = model(inputs).logits
            loss_value = loss(preds, labels)
            preds_class = preds.argmax(dim=1)
            loss_value.backward()
            optimizer.step()

            running_loss += loss_value.item()
            n_batches += 1
            # Count correct samples so that a smaller final batch does not
            # skew the epoch accuracy; .item() keeps the stats as floats.
            n_correct += (preds_class == labels).sum().item()
            n_samples += labels.size(0)

        # The scheduler was previously accepted but never advanced.
        scheduler.step()

        # Guard against an empty dataloader.
        epoch_loss = running_loss / max(n_batches, 1)
        epoch_acc = n_correct / max(n_samples, 1)

        print(epoch_loss, epoch_acc)

    return model

In [None]:
# Load the pretrained ViT checkpoint.
# NOTE(review): output_hidden_states=True is kept from the original call, but
# the visible cells only read `.logits` -- confirm whether it is still needed.
model = ViTForImageClassification.from_pretrained('google/vit-base-patch32-384', output_hidden_states=True)

# Freeze every pretrained weight ...
for param in model.parameters():
    param.requires_grad = False

# ... and replace the classification head with a fresh, trainable 2-class
# linear layer (new modules have requires_grad=True by default).
model.classifier = torch.nn.Linear(model.classifier.in_features, 2)

# Fall back to the CPU when no GPU is present instead of crashing.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Assigning the result keeps the cell from printing the whole module repr.
model = model.to(device)

In [None]:
loss = torch.nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that are actually trainable; the
# frozen ones never receive gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-3)

# NOTE(review): with step_size=7 the learning rate cannot drop during a
# 7-epoch run -- lower step_size if a decay is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Bind the result so the cell does not print the whole module repr.
model = train_model(model, loss, optimizer, scheduler, num_epochs=7)

In [27]:
class MyDataset(Dataset):
    """Dataset of unlabeled images, yielding ``(image, file_name)`` pairs.

    Args:
        image_paths: Sequence of file names relative to ``root``.
        transform: Callable applied to the loaded PIL image; may be ``None``
            to return the PIL image unchanged.
        root: Directory containing the images. Defaults to the test
            directory this class originally hard-coded.
    """

    # Directory used when no explicit `root` is given.
    default_root = '../input/d/datasets/mostovik71/vagonetkidata/vagonetkinew/test/'

    def __init__(self, image_paths, transform, root=None):
        self.image_paths = image_paths
        self.transform = transform
        self.root = self.default_root if root is None else root

    def __len__(self):
        """Return the number of images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, transform it, and return it with its name.

        Raises:
            FileNotFoundError: If the image file does not exist.
        """
        image_filepath = self.image_paths[idx]
        full_path = os.path.join(self.root, image_filepath)

        # Load with PIL and force RGB. The previous cv2.imread path produced
        # BGR arrays (or None for unreadable files) that were then treated as
        # RGB, so channels were swapped relative to the training images,
        # which ImageFolder loads through PIL.
        with Image.open(full_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, image_filepath

In [28]:
# os.listdir returns entries in arbitrary order; sort them so the order of
# the predictions is reproducible across runs and machines.
test = sorted(os.listdir('../input/d/datasets/mostovik71/vagonetkidata/vagonetkinew/test'))

In [29]:
# The test set goes through the same preprocessing pipeline as training.
test_dataset = MyDataset(image_paths=test, transform=transform_train)

# No shuffling, so batches follow the order of `test`.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
)

In [None]:
# Switch to inference mode before predicting.
model.eval()

test_predictions = []
test_img_paths = []

# Gradients are not needed for inference.
with torch.no_grad():
    for inputs, paths in tqdm(test_dataloader):
        inputs = inputs.to(device)
        preds = model(inputs).logits
        # Softmax probability of class index 1 for every image in the batch.
        probs = torch.nn.functional.softmax(preds, dim=1)[:, 1]
        test_predictions.append(probs.cpu().numpy())
        test_img_paths.extend(paths)

# One flat array of probabilities, in the same order as `test_img_paths`.
test_predictions = np.concatenate(test_predictions)

In [33]:
# Per-channel statistics (0.5 for each of the three channels) used by
# show_input to undo the normalization before plotting.
mean = np.full(3, 0.5)
std = np.full(3, 0.5)

In [34]:
def show_input(input_tensor, title=''):
    """Display one normalized image tensor with matplotlib.

    Args:
        input_tensor: Channels-first image tensor, normalized with the
            notebook-level ``mean`` and ``std``.
        title: Title shown above the image.
    """
    # detach()/cpu() make this work for tensors that track gradients or live
    # on a GPU; permute moves channels last, as imshow expects.
    image = input_tensor.detach().cpu().permute(1, 2, 0).numpy()
    # Undo the normalization and clamp float round-off so imshow receives
    # values inside its valid [0, 1] range.
    image = np.clip(std * image + mean, 0.0, 1.0)
    plt.imshow(image)
    plt.title(title)
    plt.show()

In [36]:
# The test loader yields (images, file names); there are no labels here.
inputs, paths = next(iter(test_dataloader))

# The loader is not shuffled, so the first batch lines up with the first
# entries of `test_predictions`.
for img, path, pred in zip(inputs, paths, test_predictions):
    show_input(img, title='{}: {:.3f}'.format(path, pred))