In [1]:
from dataset_functions import from_path_to_dataloader
import torch
import torch.nn as nn
import torch.nn.functional as F
import transformers
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
#plot   
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the data loaders: a train/validation split of the training folder plus
# a held-out test loader.
path_train = './chaoyang-data/train'
path_test = './chaoyang-data/test'
batch_size = 16
# Fixed seed so the train/validation partition is identical on every
# Restart & Run All; otherwise validation metrics are not comparable across runs.
split_seed = 42

# NOTE(review): the meaning of the two boolean flags is defined in
# dataset_functions.from_path_to_dataloader, which is not visible here.
# The validation subset is carved out of this same dataset object, so it
# inherits whatever the flags enable for training data -- confirm that is intended.
full_train_dataloader = from_path_to_dataloader(path_train, batch_size, True, True)
test_dataloader = from_path_to_dataloader(path_test, batch_size, False, False)

# Hold out 10% of the training samples for validation.
full_train_dataset = full_train_dataloader.dataset
train_size = int(0.9 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader is shuffled; validation order stays fixed.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Use the GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig

# Candidate checkpoints; only path_vit is loaded below.
path_swin="microsoft/swinv2-base-patch4-window16-256"
path_vit="google/vit-base-patch16-384"
num_classes = 4

# Let from_pretrained build the classification head: passing num_labels makes
# the config, model.num_labels and the classifier layer agree with each other,
# and ignore_mismatched_sizes=True discards the checkpoint's original head
# (whose shape no longer matches) so a freshly initialised one is used instead.
# This also avoids hard-coding the backbone's hidden size.
model = AutoModelForImageClassification.from_pretrained(
    path_vit,
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
)
# Kept as a notebook-level name for any later cell that reads `config`.
config = model.config
model.to(device)

# AdamW over all parameters (backbone and head are both fine-tuned).
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
# Cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()


In [3]:
from tqdm import tqdm
#import classification_report from sklearn
from sklearn.metrics import classification_report
def evaluate(model, val_dataloader, criterion):
    """Evaluate ``model`` on every batch of ``val_dataloader``.

    Prints a scikit-learn classification report and returns summary numbers.
    The model is put in eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards.

    Args:
        model: Model called as ``model(**images)`` whose output exposes
            ``.logits``.
        val_dataloader: Iterable yielding ``(data, target)`` pairs. ``data``
            must support ``.to(device)`` and item access by ``'pixel_values'``.
        criterion: Loss callable applied to ``(logits, target)``.

    Returns:
        ``(val_loss, correct)``: the average loss per sample and the number
        of correctly classified samples.
    """
    val_loss = 0.0
    correct = 0
    n_samples = 0
    predictions = []
    labels = []
    # A mean-reduced criterion returns a per-batch average, so it has to be
    # re-weighted by the batch size before dividing by the sample count.
    mean_reduced = getattr(criterion, 'reduction', 'mean') == 'mean'

    # Disable dropout etc. while evaluating, then put the model back the way
    # the caller had it.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, target in tqdm(val_dataloader):
                # Send the data and the target to the selected device.
                images, target = data.to(device), target.to(device)
                images['pixel_values'] = images['pixel_values'].to(torch.float32)
                # Keep the batch dim and the last three dims.
                # Presumably this drops a singleton axis added upstream -- TODO confirm.
                s = images['pixel_values'].shape
                images['pixel_values'] = images['pixel_values'].view(s[0], s[-3], s[-2], s[-1])

                output = model(**images)

                batch_n = target.size(0)
                batch_loss = criterion(output.logits, target).item()
                val_loss += batch_loss * batch_n if mean_reduced else batch_loss

                # Flat (batch,) vector of predicted class indices.
                pred = output.logits.argmax(dim=1)
                correct += (pred == target).sum().item()
                n_samples += batch_n

                predictions.extend(pred.cpu().tolist())
                labels.extend(target.cpu().tolist())
    finally:
        model.train(was_training)

    # Guard against an empty loader: nothing to average or report.
    if n_samples:
        val_loss /= n_samples
        print(classification_report(labels, predictions))
    return val_loss, correct

In [4]:

# Directory under which one checkpoint per epoch is written.
output_path = './models/vit_384_finetuning/'
def train(model, train_dataloader, val_dataloader, optimizer, criterion, epochs, test_loader=None):
    """Fine-tune ``model`` and evaluate it after every epoch.

    After each epoch the model is evaluated on the validation and test
    loaders, a checkpoint is written under ``output_path``, and once all
    epochs finish the train/validation loss curves are plotted.

    Args:
        model: Model called as ``model(**images)`` whose output exposes
            ``.logits`` and which provides ``save_pretrained``.
        train_dataloader: Iterable yielding ``(data, target)`` training batches.
        val_dataloader: Loader passed to ``evaluate`` for validation.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable applied to ``(logits, target)``.
        epochs: Number of passes over ``train_dataloader``.
        test_loader: Optional loader for the per-epoch test evaluation. When
            omitted, the notebook-level ``test_dataloader`` is used.

    Returns:
        None.
    """
    if test_loader is None:
        # Fall back to the loader defined at notebook level.
        test_loader = test_dataloader

    # Create the checkpoint directory up front so a bad path fails before
    # an epoch of training has been spent.
    os.makedirs(output_path, exist_ok=True)

    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        # Re-assert training mode every epoch, since evaluation runs in between.
        model.train()
        train_losses_inner = []
        for (data, target) in tqdm(train_dataloader):
            # Send the data and the target to the selected device.
            images, target = data.to(device), target.to(device)
            images['pixel_values'] = images['pixel_values'].to(torch.float32)
            # Keep the batch dim and the last three dims.
            # Presumably this drops a singleton axis added upstream -- TODO confirm.
            s = images['pixel_values'].shape
            images['pixel_values'] = images['pixel_values'].view(s[0], s[-3], s[-2], s[-1])

            optimizer.zero_grad()
            output = model(**images)
            loss = criterion(output.logits, target)
            loss.backward()
            optimizer.step()
            # Store a plain float so no tensor (or graph) is kept alive.
            train_losses_inner.append(loss.item())
        train_losses.append(np.mean(train_losses_inner))

        # Evaluate the model on the validation and test sets.
        print('Epoch: ', epoch)
        print('Validation set:')
        val_loss, _ = evaluate(model, val_dataloader, criterion)
        val_losses.append(val_loss)
        print('Test set:')
        evaluate(model, test_loader, criterion)

        # save_pretrained writes a directory; the '.pth' suffix is kept only
        # so existing checkpoint paths remain valid.
        model.save_pretrained(os.path.join(output_path, 'model_' + str(epoch) + '.pth'))

    # Plot the train and validation losses per epoch.
    print(train_losses)
    plt.plot(train_losses, label='train loss')
    plt.plot(val_losses, label='validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()
    

        

In [5]:

# Launch fine-tuning on the training split, validating after each epoch.
# (For a quick smoke test, pass val_dataloader as the training loader and
# lower the epoch count.)
train(
    model=model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=50,
)

  9%|▉         | 29/316 [00:35<05:16,  1.10s/it]