In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import AdamW, ViTImageProcessor, ViTForImageClassification
from NWRD_dataset import NWRD
from tqdm import tqdm
import numpy as np
import torch.nn.functional as F
import os
import torch.optim as optim
from torchvision import transforms


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fixed seed shared by every random number generator used in this notebook.
seed = 42

# Seed the CPU-side generators of PyTorch and NumPy, in that order.
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(seed)

_cuda_present = torch.cuda.is_available()
if _cuda_present:
    # Seed the current GPU first, then every visible GPU (multi-GPU setups).
    for _seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        _seed_fn(seed)
    # Request deterministic cuDNN kernels and switch off the auto-tuner;
    # this trades some speed for repeatable results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
# Debug switches for CUDA errors. They must be exported into the process
# environment: assigning Python variables with these names (as this cell used
# to do) has no effect on the CUDA runtime or on PyTorch. They are set before
# the first CUDA query below so they are in place before CUDA is initialised.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # run kernels synchronously so errors point at the failing call
# NOTE(review): PyTorch's error text talks about *compiling* with TORCH_USE_CUDA_DSA,
# so exporting it at runtime may be a no-op on stock builds — confirm it is needed.
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


In [4]:
# Per-image preprocessing handed to the dataset: first bring every image to a
# fixed 224x224 size, then turn it into a PyTorch tensor.
_preprocessing_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
]
transformations = transforms.Compose(_preprocessing_steps)

In [5]:
# Root folder of the processed NWRD dataset. Set the NWRD_DATA_ROOT environment
# variable to run the notebook on another machine; the default is the original
# author's local path.
DATA_ROOT = os.environ.get(
    "NWRD_DATA_ROOT",
    "C:\\Users\\hasee\\Desktop\\Germany_2024\\Dataset\\NWRDprocessed",
)

# NOTE(review): the sub-folder is spelled "calssification" exactly as in the
# original paths. The recorded run failed with FileNotFoundError under this
# folder — confirm whether the directory on disk is really spelled this way.
train_dir = os.path.join(DATA_ROOT, "train", "calssification")
val_dir = os.path.join(DATA_ROOT, "val", "calssification")

# Fail early with an actionable message instead of deep inside the dataset class.
for _split_dir in (train_dir, val_dir):
    if not os.path.isdir(_split_dir):
        raise FileNotFoundError(
            f"Dataset directory not found: {_split_dir!r}. "
            "Set NWRD_DATA_ROOT to the folder that contains 'train' and 'val'."
        )

train_ds = NWRD(root_dir=train_dir, train=True, transform=transformations)
val_ds = NWRD(root_dir=val_dir, train=False, transform=transformations)

# Training batches are reshuffled every epoch. Validation keeps a fixed order:
# shuffling adds nothing there and makes per-batch results harder to compare.
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\hasee\\Desktop\\Germany_2024\\Dataset\\NWRDprocessed\\train\\calssification/rust'

In [6]:
# Per-channel (R, G, B) mean and standard deviation. They are defined here but
# not handed to the processor: the lines that would apply them stay disabled.
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

# Both the image processor and the classifier come from the same checkpoint.
checkpoint_name = 'google/vit-base-patch16-224'
processor = ViTImageProcessor.from_pretrained(checkpoint_name)
model = ViTForImageClassification.from_pretrained(checkpoint_name)

# Disabled alternatives for overriding the processor's normalisation:
#   processor = ViTImageProcessor.from_pretrained(checkpoint_name, transform={'mean': mean, 'std': std})
#   processor.image_mean = mean
#   processor.image_std = std
#   print(processor)

In [8]:
# Swap the pretrained classification head for a fresh two-output linear layer.
NUM_CLASSES = 2
model.classifier = torch.nn.Linear(model.config.hidden_size, NUM_CLASSES)

# Keep the label count stored on the model and its config in step with the new
# head. Otherwise they still describe the checkpoint's original head, and any
# code path that relies on them (e.g. passing `labels=` to the model's forward,
# or reloading from the saved config) would disagree with the 2-logit output.
model.num_labels = NUM_CLASSES
model.config.num_labels = NUM_CLASSES

# Move the model to the selected device; as the last expression this also
# displays the module structure.
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

Fine-tune the model, starting from the pretrained weights.

In [8]:
# model_weights = torch.load('/home/Hirra/coding_files/crossvit/weights/wandb_vit_base_final_med_val_NWRD_epoch_50_lr_0.000000001_wd_0.001_batch_size_8_unaugmented_unequlaized/49.pth')
# model.load_state_dict(model_weights.state_dict())

In [9]:
# Plain SGD over all model parameters, with cross-entropy as the training loss.
# NOTE(review): the optimizer below uses lr = 3e-8, while the run label encodes
# "lr_0.000000003" (3e-9) — confirm which value is intended and align the two.
optimizer = optim.SGD(params=model.parameters(), lr=3e-8, weight_decay=1e-3)
criterion = torch.nn.CrossEntropyLoss()

# Checkpoints of this run are written below weights/<run label>; the folder is
# created up front and reused if it already exists.
weights_directory = 'wandb_vit_base_final_for_time_NWRD_epoch_50_lr_0.000000003_wd_0.001_batch_size_8_unaugmented_training'
weight_loc = "weights/" + weights_directory
os.makedirs(name=weight_loc, exist_ok=True)

In [10]:
import os
import wandb

# Credentials must never be stored in the notebook. wandb.login() without an
# explicit key uses the WANDB_API_KEY environment variable or a previously saved
# login, and otherwise prompts for the key interactively.
# SECURITY: an API key was previously hard-coded in this cell; it should be
# treated as leaked and revoked in the Weights & Biases account settings.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgptautomated[0m ([33mtukl_labwork[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\hasee\.netrc


True

In [11]:
# Name of the Weights & Biases project that receives this run. It is set exactly
# once: the cell used to set it via %env and then immediately overwrite it with
# the placeholder value "<crossvit>", so the real project name never took effect.
os.environ["WANDB_PROJECT"] = "crossvit_rust_classifier_new"
# NOTE(review): nothing visible in this notebook reads WANDB_REPORT_TO — confirm
# it is still needed. Kept unchanged.
os.environ["WANDB_REPORT_TO"] = "wandb"

env: WANDB_PROJECT=crossvit_rust_classifier_new


In [12]:
# Fine-tuning loop: one training pass and one validation pass per epoch, with a
# checkpoint and the mean losses logged to Weights & Biases after each pass.
wandb.init()

NUM_EPOCHS = 50
# Filled with {"epoch": ..., "val_loss": ...} for the epoch with the lowest
# mean validation loss seen so far.
best_epoch = {}
train_losses = []

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    # train() must be *called*; a bare `model.train` only references the method
    # and leaves the model's mode untouched.
    model.train()
    train_losses = []
    loop = tqdm(enumerate(train_loader), total=len(train_loader))
    for batch_idx, (images, labels) in loop:
        # The dataset already yields tensors, so the processor is told not to rescale them again.
        inputs = processor(images=images, return_tensors="pt", do_rescale=False).to(device)
        labels = labels.to(device)

        # Clear the gradients of the previous step. Without this, gradients
        # keep accumulating across batches and every update uses a stale sum.
        optimizer.zero_grad()

        outputs = model(**inputs)
        logits = outputs.logits

        loss = criterion(logits, labels)
        train_losses.append(loss.item())
        loss.backward()
        optimizer.step()
        loop.set_description(f"Epoch {epoch} train Loss {np.mean(train_losses):.4f}")

    epoch_train_loss = np.mean(train_losses)
    print("Epoch "+str(epoch)+" Train Loss "+str(epoch_train_loss))
    # The whole model object is saved (not only its state_dict), one file per epoch.
    torch.save(model, weight_loc+'/{}.pth'.format(epoch))
    wandb.log({"train_loss": epoch_train_loss, "epoch": epoch})

    # ---- validation pass ----
    # eval() must be called as well (see train() above).
    model.eval()
    val_losses = []

    loop = tqdm(enumerate(val_loader), total=len(val_loader))
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch_idx, (images, labels) in loop:
            inputs = processor(images=images, return_tensors="pt", do_rescale=False).to(device)
            labels = labels.to(device)

            outputs = model(**inputs)
            logits = outputs.logits

            # Each batch contributes exactly one loss value to the epoch mean.
            loss = criterion(logits, labels)
            val_losses.append(loss.item())

            loop.set_description(f"Epoch {epoch} Val Loss {np.mean(val_losses):.4f}")

    epoch_val_loss = np.mean(val_losses)
    wandb.log({"val_loss": epoch_val_loss, "epoch": epoch})

    # Remember which epoch's checkpoint generalised best.
    if not best_epoch or epoch_val_loss < best_epoch["val_loss"]:
        best_epoch = {"epoch": epoch, "val_loss": float(epoch_val_loss)}

# Release cached GPU memory once training has finished.
torch.cuda.empty_cache()


  context_layer = torch.nn.functional.scaled_dot_product_attention(
Epoch 0 train Loss 0.6551:  21%|██        | 51/241 [00:27<01:42,  1.85it/s]


KeyboardInterrupt: 