In [1]:
import os
import json
import random
import argparse
import itertools
import math
import torch
import numpy as np
from torch import nn, optim
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms
from tqdm import tqdm
import matplotlib.pyplot as plt

import utils

from data_utils import WBCdataset_Mask

from transformers import ViTForImageClassification, ViTMAEConfig

from torch.utils.tensorboard import SummaryWriter

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    
def get_WBC_transform():
    data_transforms = []
    data_transforms.append(transforms.Resize((224, 224)))
    data_transforms.append(transforms.ToTensor())
    return transforms.Compose(data_transforms)

def run(device, hps):
    wbc_subset = "wbc100"
    pretrain_options = "pRCC"
    use_mask=True
    
    out_dir = os.path.join(hps.out_dir, f'{wbc_subset}', f'{pretrain_options}')
    if use_mask:
        out_dir = os.path.join(out_dir, 'mask')
    os.makedirs(out_dir, exist_ok = True)
    writer = SummaryWriter(out_dir)
    
    if wbc_subset == "wbc1":
        training_files = hps.WBCdata.training_files_1
    elif wbc_subset == "wbc10":
        training_files = hps.WBCdata.training_files_10
    elif wbc_subset == "wbc50":
        training_files = hps.WBCdata.training_files_50
    else:
        training_files = hps.WBCdata.training_files_100
    
    train_data = WBCdataset_Mask(training_files, hps.WBCdata.label_dict, transform=get_WBC_transform(), use_mask=use_mask, is_train=True)
    valid_data = WBCdataset_Mask(hps.WBCdata.validation_files, hps.WBCdata.label_dict, transform=get_WBC_transform())
    
    label2id = {}
    id2label = {}

    for label in hps.WBCdata.label_dict.keys():
        label2id[label] = hps.WBCdata.label_dict[label]
        id2label[hps.WBCdata.label_dict[label]] = label
    
    if pretrain_options == "pRCC":
        model = ViTForImageClassification.from_pretrained("Mo0310/vitmae_pRCC_80epochs", 
            label2id=label2id,
            id2label=id2label,
            ignore_mismatched_sizes = True,
        ).to(device)
    elif pretrain_options == "facebook":
        model = ViTForImageClassification.from_pretrained("facebook/vit-mae-base", 
            label2id=label2id,
            id2label=id2label,
            ignore_mismatched_sizes = True,
        ).to(device)
    else:
        config = ViTMAEConfig.from_pretrained("facebook/vit-mae-base",
            label2id=label2id,
            id2label=id2label,
            ignore_mismatched_sizes = True)
        model = ViTForImageClassification(config).to(device)
        
    masked_pixel = torch.rand(1).to(device)
    
    train_loader = DataLoader(dataset = train_data, batch_size=hps.finetune.batch_size, shuffle=True)
    valid_loader = DataLoader(dataset = valid_data, batch_size=hps.finetune.batch_size, shuffle=False)
    
    # loss function
    criterion = nn.CrossEntropyLoss()
    # finetune optimizer
    learnable_params = list(model.parameters())
    learnable_params.append(masked_pixel)
    ft_optimizer = optim.AdamW(learnable_params, lr=hps.finetune.learning_rate)
    #ft_optimizer.param_groups.append({'params': masked_pixel })
    # finetune scheduler
    ft_scheduler = optim.lr_scheduler.MultiStepLR(ft_optimizer, milestones=[1, 2], gamma=hps.pretrain.lr_decay)
    #ft_scheduler = StepLR(ft_optimizer, step_size=5, gamma=hps.finetune.lr_decay)
    
    for epoch in range(hps.finetune.epochs):
        train_and_evaluate(device, epoch, model, masked_pixel, criterion, ft_optimizer, ft_scheduler, [train_loader, valid_loader], writer)
        
    return model
    

def train_and_evaluate(device, epoch, model, masked_pixel, criterion, optimizer, scheduler, loaders, writer):
    train_loader, valid_loader = loaders
    epoch_loss = 0
    epoch_accuracy = 0

    for data, label, mask in tqdm(train_loader):
        optimizer.zero_grad()
        
        data = data.to(device)
        mask = mask.to(device)
        label = label.to(device)
        
        p = torch.rand(1).item()
        if p > (1/(epoch + 1)):
            data = data * mask + masked_pixel * (1 - mask)

        output = model(data)
        loss = criterion(output.logits, label)
        
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        optimizer.step()

        acc = (output.logits.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc / len(train_loader)
        epoch_loss += loss / len(train_loader)

    with torch.no_grad():
        model.eval()
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        for data, label, mask in valid_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output.logits, label)

            acc = (val_output.logits.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc / len(valid_loader)
            epoch_val_loss += val_loss / len(valid_loader)
        model.train()
        
    scheduler.step()
    
    writer.add_scalar('./Loss/train', epoch_loss, epoch+1)
    writer.add_scalar('./ACC/train', epoch_accuracy, epoch+1)
    writer.add_scalar('./Loss/val', epoch_val_loss, epoch+1)
    writer.add_scalar('./ACC/val', epoch_val_accuracy, epoch+1)
    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )

  from .autonotebook import tqdm as notebook_tqdm
2023-10-23 06:55:27.701075: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-23 06:55:27.752619: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
hps = utils.get_hparams_from_file('./configs/base.json')
seed_everything(hps.seed)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = run(device, hps)

You are using a model of type vit_mae to instantiate a model of type vit. This is not supported for all configurations of models and can yield errors.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at Mo0310/vitmae_pRCC_80epochs and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 132/132 [02:09<00:00,  1.02it/s]


Epoch : 1 - loss : 0.2341 - acc: 0.9177 - val_loss : 0.2257 - val_acc: 0.9306



100%|██████████| 132/132 [02:07<00:00,  1.03it/s]


Epoch : 2 - loss : 0.0578 - acc: 0.9815 - val_loss : 0.0558 - val_acc: 0.9832



100%|██████████| 132/132 [02:06<00:00,  1.04it/s]


Epoch : 3 - loss : 0.0247 - acc: 0.9929 - val_loss : 0.0444 - val_acc: 0.9878



100%|██████████| 132/132 [02:06<00:00,  1.04it/s]


Epoch : 4 - loss : 0.0145 - acc: 0.9947 - val_loss : 0.0613 - val_acc: 0.9832



100%|██████████| 132/132 [02:06<00:00,  1.04it/s]


Epoch : 5 - loss : 0.0114 - acc: 0.9963 - val_loss : 0.0489 - val_acc: 0.9838



100%|██████████| 132/132 [02:05<00:00,  1.05it/s]


Epoch : 6 - loss : 0.0103 - acc: 0.9969 - val_loss : 0.0585 - val_acc: 0.9844



100%|██████████| 132/132 [02:07<00:00,  1.03it/s]


Epoch : 7 - loss : 0.0056 - acc: 0.9985 - val_loss : 0.0679 - val_acc: 0.9850



100%|██████████| 132/132 [02:06<00:00,  1.04it/s]


Epoch : 8 - loss : 0.0097 - acc: 0.9970 - val_loss : 0.0436 - val_acc: 0.9902



100%|██████████| 132/132 [02:05<00:00,  1.05it/s]


Epoch : 9 - loss : 0.0043 - acc: 0.9985 - val_loss : 0.0625 - val_acc: 0.9861



100%|██████████| 132/132 [02:06<00:00,  1.04it/s]


Epoch : 10 - loss : 0.0036 - acc: 0.9987 - val_loss : 0.0837 - val_acc: 0.9826



In [3]:
token = "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"
model.push_to_hub("5242_w_pRCC_wbc100_mask", token=token)

pytorch_model.bin:   0%|          | 0.00/343M [00:00<?, ?B/s]'(MaxRetryError("HTTPSConnectionPool(host='hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com', port=443): Max retries exceeded with url: /repos/6e/bd/6ebdc123f5e327462b8fadcf0a374e7ab79b10aa5f047ca5c215db42c66495cf/9ca25b4c240189238a485f6566a71b2a15b5d146c3cfd3cb1024aac37a8be3cc?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20231022%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231022T231819Z&X-Amz-Expires=86400&X-Amz-Signature=58d75334f3646120f7cd5e19a6b3fba4b37179d728c7cee691e1dad91832a3a8&X-Amz-SignedHeaders=host&partNumber=1&uploadId=MV4YsfW_EZHebY2fxktqZBKkTyKUyBzWNMGFRhsttRqN0CzdFkDgsCDJgdSuRomROaX5RiTD2aMnckrjIdZ03EyEeBnVhgg7enHtcqXZTE7XWtHNQwfUbSufy_zsr355&x-id=UploadPart (Caused by ProxyError('Cannot connect to proxy.', ConnectionResetError(104, 'Connection reset by peer')))"), '(Request ID: 3b345549-2a28-4501-9df5-9d9968b5fca8)')' thrown while requesting

CommitInfo(commit_url='https://huggingface.co/Mo0310/5242_w_pRCC_wbc100_mask/commit/6456145d2b5eaa9836647bdb31499de4364ce0c8', commit_message='Upload ViTForImageClassification', commit_description='', oid='6456145d2b5eaa9836647bdb31499de4364ce0c8', pr_url=None, pr_revision=None, pr_num=None)

In [4]:
print(model)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7