In [None]:
!pip install -q git+https://github.com/huggingface/peft.git transformers bitsandbytes datasets accelerate
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install evaluate

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [None]:
from datasets import load_dataset
import torch
from PIL import Image
from torch.utils.data import DataLoader
from tqdm import tqdm
import pickle
from transformers import AutoProcessor, Blip2ForConditionalGeneration
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load the BLIP-2 (OPT-2.7B) processor and model from the same checkpoint;
# device_map="auto" delegates device placement to the loading library.
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    device_map="auto",
)

# LoRA adapter settings: rank-16 updates on the modules named q_proj / k_proj,
# scaling factor 32, 5% adapter dropout, bias terms left untouched.
config = LoraConfig(
    target_modules=["q_proj", "k_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Run PEFT's k-bit training preparation on the base model, attach the LoRA
# adapters described by `config`, then report how many parameters will train.
model = get_peft_model(prepare_model_for_kbit_training(model), config)
model.print_trainable_parameters()

In [4]:
import os
from shutil import copyfile
from os import listdir
from os.path import isfile, join

# setting for kaggle format
lib_PATH = '/kaggle/input/vizwiz-dataset'

# Copy every regular file from the attached helper dataset into ../working so
# the helper modules imported below can be found.
# NOTE(review): this presumably relies on the notebook running from
# /kaggle/working, so that "../working" is the current directory -- confirm.
lib_files = [name for name in listdir(lib_PATH) if isfile(join(lib_PATH, name))]
for lib_f in lib_files:
    copyfile(src=join(lib_PATH, lib_f), dst=join("../working", lib_f))

# import all our functions (the helper modules copied above)
from preprocessing import *
from prepare_data import *
from vqa import *

#-------------------------------download VIZWIZ dataset--------------------------#

vizwiz_data, VIZWIZ_TRAIN_PATH, VIZWIZ_VALIDATION_PATH = load_dataset_vizwiz("/kaggle/input/vizwiz")

# Build the train split first, then the validation split, each paired with its image folder.
vizwiz_train_dataset, vizwiz_valid_dataset = (
    VQADataset(dataset=vizwiz_data[split], processor=processor, img_path=image_dir)
    for split, image_dir in (
        ('train', VIZWIZ_TRAIN_PATH),
        ('valid', VIZWIZ_VALIDATION_PATH),
    )
)


#-------------------------------download KVQA dataset--------------------------#

kvqa_data, KVQA_TRAIN_PATH, KVQA_VALIDATION_PATH = load_dataset_kvqa("/kaggle/input/vqa-blind-ko")

# Same construction for KVQA: train split, then validation split.
kvqa_train_dataset, kvqa_valid_dataset = (
    VQADataset(dataset=kvqa_data[split], processor=processor, img_path=image_dir)
    for split, image_dir in (
        ('train', KVQA_TRAIN_PATH),
        ('valid', KVQA_VALIDATION_PATH),
    )
)

loading dataset into memory...
0:00:06.067868
Training sets: 491479->20000 - Validating set: 70211->2000


In [5]:
import os

import wandb
from huggingface_hub.hf_api import HfFolder

# Credentials come from the environment instead of being pasted into the
# notebook, so they never end up in the saved file, its outputs, or a shell
# command line. Export HF_TOKEN and WANDB_API_KEY before running this cell.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Fail now rather than after training, when push_to_hub would need the token.
    raise RuntimeError(
        "HF_TOKEN is not set; export your Hugging Face access token before running this cell."
    )
HfFolder.save_token(hf_token)

# With key=None wandb falls back to its own credential lookup
# (e.g. WANDB_API_KEY or a previously stored login).
wandb.login(key=os.environ.get("WANDB_API_KEY"))

wandb.init(
    project="finetuning_kvqa_vizwiz_QA",
)

wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc


wandb: Currently logged in as: sooh-j. Use `wandb login --relogin` to force relogin


In [6]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from transformers import TrainingArguments

def compute_metrics(pred):
    """Compute accuracy, F1, precision and recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``predictions`` (logits, or a tuple/list whose
            first element is the logits) and ``label_ids`` (integer targets),
            both array-like with ``reshape``/``argmax`` support.

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.

    Labels and arg-maxed predictions are flattened and positions whose label
    is -100 (the ignore index used for padded/masked targets) are dropped.
    Binary averaging is kept when only the classes 0/1 occur; otherwise macro
    averaging is used, because scikit-learn rejects ``average='binary'`` for
    multiclass targets such as token ids.
    """
    logits = pred.predictions
    # Models with several outputs hand back a tuple; the logits come first.
    if isinstance(logits, (tuple, list)):
        logits = logits[0]

    labels = pred.label_ids.reshape(-1)
    preds = logits.argmax(-1).reshape(-1)
    # NOTE(review): for decoder-only language models the logits at position t
    # score token t+1, so predictions may need shifting against the labels
    # before comparison -- confirm against the model in use.

    keep = labels != -100
    labels, preds = labels[keep], preds[keep]

    observed = set(labels.tolist()) | set(preds.tolist())
    average = 'binary' if observed <= {0, 1} else 'macro'

    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=average)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

# Hyper-parameters shared by both fine-tuning stages; each stage below only
# adds what differs (its run name, plus the checkpoint format for KVQA).
_shared_training_kwargs = dict(
    output_dir='../results',          # output directory
    num_train_epochs=1,               # total number of training epochs
    per_device_train_batch_size=1,    # batch size per device during training
    per_device_eval_batch_size=1,     # batch size for evaluation
    warmup_steps=200,                 # number of warmup steps for learning rate scheduler
    logging_dir='../logs',            # directory for storing logs
    logging_steps=2000,               # how often to print logs
    do_train=True,                    # perform training
    fp16=True,                        # use mixed precision
    # Apex optimisation levels are spelled with the letter "O" ("O0".."O3");
    # the previous value "02" (digit zero) is not a valid level.
    fp16_opt_level="O2",
    seed=3,                           # seed for experiment reproducibility
)

vizwiz_training_args = TrainingArguments(
    run_name="blip-2_QA_finetuning-VIZWIZ",       # experiment name
    **_shared_training_kwargs,
)

KVQA_training_args = TrainingArguments(
    run_name="blip-2_QA_finetuning-KVQA",         # experiment name
    save_safetensors=False,                       # do not write checkpoints in safetensors format
    **_shared_training_kwargs,
)

In [None]:
#-------------------------------finetuning with VIZWIZ dataset--------------------------#

# Import Trainer explicitly: the notebook otherwise only gets the name if one
# of the wildcard helper imports happens to expose it.
from transformers import Trainer

import torch
import torch.nn as nn

#initialize Trainer
vizwiz_trainer = Trainer(
    model=model,
    args=vizwiz_training_args,
    train_dataset=vizwiz_train_dataset,
    eval_dataset=vizwiz_valid_dataset,
    compute_metrics=compute_metrics,
)

vizwiz_trainer.train()

# Fold the trained LoRA weights into the base model and publish the result.
model_vizwiz = model.merge_and_unload()
# push to the hub
model_vizwiz.push_to_hub("VQA-for-blind")

# merge_and_unload() replaces the LoRA layers inside the wrapped model, so
# `model` is left with no trainable adapters. Attach fresh adapters (same
# LoRA config) on top of the merged VizWiz weights so the next fine-tuning
# stage has parameters to train.
model = get_peft_model(model_vizwiz, config)

In [None]:
#-------------------------------finetuning with KVQA dataset--------------------------#

# Import Trainer explicitly: the notebook otherwise only gets the name if one
# of the wildcard helper imports happens to expose it.
from transformers import Trainer

#initialize Trainer
kvqa_trainer = Trainer(
    model=model,
    args=KVQA_training_args,
    train_dataset=kvqa_train_dataset,
    eval_dataset=kvqa_valid_dataset,
    compute_metrics=compute_metrics,
)

kvqa_trainer.train()

# Fold the trained LoRA weights into the base model; `model` now refers to
# the plain merged model rather than the PEFT wrapper.
model = model.merge_and_unload()

# Publish the merged weights (same Hub repository name as the VizWiz stage).
model.push_to_hub("VQA-for-blind")