In [1]:
# Imports
import os
import torch
import random
import argparse
import numpy as np
# import accelerate.utils
from pathlib import Path
import torch.backends.mps
import torch.backends.cudnn
from torch.cuda import (
    max_memory_allocated,
    reset_peak_memory_stats,
    reset_max_memory_allocated,
    memory_allocated,
)
from transformers import set_seed
# from accelerate import Accelerator
from distutils.util import strtobool
from model.optimizer import getOptim
from dataloader.logger import get_logger
from traineval.eval import calc_val_loss
from traineval.train import calc_train_loss
from dataloader.model_data import get_model_data
from transformers.utils.logging import (
    set_verbosity_error as transformers_vb_err,
)
from datasets.utils.logging import (
    set_verbosity_error as datasets_vb_err,
)

In [2]:
# Reuse the project's command-line parser inside the notebook.
from bert_arg_parser import parser

# Pass an explicit empty argv so argparse ignores the kernel's own sys.argv and
# returns only the parser defaults. A list is used instead of '' because
# argparse iterates over whatever it is given: a non-empty string would be
# split into single characters.
args = parser.parse_args([])

In [3]:
# Echo the parsed namespace so the run configuration is captured in the cell output.
args

Namespace(savepath='/scratch/vipul/models', epochs=3, model_name='bert-base-uncased', task_name='cola', sortby='alpha', max_length=128, batch_size=32, learning_rate=2e-05, seed=7, freeze=True, num_layers=0, alpha_ascending=False, slow_tokenizer=True, pad_to_max_length=True, max_train_steps=1000, grad_acc_steps=1, accelerate=False, add_layer_norm=False, verbose=False, debug=True, memlog=False)

In [4]:
# Notebook-specific overrides of the parsed defaults: two layers, verbose output.
vars(args).update(num_layers=2, verbose=True)

In [5]:
# Cache directory (exported as TRANSFORMERS_CACHE and handed to get_model_data
# further down in this notebook).
cache_dir = "/scratch/vipul/cache"

# Directory for the memory log, keyed by seed, task and layer-sorting strategy.
# A former "_asc_{args.alpha_ascending}" suffix is intentionally left out.
mempath = f"/scratch/vipul/GLUE/trainseed_{args.seed}/task_{args.task_name}/{args.sortby}"

In [6]:
# NOTE(review): transformers is already imported by the time this cell runs, so
# this variable may be set too late to change the library's default cache
# location — TODO confirm.
os.environ["TRANSFORMERS_CACHE"] = cache_dir

# Resolve the compute device BEFORE touching any torch.cuda API, so the cell
# also runs on MPS / CPU hosts instead of failing in torch.cuda.current_device().
if torch.cuda.is_available():
    # GPU 7 stays the preferred card; hosts with fewer than eight GPUs fall
    # back to the current CUDA device instead of raising an invalid-ordinal error.
    gpu_index = 7 if torch.cuda.device_count() > 7 else torch.cuda.current_device()
    device = torch.device(f"cuda:{gpu_index}")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
# if args.accelerate:
#     device = accelerator.device

# Memory statistics must come from the GPU that will actually host the model;
# None means "no CUDA device in use".
cuda_device = device if device.type == "cuda" else None

# Control randomness
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# accelerate.utils.set_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
set_seed(args.seed)  # transformers seed

# Memory Stats Initialization
# reset_peak_memory_stats already resets the max-allocated counter, so the
# deprecated reset_max_memory_allocated call is not repeated here.
if cuda_device is not None:
    reset_peak_memory_stats(device=cuda_device)
    start_memory = memory_allocated(device=cuda_device)
else:
    start_memory = 0

if args.verbose:
    print("SEED:", args.seed)
    task_info = (
        f"\n\n\nTask to finetune: {args.task_name}\n\n\n"
        + f"alpha Decreasing: {not args.alpha_ascending}\n\n\n"
        + f"Layers to train: {args.num_layers}\n\n\n"
        + f"Train randomly: {'random' in args.sortby.lower()}\n\n\n"
    )
    print(task_info)
else:
    # Silence library logging when not running verbosely.
    datasets_vb_err()
    transformers_vb_err()
    # NOTE(review): this only binds a notebook-level variable; it does not look
    # like it can reach the progress-bar switches inside transformers/datasets.
    # Presumably a disable_progress_bar() call was intended — confirm.
    _tqdm_active = False

SEED: 7



Task to finetune: cola


alpha Decreasing: True


Layers to train: 2


Train randomly: False





In [7]:
from datasets import load_dataset


In [8]:
# Build the model, both dataloaders and the optimizer, then move the model to
# the selected device.
model, train_dataloader, eval_dataloader = get_model_data(args, cache_dir=cache_dir)
model.to(device)  # type: ignore
optimizer = getOptim(args, model, vary_lyre=False, factor=1)

if args.verbose:
    # len() of a dataloader is reported as the "data size" here.
    print(f"Training data size: {len(train_dataloader)}")
    print(f"Validation data size: {len(eval_dataloader)}")

# Accelerator
# if args.accelerate:
#     model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
#         model, optimizer, train_dataloader, eval_dataloader
#     )

# Validation metrics before any training step ("epoch 0")
i_val_loss, i_val_acc = calc_val_loss(args, model, eval_dataloader, device)
if args.verbose:
    print(
        f"\nEpoch 0/{args.epochs}"
        f"|Val Loss: {i_val_loss:.2f} "
        f"|Val Acc: {i_val_acc:.2f}"
    )

# Run training and collect the loss / accuracy histories
train_loss, val_loss, val_acc = calc_train_loss(
    args=args,
    model=model,
    optimizer=optimizer,
    device=device,
    train_dataloader=train_dataloader,
    eval_dataloader=eval_dataloader,
)

# Put the pre-training validation metrics at the front of each history
val_loss = [i_val_loss] + val_loss
val_acc = [i_val_acc] + val_acc
base = dict(
    train_loss_base=train_loss,
    val_loss_base=val_loss,
    val_acc_base=val_acc,
)

if args.memlog:  # Memory Logging
    log_info = (
        f"\n\n{args.task_name} "
        f"{args.num_layers} Layers "
        f"{args.sortby} "
        f"ascending {args.alpha_ascending}"
    )
    # Sample current and peak allocation after training; values are in bytes
    # and converted to MB only when formatted below.
    end_memory = memory_allocated(device=cuda_device)
    peek_memory = max_memory_allocated(device=cuda_device)
    Path(mempath).mkdir(parents=True, exist_ok=True)
    logger = get_logger(mempath, "memlog.log")
    logger.info(log_info)
    logger.info(
        f"\nMemory usage before: {(start_memory/1024)/1024}MB\n"
        f"Memory usage after: {(end_memory/1024)/1024}MB"
    )
    logger.info(f"\nPeak Memory usage: {(peek_memory/1024)/1024}MB\n\n")

if not (args.debug and args.verbose):
    # Persist the collected histories under the configured save path
    Path(args.savepath).mkdir(parents=True, exist_ok=True)
    np.save(os.path.join(args.savepath, "baseline.npy"), base)  # type: ignore
else:
    print("\n--> Debug Mode <--")
    print("\nTrain Loss:")
    # Print every batch_size-th training loss to keep the output short
    print(*train_loss[:: args.batch_size])
    print("\nVal Loss:\n", val_loss)
    print("\nVal Acc:\n", val_acc)




Downloading and preparing dataset None/ax to file:///scratch/vipul/cache/parquet/ax-97eb8006079b1fff/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

TypeError: expected str, bytes or os.PathLike object, not NoneType