In [1]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")

CUDA available: True
Device: cuda


In [2]:
!mkdir -p /content/hw4/part-1-code

In [3]:
from google.colab import files
import os

os.chdir('/content/hw4/part-1-code/')
print("Upload main.py, utils.py, and requirements.txt one by one:")
uploaded = files.upload()

Upload main.py, utils.py, and requirements.txt one by one:


Saving main.py to main.py
Saving README.md to README.md
Saving requirements.txt to requirements.txt
Saving utils.py to utils.py


In [4]:
%cd /content/hw4/part-1-code/

!pip install transformers datasets torch tqdm evaluate nltk scikit-learn -q

/content
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import nltk
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [6]:
%%writefile main.py
import datasets
from datasets import load_dataset
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import evaluate
import random
import argparse
from utils import *
import os

# Set seed
random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


# Tokenize the input
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# Core training function
def do_train(args, model, train_dataloader, save_dir="./out"):
    optimizer = AdamW(model.parameters(), lr=args.learning_rate)
    num_epochs = args.num_epochs
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
    )
    model.train()
    progress_bar = tqdm(range(num_training_steps))

    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Implement the training loop --- make sure to use the optimizer and lr_sceduler (learning rate scheduler)
    # Remember that pytorch uses gradient accumumlation so you need to use zero_grad (https://pytorch.org/tutorials/recipes/recipes/zeroing_out_gradients.html)
    # You can use progress_bar.update(1) to see the progress during training
    # You can refer to the pytorch tutorial covered in class for reference

    for epoch in range(num_epochs):
        for batch in train_dataloader:
            # Move batch to device
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss

            # Backward pass
            loss.backward()

            # Update weights
            optimizer.step()
            lr_scheduler.step()

            # Zero gradients for next iteration
            optimizer.zero_grad()

            # Update progress bar
            progress_bar.update(1)

    ##### YOUR CODE ENDS HERE ######

    print("Training completed...")
    print("Saving Model....")
    model.save_pretrained(save_dir)

    return


# Core evaluation function
def do_eval(eval_dataloader, output_dir, out_file):
    model = AutoModelForSequenceClassification.from_pretrained(output_dir)
    model.to(device)
    model.eval()

    metric = evaluate.load("accuracy")
    out_file = open(out_file, "w")

    for batch in tqdm(eval_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])

        # write to output file
        for pred, label in zip(predictions, batch["labels"]):
                out_file.write(f"{pred.item()}\n")
                out_file.write(f"{label.item()}\n")
    out_file.close()
    score = metric.compute()

    return score


# Created a dataladoer for the augmented training dataset
def create_augmented_dataloader(args, dataset):
    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Here, 'dataset' is the original dataset. You should return a dataloader called 'train_dataloader' -- this
    # dataloader will be for the original training split augmented with 5k random transformed examples from the training set.
    # You may find it helpful to see how the dataloader was created at other place in this code.

    raise NotImplementedError

    ##### YOUR CODE ENDS HERE ######

    return train_dataloader


# Create a dataloader for the transformed test set
def create_transformed_dataloader(args, dataset, debug_transformation):
    # Print 5 random transformed examples
    if debug_transformation:
        small_dataset = dataset["test"].shuffle(seed=42).select(range(5))
        small_transformed_dataset = small_dataset.map(custom_transform, load_from_cache_file=False)
        for k in range(5):
            print("Original Example ", str(k))
            print(small_dataset[k])
            print("\n")
            print("Transformed Example ", str(k))
            print(small_transformed_dataset[k])
            print('=' * 30)

        exit()

    transformed_dataset = dataset["test"].map(custom_transform, load_from_cache_file=False)
    transformed_tokenized_dataset = transformed_dataset.map(tokenize_function, batched=True, load_from_cache_file=False)
    transformed_tokenized_dataset = transformed_tokenized_dataset.remove_columns(["text"])
    transformed_tokenized_dataset = transformed_tokenized_dataset.rename_column("label", "labels")
    transformed_tokenized_dataset.set_format("torch")

    transformed_val_dataset = transformed_tokenized_dataset
    eval_dataloader = DataLoader(transformed_val_dataset, batch_size=args.batch_size)

    return eval_dataloader


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # Arguments
    parser.add_argument("--train", action="store_true", help="train a model on the training data")
    parser.add_argument("--train_augmented", action="store_true", help="train a model on the augmented training data")
    parser.add_argument("--eval", action="store_true", help="evaluate model on the test set")
    parser.add_argument("--eval_transformed", action="store_true", help="evaluate model on the transformed test set")
    parser.add_argument("--model_dir", type=str, default="./out")
    parser.add_argument("--debug_train", action="store_true",
                        help="use a subset for training to debug your training loop")
    parser.add_argument("--debug_transformation", action="store_true",
                        help="print a few transformed examples for debugging")
    parser.add_argument("--learning_rate", type=float, default=5e-5)
    parser.add_argument("--num_epochs", type=int, default=3)
    parser.add_argument("--batch_size", type=int, default=8)

    args = parser.parse_args()

    global device
    global tokenizer

    # Device
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

    # Tokenize the dataset
    dataset = load_dataset("imdb")
    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Prepare dataset for use by model
    tokenized_dataset = tokenized_dataset.remove_columns(["text"])
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format("torch")

    small_train_dataset = tokenized_dataset["train"].shuffle(seed=42).select(range(4000))
    small_eval_dataset = tokenized_dataset["test"].shuffle(seed=42).select(range(1000))

    # Create dataloaders for iterating over the dataset
    if args.debug_train:
        train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=args.batch_size)
        eval_dataloader = DataLoader(small_eval_dataset, batch_size=args.batch_size)
        print(f"Debug training...")
        print(f"len(train_dataloader): {len(train_dataloader)}")
        print(f"len(eval_dataloader): {len(eval_dataloader)}")
    else:
        train_dataloader = DataLoader(tokenized_dataset["train"], shuffle=True, batch_size=args.batch_size)
        eval_dataloader = DataLoader(tokenized_dataset["test"], batch_size=args.batch_size)
        print(f"Actual training...")
        print(f"len(train_dataloader): {len(train_dataloader)}")
        print(f"len(eval_dataloader): {len(eval_dataloader)}")

    # Train model on the original training dataset
    if args.train:
        model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
        model.to(device)
        do_train(args, model, train_dataloader, save_dir="./out")
        # Change eval dir
        args.model_dir = "./out"

    # Train model on the augmented training dataset
    if args.train_augmented:
        train_dataloader = create_augmented_dataloader(args, dataset)
        model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
        model.to(device)
        do_train(args, model, train_dataloader, save_dir="./out_augmented")
        # Change eval dir
        args.model_dir = "./out_augmented"

    # Evaluate the trained model on the original test dataset
    if args.eval:
        out_file = os.path.basename(os.path.normpath(args.model_dir))
        out_file = out_file + "_original.txt"
        score = do_eval(eval_dataloader, args.model_dir, out_file)
        print("Score: ", score)

    # Evaluate the trained model on the transformed test dataset
    if args.eval_transformed:
        out_file = os.path.basename(os.path.normpath(args.model_dir))
        out_file = out_file + "_transformed.txt"
        eval_transformed_dataloader = create_transformed_dataloader(args, dataset, args.debug_transformation)
        score = do_eval(eval_transformed_dataloader, args.model_dir, out_file)
        print("Score: ", score)


Overwriting main.py


In [7]:
%%writefile utils.py
import datasets
from datasets import load_dataset
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import evaluate
import random
import argparse
from nltk.corpus import wordnet
from nltk import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer

random.seed(0)


def example_transform(example):
    example["text"] = example["text"].lower()
    return example


### Rough guidelines --- typos
# For typos, you can try to simulate nearest keys on the QWERTY keyboard for some of the letter (e.g. vowels)
# You can randomly select each word with some fixed probability, and replace random letters in that word with one of the
# nearest keys on the keyboard. You can vary the random probablity or which letters to use to achieve the desired accuracy.


### Rough guidelines --- synonym replacement
# For synonyms, use can rely on wordnet (already imported here). Wordnet (https://www.nltk.org/howto/wordnet.html) includes
# something called synsets (which stands for synonymous words) and for each of them, lemmas() should give you a possible synonym word.
# You can randomly select each word with some fixed probability to replace by a synonym.


def custom_transform(example):
    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Design and implement the transformation as mentioned in pdf
    # You are free to implement any transformation but the comments at the top roughly describe
    # how you could implement two of them --- synonym replacement and typos.

    # You should update example["text"] using your transformation

    raise NotImplementedError

    ##### YOUR CODE ENDS HERE ######

    return example


Overwriting utils.py


In [8]:
!python main.py --train --eval --debug_train

2025-11-15 03:04:50.437941: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 03:04:50.455528: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763175890.476832    2466 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763175890.483245    2466 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763175890.499451    2466 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [8]:
%cd /content/hw4/part-1-code/

!python main.py --train --eval

/content
2025-11-15 18:52:34.611221: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 18:52:34.630510: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763232754.652966    2286 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763232754.659625    2286 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763232754.677122    2286 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid

In [11]:
# Cell 10: Download output file
from google.colab import files

# Download the result file for submission
files.download('/content/hw4/part-1-code/out_original.txt')

# Also save to Google Drive (optional but recommended)
from google.colab import drive
drive.mount('/content/drive')

!cp /content/hw4/part-1-code/out_original.txt /content/drive/MyDrive/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Mounted at /content/drive


In [9]:
%%writefile utils.py
import datasets
from datasets import load_dataset
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import evaluate
import random
import argparse
from nltk.corpus import wordnet
from nltk import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer

random.seed(0)


def example_transform(example):
    example["text"] = example["text"].lower()
    return example


### Rough guidelines --- typos
# For typos, you can try to simulate nearest keys on the QWERTY keyboard for some of the letter (e.g. vowels)
# You can randomly select each word with some fixed probability, and replace random letters in that word with one of the
# nearest keys on the keyboard. You can vary the random probablity or which letters to use to achieve the desired accuracy.


### Rough guidelines --- synonym replacement
# For synonyms, use can rely on wordnet (already imported here). Wordnet (https://www.nltk.org/howto/wordnet.html) includes
# something called synsets (which stands for synonymous words) and for each of them, lemmas() should give you a possible synonym

# You can randomly select each word with some fixed probability to replace by a synonym.


def custom_transform(example):
    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Transformation: Synonym Replacement using WordNet
    #
    # Strategy: Randomly replace words with their synonyms from WordNet.
    # For each word in the sentence, with probability 0.3, we replace it
    # with a random synonym if one exists in WordNet.
    #
    # This is reasonable because users naturally use different words to
    # express the same sentiment (e.g., "great" vs "excellent",
    # "movie" vs "film", "bad" vs "terrible").

    text = example["text"]
    words = word_tokenize(text)

    new_words = []
    for word in words:
        # Replace with 30% probability
        if random.random() < 0.3:
            # Get synonyms from WordNet
            synsets = wordnet.synsets(word)
            if synsets:
                # Get all lemmas (synonym words) from all synsets
                lemmas = []
                for syn in synsets:
                    for lemma in syn.lemmas():
                        lemma_name = lemma.name().replace('_', ' ')
                        if lemma_name.lower() != word.lower():
                            lemmas.append(lemma_name)

                # Replace with random synonym if available
                if lemmas:
                    new_words.append(random.choice(lemmas))
                else:
                    new_words.append(word)
            else:
                new_words.append(word)
        else:
            new_words.append(word)

    # Reconstruct the sentence
    detokenizer = TreebankWordDetokenizer()
    example["text"] = detokenizer.detokenize(new_words)

    ##### YOUR CODE ENDS HERE ######

    return example

Overwriting utils.py


In [21]:
%cd /content/hw4/part-1-code/

!python main.py --eval_transformed --debug_transformation

/content
2025-11-15 18:16:32.591301: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 18:16:32.609551: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763230592.631260    2936 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763230592.637933    2936 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763230592.655058    2936 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid

In [None]:
%cd /content/hw4/part-1-code/

!python main.py --eval_transformed

/content
2025-11-15 19:26:36.389253: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 19:26:36.407481: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763234796.429402   10925 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763234796.436103   10925 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763234796.452815   10925 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid

In [12]:
%cd /content/hw4/part-1-code/

!python main.py --eval_transformed --model_dir out

/content
2025-11-15 19:56:36.025262: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 19:56:36.043314: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763236596.065158   18603 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763236596.071785   18603 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763236596.088698   18603 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid

In [32]:
!ls -la /content/hw4/part-1-code/

total 136
drwxr-xr-x 3 root root   4096 Nov 15 18:43 .
drwxr-xr-x 3 root root   4096 Nov 15 18:08 ..
-rw-r--r-- 1 root root   9258 Nov 15 18:36 main.py
-rw-r--r-- 1 root root 100000 Nov 15 18:40 out_original.txt
drwxr-xr-x 2 root root   4096 Nov 15 18:14 __pycache__
-rw-r--r-- 1 root root    383 Nov 15 18:09 README.md
-rw-r--r-- 1 root root     98 Nov 15 18:09 requirements.txt
-rw-r--r-- 1 root root   3077 Nov 15 18:14 utils.py


In [33]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [35]:
!ls -la /content/drive/MyDrive/


total 4082
-rw------- 1 root root 282383 May  4  2025 'Adapted CLIP Model Training and Testing'
drwx------ 2 root root   4096 Mar 10  2025 'Colab Notebooks'
-rw------- 1 root root    169 Jun  4 19:11  Companies.gsheet
-rw------- 1 root root    169 Sep 25 19:19 'given this is an assignment question we are doing....gsheet'
drwx------ 2 root root   4096 Mar 26  2025 'Google AI Studio'
-rw------- 1 root root    169 Sep 18 04:02  Homework1_ComputerVision_Fall2025.gdoc
drwx------ 3 root root   4096 Nov 15 02:57  hw4
-rw------- 1 root root 776307 Jun  2 17:16  M2Lines_Coding_Assignment.ipynb
-rw------- 1 root root    169 Dec  2  2024 'ML project presentation-5.gslides'
-rw------- 1 root root    169 May 20 20:53 'nyu cs.gsheet'
-rw------- 1 root root   3330 Jan  6  2025  NYU_SR_TUNF.pdf
-rw------- 1 root root  35978 Nov  3 21:44 'NYU_transcript_3 (1).pdf'
-rw------- 1 root root  35978 Nov  3 21:44 'NYU_transcript_3 (2).pdf'
-rw------- 1 root root  35978 Nov  3 21:44 'NYU_transcript_3 (3).pdf'


In [36]:
!find /content/drive/MyDrive/ -name "out*" -o -name "*bert*" -o -name "*.safetensors" 2>/dev/null | head -20

/content/drive/MyDrive/out_original.txt


In [37]:
!find /content/drive/MyDrive/ -name "config.json" 2>/dev/null

In [38]:
!find /content/drive/MyDrive/ -type d -name "out" 2>/dev/null

In [11]:
  !ls -la /content/hw4/part-1-code/out/

total 423128
drwxr-xr-x 2 root root      4096 Nov 15 19:18 .
drwxr-xr-x 4 root root      4096 Nov 15 19:28 ..
-rw-r--r-- 1 root root       681 Nov 15 19:18 config.json
-rw-r--r-- 1 root root 433270768 Nov 15 19:18 model.safetensors


In [13]:

# Download the output file for submission
from google.colab import files
files.download('out_transformed.txt')

# Save everything to Google Drive
from google.colab import drive
drive.mount('/content/drive')

!cp -r /content/hw4/part-1-code/out /content/drive/MyDrive/hw4_bert_model
!cp /content/hw4/part-1-code/out_transformed.txt /content/drive/MyDrive/
!cp /content/hw4/part-1-code/out_original.txt /content/drive/MyDrive/
!cp /content/hw4/part-1-code/utils.py /content/drive/MyDrive/
!cp /content/hw4/part-1-code/main.py /content/drive/MyDrive/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Mounted at /content/drive


In [14]:
%%writefile main.py
import datasets
from datasets import load_dataset
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from torch.optim import AdamW
from transformers import get_scheduler
import torch
from tqdm.auto import tqdm
import evaluate
import random
import argparse
from utils import *
import os

# Set seed
random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


# Tokenize the input
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# Core training function
def do_train(args, model, train_dataloader, save_dir="./out"):
    optimizer = AdamW(model.parameters(), lr=args.learning_rate)
    num_epochs = args.num_epochs
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
    )
    model.train()
    progress_bar = tqdm(range(num_training_steps))

    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Implement the training loop --- make sure to use the optimizer and lr_sceduler (learning rate scheduler)
    # Remember that pytorch uses gradient accumumlation so you need to use zero_grad (https://pytorch.org/tutorials/recipes/recipes/zeroing_out_gradients.html)
    # You can use progress_bar.update(1) to see the progress during training
    # You can refer to the pytorch tutorial covered in class for reference

    for epoch in range(num_epochs):
        for batch in train_dataloader:
            # Move batch to device
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss

            # Backward pass
            loss.backward()

            # Update weights
            optimizer.step()
            lr_scheduler.step()

            # Zero gradients for next iteration
            optimizer.zero_grad()

            # Update progress bar
            progress_bar.update(1)

    ##### YOUR CODE ENDS HERE ######

    print("Training completed...")
    print("Saving Model....")
    model.save_pretrained(save_dir)

    return


# Core evaluation function
def do_eval(eval_dataloader, output_dir, out_file):
    model = AutoModelForSequenceClassification.from_pretrained(output_dir)
    model.to(device)
    model.eval()

    metric = evaluate.load("accuracy")
    out_file = open(out_file, "w")

    for batch in tqdm(eval_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])

        # write to output file
        for pred, label in zip(predictions, batch["labels"]):
                out_file.write(f"{pred.item()}\n")
                out_file.write(f"{label.item()}\n")
    out_file.close()
    score = metric.compute()

    return score


# Created a dataladoer for the augmented training dataset
def create_augmented_dataloader(args, dataset):
    ################################
    ##### YOUR CODE BEGINGS HERE ###

    # Here, 'dataset' is the original dataset. You should return a dataloader called 'train_dataloader' -- this
    # dataloader will be for the original training split augmented with 5k random transformed examples from the training set.
    # You may find it helpful to see how the dataloader was created at other place in this code.

    # Step 1: Get the original training dataset
    train_dataset = dataset["train"]

    # Step 2: Sample 5000 random examples from training set
    # Shuffle and select 5000 examples
    sampled_dataset = train_dataset.shuffle(seed=42).select(range(5000))

    # Step 3: Apply transformation to these 5000 examples
    transformed_dataset = sampled_dataset.map(custom_transform, load_from_cache_file=False)

    # Step 4: Combine original training data with transformed examples
    from datasets import concatenate_datasets
    augmented_dataset = concatenate_datasets([train_dataset, transformed_dataset])

    # Step 5: Tokenize the augmented dataset
    augmented_tokenized = augmented_dataset.map(tokenize_function, batched=True, load_from_cache_file=False)
    augmented_tokenized = augmented_tokenized.remove_columns(["text"])
    augmented_tokenized = augmented_tokenized.rename_column("label", "labels")
    augmented_tokenized.set_format("torch")

    # Step 6: Create dataloader
    train_dataloader = DataLoader(augmented_tokenized, shuffle=True, batch_size=args.batch_size)

    ##### YOUR CODE ENDS HERE ######

    return train_dataloader



# Create a dataloader for the transformed test set
def create_transformed_dataloader(args, dataset, debug_transformation):
    # Print 5 random transformed examples
    if debug_transformation:
        small_dataset = dataset["test"].shuffle(seed=42).select(range(5))
        small_transformed_dataset = small_dataset.map(custom_transform, load_from_cache_file=False)
        for k in range(5):
            print("Original Example ", str(k))
            print(small_dataset[k])
            print("\n")
            print("Transformed Example ", str(k))
            print(small_transformed_dataset[k])
            print('=' * 30)

        exit()

    transformed_dataset = dataset["test"].map(custom_transform, load_from_cache_file=False)
    transformed_tokenized_dataset = transformed_dataset.map(tokenize_function, batched=True, load_from_cache_file=False)
    transformed_tokenized_dataset = transformed_tokenized_dataset.remove_columns(["text"])
    transformed_tokenized_dataset = transformed_tokenized_dataset.rename_column("label", "labels")
    transformed_tokenized_dataset.set_format("torch")

    transformed_val_dataset = transformed_tokenized_dataset
    eval_dataloader = DataLoader(transformed_val_dataset, batch_size=args.batch_size)

    return eval_dataloader


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # Arguments
    parser.add_argument("--train", action="store_true", help="train a model on the training data")
    parser.add_argument("--train_augmented", action="store_true", help="train a model on the augmented training data")
    parser.add_argument("--eval", action="store_true", help="evaluate model on the test set")
    parser.add_argument("--eval_transformed", action="store_true", help="evaluate model on the transformed test set")
    parser.add_argument("--model_dir", type=str, default="./out")
    parser.add_argument("--debug_train", action="store_true",
                        help="use a subset for training to debug your training loop")
    parser.add_argument("--debug_transformation", action="store_true",
                        help="print a few transformed examples for debugging")
    parser.add_argument("--learning_rate", type=float, default=5e-5)
    parser.add_argument("--num_epochs", type=int, default=3)
    parser.add_argument("--batch_size", type=int, default=8)

    args = parser.parse_args()

    global device
    global tokenizer

    # Device
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

    # Tokenize the dataset
    dataset = load_dataset("imdb")
    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Prepare dataset for use by model
    tokenized_dataset = tokenized_dataset.remove_columns(["text"])
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format("torch")

    small_train_dataset = tokenized_dataset["train"].shuffle(seed=42).select(range(4000))
    small_eval_dataset = tokenized_dataset["test"].shuffle(seed=42).select(range(1000))

    # Create dataloaders for iterating over the dataset
    if args.debug_train:
        train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=args.batch_size)
        eval_dataloader = DataLoader(small_eval_dataset, batch_size=args.batch_size)
        print(f"Debug training...")
        print(f"len(train_dataloader): {len(train_dataloader)}")
        print(f"len(eval_dataloader): {len(eval_dataloader)}")
    else:
        train_dataloader = DataLoader(tokenized_dataset["train"], shuffle=True, batch_size=args.batch_size)
        eval_dataloader = DataLoader(tokenized_dataset["test"], batch_size=args.batch_size)
        print(f"Actual training...")
        print(f"len(train_dataloader): {len(train_dataloader)}")
        print(f"len(eval_dataloader): {len(eval_dataloader)}")

    # Train model on the original training dataset
    if args.train:
        model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
        model.to(device)
        do_train(args, model, train_dataloader, save_dir="./out")
        # Change eval dir
        args.model_dir = "./out"

    # Train model on the augmented training dataset
    if args.train_augmented:
        train_dataloader = create_augmented_dataloader(args, dataset)
        model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
        model.to(device)
        do_train(args, model, train_dataloader, save_dir="./out_augmented")
        # Change eval dir
        args.model_dir = "./out_augmented"

    # Evaluate the trained model on the original test dataset
    if args.eval:
        out_file = os.path.basename(os.path.normpath(args.model_dir))
        out_file = out_file + "_original.txt"
        score = do_eval(eval_dataloader, args.model_dir, out_file)
        print("Score: ", score)

    # Evaluate the trained model on the transformed test dataset
    if args.eval_transformed:
        out_file = os.path.basename(os.path.normpath(args.model_dir))
        out_file = out_file + "_transformed.txt"
        eval_transformed_dataloader = create_transformed_dataloader(args, dataset, args.debug_transformation)
        score = do_eval(eval_transformed_dataloader, args.model_dir, out_file)
        print("Score: ", score)


Overwriting main.py


In [15]:
%cd /content/hw4/part-1-code/

!python main.py --train_augmented --eval_transformed

/content
2025-11-15 20:11:04.499852: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 20:11:04.517577: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763237464.539006   22480 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763237464.545474   22480 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763237464.561943   22480 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid

In [16]:
# Evaluate the augmented model on original test set (~5 min)
!python main.py --eval --model_dir out_augmented

2025-11-15 20:46:34.670788: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 20:46:34.688477: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763239594.709834   31435 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763239594.716271   31435 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763239594.732561   31435 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [17]:
# Download submission files
from google.colab import files

files.download('out_augmented_original.txt')
files.download('out_augmented_transformed.txt')

# Save everything to Google Drive
from google.colab import drive
drive.mount('/content/drive')

!cp -r /content/hw4/part-1-code/out_augmented /content/drive/MyDrive/hw4_bert_augmented_model
!cp out_augmented_original.txt /content/drive/MyDrive/
!cp out_augmented_transformed.txt /content/drive/MyDrive/
!cp main.py /content/drive/MyDrive/
!cp utils.py /content/drive/MyDrive/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
  %cd /content

/content


In [4]:
  # Create directory and upload Part 2 files
  !mkdir -p /content/hw4/part-2-code/data
  !mkdir -p /content/hw4/part-2-code/records
  !mkdir -p /content/hw4/part-2-code/results

In [3]:
  !pip install transformers torch nltk datasets -q

  import nltk
  nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [21]:
# Cell 3: Calculate Q4 Statistics
import os
from transformers import T5TokenizerFast
import nltk
from collections import Counter
import numpy as np

# Initialize T5 tokenizer
tokenizer = T5TokenizerFast.from_pretrained('google-t5/t5-small')

def load_lines(file_path):
    """Load lines from a file"""
    with open(file_path, 'r') as f:
        lines = [line.strip() for line in f.readlines()]
    return lines

def calculate_stats_before_preprocessing(nl_path, sql_path, split_name):
    """Calculate statistics before preprocessing (raw text)"""

    # Load data
    nl_queries = load_lines(nl_path)
    sql_queries = load_lines(sql_path)

    # Number of examples
    num_examples = len(nl_queries)

    # Tokenize with NLTK (word-level for raw stats)
    nl_words = [nltk.word_tokenize(q.lower()) for q in nl_queries]
    sql_words = [nltk.word_tokenize(q.lower()) for q in sql_queries]

    # Mean lengths (in words)
    mean_nl_length = np.mean([len(words) for words in nl_words])
    mean_sql_length = np.mean([len(words) for words in sql_words])

    # Vocabulary sizes
    nl_vocab = set()
    for words in nl_words:
        nl_vocab.update(words)

    sql_vocab = set()
    for words in sql_words:
        sql_vocab.update(words)

    print(f"\n{'='*50}")
    print(f"BEFORE PREPROCESSING - {split_name.upper()}")
    print(f"{'='*50}")
    print(f"Number of examples: {num_examples}")
    print(f"Mean sentence length: {mean_nl_length:.2f} words")
    print(f"Mean SQL query length: {mean_sql_length:.2f} words")
    print(f"Vocabulary size (natural language): {len(nl_vocab)}")
    print(f"Vocabulary size (SQL): {len(sql_vocab)}")

    return {
        'num_examples': num_examples,
        'mean_nl_length': mean_nl_length,
        'mean_sql_length': mean_sql_length,
        'nl_vocab_size': len(nl_vocab),
        'sql_vocab_size': len(sql_vocab)
    }

def calculate_stats_after_preprocessing(nl_path, sql_path, split_name):
    """Calculate statistics after T5 tokenization"""

    # Load data
    nl_queries = load_lines(nl_path)
    sql_queries = load_lines(sql_path)

    # Tokenize with T5
    nl_tokenized = [tokenizer.encode(q, add_special_tokens=False) for q in nl_queries]
    sql_tokenized = [tokenizer.encode(q, add_special_tokens=False) for q in sql_queries]

    # Mean lengths (in tokens)
    mean_nl_length = np.mean([len(tokens) for tokens in nl_tokenized])
    mean_sql_length = np.mean([len(tokens) for tokens in sql_tokenized])

    # Vocabulary sizes (unique token IDs used)
    nl_vocab = set()
    for tokens in nl_tokenized:
        nl_vocab.update(tokens)

    sql_vocab = set()
    for tokens in sql_tokenized:
        sql_vocab.update(tokens)

    print(f"\n{'='*50}")
    print(f"AFTER PREPROCESSING (T5) - {split_name.upper()}")
    print(f"{'='*50}")
    print(f"Model name: google-t5/t5-small")
    print(f"Mean sentence length: {mean_nl_length:.2f} tokens")
    print(f"Mean SQL query length: {mean_sql_length:.2f} tokens")
    print(f"Vocabulary size (natural language): {len(nl_vocab)} unique token IDs")
    print(f"Vocabulary size (SQL): {len(sql_vocab)} unique token IDs")

    return {
        'mean_nl_length': mean_nl_length,
        'mean_sql_length': mean_sql_length,
        'nl_vocab_size': len(nl_vocab),
        'sql_vocab_size': len(sql_vocab)
    }

# Change to part-2-code directory
data_dir = '/content/hw4/part-2-code/data'

# Calculate statistics for TRAIN set
print("\n" + "="*60)
print("TABLE 1: BEFORE PREPROCESSING")
print("="*60)

train_before = calculate_stats_before_preprocessing(
    f'{data_dir}/train.nl',
    f'{data_dir}/train.sql',
    'train'
)

dev_before = calculate_stats_before_preprocessing(
    f'{data_dir}/dev.nl',
    f'{data_dir}/dev.sql',
    'dev'
)

# Calculate statistics for AFTER preprocessing
print("\n" + "="*60)
print("TABLE 2: AFTER PREPROCESSING")
print("="*60)

train_after = calculate_stats_after_preprocessing(
    f'{data_dir}/train.nl',
    f'{data_dir}/train.sql',
    'train'
)

dev_after = calculate_stats_after_preprocessing(
    f'{data_dir}/dev.nl',
    f'{data_dir}/dev.sql',
    'dev'
)

# Print formatted tables for report
print("\n" + "="*60)
print("FORMATTED FOR REPORT")
print("="*60)

print("\nTable 1: Data statistics before any pre-processing")
print("-" * 70)
print(f"{'Statistics Name':<40} {'Train':<15} {'Dev':<15}")
print("-" * 70)
print(f"{'Number of examples':<40} {train_before['num_examples']:<15} {dev_before['num_examples']:<15}")
print(f"{'Mean sentence length (words)':<40} {train_before['mean_nl_length']:<15.2f} {dev_before['mean_nl_length']:<15.2f}")
print(f"{'Mean SQL query length (words)':<40} {train_before['mean_sql_length']:<15.2f} {dev_before['mean_sql_length']:<15.2f}")
print(f"{'Vocabulary size (natural language)':<40} {train_before['nl_vocab_size']:<15} {dev_before['nl_vocab_size']:<15}")
print(f"{'Vocabulary size (SQL)':<40} {train_before['sql_vocab_size']:<15} {dev_before['sql_vocab_size']:<15}")

print("\n\nTable 2: Data statistics after pre-processing")
print("-" * 70)
print(f"{'Model name: google-t5/t5-small':<40}")
print("-" * 70)
print(f"{'Statistics Name':<40} {'Train':<15} {'Dev':<15}")
print("-" * 70)
print(f"{'Mean sentence length (tokens)':<40} {train_after['mean_nl_length']:<15.2f} {dev_after['mean_nl_length']:<15.2f}")
print(f"{'Mean SQL query length (tokens)':<40} {train_after['mean_sql_length']:<15.2f} {dev_after['mean_sql_length']:<15.2f}")
print(f"{'Vocabulary size (natural language)':<40} {train_after['nl_vocab_size']:<15} {dev_after['nl_vocab_size']:<15}")
print(f"{'Vocabulary size (SQL)':<40} {train_after['sql_vocab_size']:<15} {dev_after['sql_vocab_size']:<15}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]


TABLE 1: BEFORE PREPROCESSING

BEFORE PREPROCESSING - TRAIN
Number of examples: 4225
Mean sentence length: 11.03 words
Mean SQL query length: 64.81 words
Vocabulary size (natural language): 860
Vocabulary size (SQL): 632

BEFORE PREPROCESSING - DEV
Number of examples: 466
Mean sentence length: 10.98 words
Mean SQL query length: 62.67 words
Vocabulary size (natural language): 442
Vocabulary size (SQL): 387

TABLE 2: AFTER PREPROCESSING

AFTER PREPROCESSING (T5) - TRAIN
Model name: google-t5/t5-small
Mean sentence length: 17.10 tokens
Mean SQL query length: 216.37 tokens
Vocabulary size (natural language): 791 unique token IDs
Vocabulary size (SQL): 555 unique token IDs

AFTER PREPROCESSING (T5) - DEV
Model name: google-t5/t5-small
Mean sentence length: 17.07 tokens
Mean SQL query length: 210.05 tokens
Vocabulary size (natural language): 465 unique token IDs
Vocabulary size (SQL): 395 unique token IDs

FORMATTED FOR REPORT

Table 1: Data statistics before any pre-processing
------------

In [22]:
%%writefile /content/hw4/part-2-code/load_data.py
import os, random, re, string
from collections import Counter
from tqdm import tqdm
import pickle

from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

import nltk
nltk.download('punkt')
from transformers import T5TokenizerFast
import torch

PAD_IDX = 0

class T5Dataset(Dataset):

    def __init__(self, data_folder, split):
        '''
        Dataset class for T5 model.

        Args:
            data_folder: Path to data directory
            split: 'train', 'dev', or 'test'
        '''
        self.split = split
        self.tokenizer = T5TokenizerFast.from_pretrained('google-t5/t5-small')

        # Process data
        self.data = self.process_data(data_folder, split, self.tokenizer)

    def process_data(self, data_folder, split, tokenizer):
        '''
        Load and tokenize data from .nl and .sql files.
        '''
        # Load natural language queries
        nl_path = os.path.join(data_folder, f'{split}.nl')
        with open(nl_path, 'r') as f:
            nl_queries = [line.strip() for line in f.readlines()]

        # Load SQL queries (except for test set)
        if split != 'test':
            sql_path = os.path.join(data_folder, f'{split}.sql')
            with open(sql_path, 'r') as f:
                sql_queries = [line.strip() for line in f.readlines()]
        else:
            sql_queries = None

        # Tokenize
        data = []
        for i, nl_query in enumerate(nl_queries):
            # Tokenize encoder input (natural language)
            encoder_input = tokenizer.encode(nl_query, add_special_tokens=True)

            if split != 'test':
                # Tokenize decoder input and target (SQL)
                sql_query = sql_queries[i]

                # Decoder input: add BOS token at the beginning
                # We'll use pad_token_id as BOS (T5 convention)
                decoder_input = tokenizer.encode(sql_query, add_special_tokens=False)
                decoder_input = [tokenizer.pad_token_id] + decoder_input

                # Decoder target: SQL tokens (shifted by 1 from decoder_input)
                decoder_target = tokenizer.encode(sql_query, add_special_tokens=True)

                data.append({
                    'encoder_input': torch.tensor(encoder_input, dtype=torch.long),
                    'decoder_input': torch.tensor(decoder_input, dtype=torch.long),
                    'decoder_target': torch.tensor(decoder_target, dtype=torch.long)
                })
            else:
                # Test set: only encoder input
                data.append({
                    'encoder_input': torch.tensor(encoder_input, dtype=torch.long)
                })

        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

def normal_collate_fn(batch):
    '''
    Collation function to perform dynamic padding for training and evaluation with the
    development or validation set.

    Inputs:
        * batch (List[Any]): batch is a list of length batch_size, where each index contains what
                             the dataset __getitem__ function returns.

    Returns: To be compatible with the provided training loop, you should be returning
        * encoder_ids: The input ids of shape BxT to be fed into the T5 encoder.
        * encoder_mask: Mask of shape BxT associated with padding tokens in the encoder input
        * decoder_inputs: Decoder input ids of shape BxT' to be fed into T5 decoder.
        * decoder_targets: The target tokens with which to train the decoder (the tokens following each decoder input)
        * initial_decoder_inputs: The very first input token to be decoder (only to be used in evaluation)
    '''
    # Extract from batch
    encoder_inputs = [item['encoder_input'] for item in batch]
    decoder_inputs = [item['decoder_input'] for item in batch]
    decoder_targets = [item['decoder_target'] for item in batch]

    # Pad sequences
    encoder_ids = pad_sequence(encoder_inputs, batch_first=True, padding_value=PAD_IDX)
    decoder_input_ids = pad_sequence(decoder_inputs, batch_first=True, padding_value=PAD_IDX)
    decoder_target_ids = pad_sequence(decoder_targets, batch_first=True, padding_value=PAD_IDX)

    # Create attention mask for encoder (1 for real tokens, 0 for padding)
    encoder_mask = (encoder_ids != PAD_IDX).long()

    # Initial decoder input (just the BOS token)
    initial_decoder_inputs = decoder_input_ids[:, 0:1]  # Shape: (B, 1)

    return encoder_ids, encoder_mask, decoder_input_ids, decoder_target_ids, initial_decoder_inputs

def test_collate_fn(batch):
    '''
    Collation function to perform dynamic padding for inference on the test set.

    Inputs:
        * batch (List[Any]): batch is a list of length batch_size, where each index contains what
                             the dataset __getitem__ function returns.

    Recommended returns:
        * encoder_ids: The input ids of shape BxT to be fed into the T5 encoder.
        * encoder_mask: Mask of shape BxT associated with padding tokens in the encoder input
        * initial_decoder_inputs: The very first input token to be decoder (only to be used in evaluation)
    '''
    # Extract encoder inputs
    encoder_inputs = [item['encoder_input'] for item in batch]

    # Pad sequences
    encoder_ids = pad_sequence(encoder_inputs, batch_first=True, padding_value=PAD_IDX)

    # Create attention mask
    encoder_mask = (encoder_ids != PAD_IDX).long()

    # Initial decoder input (BOS token = pad_token_id for T5)
    batch_size = encoder_ids.size(0)
    initial_decoder_inputs = torch.full((batch_size, 1), PAD_IDX, dtype=torch.long)

    return encoder_ids, encoder_mask, initial_decoder_inputs

def get_dataloader(batch_size, split):
    data_folder = 'data'
    dset = T5Dataset(data_folder, split)
    shuffle = split == "train"
    collate_fn = normal_collate_fn if split != "test" else test_collate_fn

    dataloader = DataLoader(dset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
    return dataloader

def load_t5_data(batch_size, test_batch_size):
    train_loader = get_dataloader(batch_size, "train")
    dev_loader = get_dataloader(test_batch_size, "dev")
    test_loader = get_dataloader(test_batch_size, "test")

    return train_loader, dev_loader, test_loader


def load_lines(path):
    with open(path, 'r') as f:
        lines = f.readlines()
        lines = [line.strip() for line in lines]
    return lines

def load_prompting_data(data_folder):
    # For prompting approaches (not needed for this assignment)
    train_x = load_lines(os.path.join(data_folder, 'train.nl'))
    train_y = load_lines(os.path.join(data_folder, 'train.sql'))
    dev_x = load_lines(os.path.join(data_folder, 'dev.nl'))
    dev_y = load_lines(os.path.join(data_folder, 'dev.sql'))
    test_x = load_lines(os.path.join(data_folder, 'test.nl'))

    return train_x, train_y, dev_x, dev_y, test_x

Overwriting /content/hw4/part-2-code/load_data.py


In [23]:
%%writefile /content/hw4/part-2-code/t5_utils.py
import os

import torch

import transformers
from transformers import T5ForConditionalGeneration, T5Config
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
import wandb

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def setup_wandb(args):
    # Optional: Implement wandb integration for experiment tracking
    if args.use_wandb:
        wandb.init(project="hw4-t5-text2sql", name=args.experiment_name, config=vars(args))

def initialize_model(args):
    '''
    Helper function to initialize the model. You should be either finetuning
    the pretrained model associated with the 'google-t5/t5-small' checkpoint
    or training a T5 model initialized with the 'google-t5/t5-small' config
    from scratch.
    '''
    if args.finetune:
        # Fine-tuning: Load pretrained model
        print("Loading pretrained T5-small model...")
        model = T5ForConditionalGeneration.from_pretrained('google-t5/t5-small')
    else:
        # Training from scratch: Load config only
        print("Initializing T5-small model from scratch...")
        config = T5Config.from_pretrained('google-t5/t5-small')
        model = T5ForConditionalGeneration(config)

    model.to(DEVICE)
    print(f"Model loaded on {DEVICE}")

    return model

def mkdir(dirpath):
    if not os.path.exists(dirpath):
        try:
            os.makedirs(dirpath)
        except FileExistsError:
            pass

def save_model(checkpoint_dir, model, best):
    '''
    Save model checkpoint.

    Args:
        checkpoint_dir: Directory to save checkpoint
        model: Model to save
        best: If True, save as 'best_model', else 'latest_model'
    '''
    mkdir(checkpoint_dir)

    if best:
        save_path = os.path.join(checkpoint_dir, 'best_model')
    else:
        save_path = os.path.join(checkpoint_dir, 'latest_model')

    mkdir(save_path)
    model.save_pretrained(save_path)
    print(f"Model saved to {save_path}")

def load_model_from_checkpoint(args, best):
    '''
    Load model from checkpoint.

    Args:
        args: Arguments containing checkpoint_dir
        best: If True, load 'best_model', else 'latest_model'
    '''
    if best:
        load_path = os.path.join(args.checkpoint_dir, 'best_model')
    else:
        load_path = os.path.join(args.checkpoint_dir, 'latest_model')

    print(f"Loading model from {load_path}...")
    model = T5ForConditionalGeneration.from_pretrained(load_path)
    model.to(DEVICE)

    return model

def initialize_optimizer_and_scheduler(args, model, epoch_length):
    optimizer = initialize_optimizer(args, model)
    scheduler = initialize_scheduler(args, optimizer, epoch_length)
    return optimizer, scheduler

def initialize_optimizer(args, model):
    decay_parameters = get_parameter_names(model, transformers.pytorch_utils.ALL_LAYERNORM_LAYERS)
    decay_parameters = [name for name in decay_parameters if "bias" not in name]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters() if (n in decay_parameters and p.requires_grad)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters() if (n not in decay_parameters and p.requires_grad)
            ],
            "weight_decay": 0.0,
        },
    ]

    if args.optimizer_type == "AdamW":
        optimizer = torch.optim.AdamW(
            optimizer_grouped_parameters, lr=args.learning_rate, eps=1e-8, betas=(0.9, 0.999)
        )
    else:
        pass

    return optimizer

def initialize_scheduler(args, optimizer, epoch_length):
    num_training_steps = epoch_length * args.max_n_epochs
    num_warmup_steps = epoch_length * args.num_warmup_epochs

    if args.scheduler_type == "none":
        return None
    elif args.scheduler_type == "cosine":
        return transformers.get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)
    elif args.scheduler_type == "linear":
        return transformers.get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)
    else:
        raise NotImplementedError

def get_parameter_names(model, forbidden_layer_types):
    result = []
    for name, child in model.named_children():
        result += [
            f"{name}.{n}"
            for n in get_parameter_names(child, forbidden_layer_types)
            if not isinstance(child, tuple(forbidden_layer_types))
        ]
    # Add model specific parameters (defined with nn.Parameter) since they are not in any child.
    result += list(model._parameters.keys())
    return result



Overwriting /content/hw4/part-2-code/t5_utils.py


In [35]:
%%writefile /content/hw4/part-2-code/train_t5.py
import os
import argparse
from tqdm import tqdm

import torch
import torch.nn as nn
import numpy as np
import wandb

from t5_utils import initialize_model, initialize_optimizer_and_scheduler, save_model, load_model_from_checkpoint, setup_wandb
from transformers import GenerationConfig
from load_data import load_t5_data
from utils import compute_metrics, save_queries_and_records

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
PAD_IDX = 0

def get_args():
    '''
    Arguments for training. You may choose to change or extend these as you see fit.
    '''
    parser = argparse.ArgumentParser(description='T5 training loop')

    # Model hyperparameters
    parser.add_argument('--finetune', action='store_true', help="Whether to finetune T5 or not")

    # Training hyperparameters
    parser.add_argument('--optimizer_type', type=str, default="AdamW", choices=["AdamW"],
                        help="What optimizer to use")
    parser.add_argument('--learning_rate', type=float, default=1e-1)
    parser.add_argument('--weight_decay', type=float, default=0)

    parser.add_argument('--scheduler_type', type=str, default="cosine", choices=["none", "cosine", "linear"],
                        help="Whether to use a LR scheduler and what type to use if so")
    parser.add_argument('--num_warmup_epochs', type=int, default=0,
                        help="How many epochs to warm up the learning rate for if using a scheduler")
    parser.add_argument('--max_n_epochs', type=int, default=0,
                        help="How many epochs to train the model for")
    parser.add_argument('--patience_epochs', type=int, default=0,
                        help="If validation performance stops improving, how many epochs should we wait before stopping?")

    parser.add_argument('--use_wandb', action='store_true',
                        help="If set, we will use wandb to keep track of experiments")
    parser.add_argument('--experiment_name', type=str, default='experiment',
                        help="How should we name this experiment?")

    # Data hyperparameters
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--test_batch_size', type=int, default=16)

    args = parser.parse_args()
    return args

def train(args, model, train_loader, dev_loader, optimizer, scheduler):
    best_f1 = -1
    epochs_since_improvement = 0

    model_type = 'ft' if args.finetune else 'scr'
    checkpoint_dir = os.path.join('checkpoints', f'{model_type}_experiments', args.experiment_name)
    os.makedirs(checkpoint_dir, exist_ok=True)
    args.checkpoint_dir = checkpoint_dir
    experiment_name = args.experiment_name
    gt_sql_path = os.path.join(f'data/dev.sql')
    gt_record_path = os.path.join(f'records/ground_truth_dev.pkl')
    model_sql_path = os.path.join(f'results/t5_{model_type}_{experiment_name}_dev.sql')
    model_record_path = os.path.join(f'records/t5_{model_type}_{experiment_name}_dev.pkl')
    for epoch in range(args.max_n_epochs):
        tr_loss = train_epoch(args, model, train_loader, optimizer, scheduler)
        print(f"Epoch {epoch}: Average train loss was {tr_loss}")

        eval_loss, record_f1, record_em, sql_em, error_rate = eval_epoch(args, model, dev_loader,
                                                                         gt_sql_path, model_sql_path,
                                                                         gt_record_path, model_record_path)
        print(f"Epoch {epoch}: Dev loss: {eval_loss}, Record F1: {record_f1}, Record EM: {record_em}, SQL EM: {sql_em}")
        print(f"Epoch {epoch}: {error_rate*100:.2f}% of the generated outputs led to SQL errors")

        if args.use_wandb:
            result_dict = {
                'train/loss' : tr_loss,
                'dev/loss' : eval_loss,
                'dev/record_f1' : record_f1,
                'dev/record_em' : record_em,
                'dev/sql_em' : sql_em,
                'dev/error_rate' : error_rate,
            }
            wandb.log(result_dict, step=epoch)

        if record_f1 > best_f1:
            best_f1 = record_f1
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1

        save_model(checkpoint_dir, model, best=False)
        if epochs_since_improvement == 0:
            save_model(checkpoint_dir, model, best=True)

        if epochs_since_improvement >= args.patience_epochs:
            break

def train_epoch(args, model, train_loader, optimizer, scheduler):
    model.train()
    total_loss = 0
    total_tokens = 0
    criterion = nn.CrossEntropyLoss()

    for encoder_input, encoder_mask, decoder_input, decoder_targets, _ in tqdm(train_loader):
        optimizer.zero_grad()
        encoder_input = encoder_input.to(DEVICE)
        encoder_mask = encoder_mask.to(DEVICE)
        decoder_input = decoder_input.to(DEVICE)
        decoder_targets = decoder_targets.to(DEVICE)

        logits = model(
            input_ids=encoder_input,
            attention_mask=encoder_mask,
            decoder_input_ids=decoder_input,
        )['logits']

        non_pad = decoder_targets != PAD_IDX
        loss = criterion(logits[non_pad], decoder_targets[non_pad])
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        with torch.no_grad():
            num_tokens = torch.sum(non_pad).item()
            total_loss += loss.item() * num_tokens
            total_tokens += num_tokens

    return total_loss / total_tokens

def eval_epoch(args, model, dev_loader, gt_sql_path, model_sql_path, gt_record_path, model_record_path):
    '''
    Evaluation loop for dev set during training.

    Returns:
        eval_loss: Cross-entropy loss
        record_f1: F1 score on database records
        record_em: Exact match on database records
        sql_em: Exact match on SQL queries
        error_rate: Percentage of queries with SQL errors
    '''
    model.eval()
    from transformers import T5TokenizerFast
    tokenizer = T5TokenizerFast.from_pretrained('google-t5/t5-small')

    total_loss = 0
    total_tokens = 0
    criterion = nn.CrossEntropyLoss()

    all_generated_queries = []

    with torch.no_grad():
        for encoder_input, encoder_mask, decoder_input, decoder_targets, initial_decoder_input in tqdm(dev_loader):
            encoder_input = encoder_input.to(DEVICE)
            encoder_mask = encoder_mask.to(DEVICE)
            decoder_input = decoder_input.to(DEVICE)
            decoder_targets = decoder_targets.to(DEVICE)
            initial_decoder_input = initial_decoder_input.to(DEVICE)

            # Compute loss
            logits = model(
                input_ids=encoder_input,
                attention_mask=encoder_mask,
                decoder_input_ids=decoder_input,
            )['logits']

            non_pad = decoder_targets != PAD_IDX
            loss = criterion(logits[non_pad], decoder_targets[non_pad])

            num_tokens = torch.sum(non_pad).item()
            total_loss += loss.item() * num_tokens
            total_tokens += num_tokens

            # Generate SQL queries
            generated_ids = model.generate(
                input_ids=encoder_input,
                attention_mask=encoder_mask,
                max_length=512,
                num_beams=5,
                early_stopping=True
            )

            # Decode generated IDs to SQL strings
            generated_queries = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            all_generated_queries.extend(generated_queries)

    # Save queries and compute metrics
    save_queries_and_records(all_generated_queries, model_sql_path, model_record_path)

    sql_em, record_em, record_f1, error_msgs = compute_metrics(
        gt_sql_path, model_sql_path, gt_record_path, model_record_path
    )

    # Calculate error rate
    num_errors = sum([1 for msg in error_msgs if msg != ""])
    error_rate = num_errors / len(error_msgs) if len(error_msgs) > 0 else 0

    eval_loss = total_loss / total_tokens if total_tokens > 0 else 0

    return eval_loss, record_f1, record_em, sql_em, error_rate

def test_inference(args, model, test_loader, model_sql_path, model_record_path):
    '''
    Generate predictions for test set.
    '''
    model.eval()
    from transformers import T5TokenizerFast
    tokenizer = T5TokenizerFast.from_pretrained('google-t5/t5-small')

    all_generated_queries = []

    with torch.no_grad():
        for encoder_input, encoder_mask, initial_decoder_input in tqdm(test_loader):
            encoder_input = encoder_input.to(DEVICE)
            encoder_mask = encoder_mask.to(DEVICE)

            # Generate SQL queries
            generated_ids = model.generate(
                input_ids=encoder_input,
                attention_mask=encoder_mask,
                max_length=512,
                num_beams=5,
                early_stopping=True
            )

            # Decode to SQL strings
            generated_queries = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            all_generated_queries.extend(generated_queries)

    # Save queries and records
    save_queries_and_records(all_generated_queries, model_sql_path, model_record_path)
    print(f"Test predictions saved to {model_sql_path} and {model_record_path}")

def main():
    # Get key arguments
    args = get_args()
    if args.use_wandb:
        # Recommended: Using wandb (or tensorboard) for result logging can make experimentation easier
        setup_wandb(args)

    # Load the data and the model
    train_loader, dev_loader, test_loader = load_t5_data(args.batch_size, args.test_batch_size)
    model = initialize_model(args)
    optimizer, scheduler = initialize_optimizer_and_scheduler(args, model, len(train_loader))

    # Train
    train(args, model, train_loader, dev_loader, optimizer, scheduler)

    # Evaluate
    model = load_model_from_checkpoint(args, best=True)
    model.eval()

    # Dev set
    experiment_name = args.experiment_name
    model_type = 'ft' if args.finetune else 'scr'
    gt_sql_path = os.path.join(f'data/dev.sql')
    gt_record_path = os.path.join(f'records/ground_truth_dev.pkl')
    model_sql_path = os.path.join(f'results/t5_{model_type}_{experiment_name}_dev.sql')
    model_record_path = os.path.join(f'records/t5_{model_type}_{experiment_name}_dev.pkl')
    dev_loss, dev_record_em, dev_record_f1, dev_sql_em, dev_error_rate = eval_epoch(args, model, dev_loader,
                                                                                    gt_sql_path, model_sql_path,
                                                                                    gt_record_path, model_record_path)
    print("Dev set results: Loss: {dev_loss}, Record F1: {dev_record_f1}, Record EM: {dev_record_em}, SQL EM: {dev_sql_em}")
    print(f"Dev set results: {dev_error_rate*100:.2f}% of the generated outputs led to SQL errors")

    # Test set
    model_sql_path = os.path.join(f'results/t5_{model_type}_{experiment_name}_test.sql')
    model_record_path = os.path.join(f'records/t5_{model_type}_{experiment_name}_test.pkl')
    test_inference(args, model, test_loader, model_sql_path, model_record_path)

if __name__ == "__main__":
    main()


Overwriting /content/hw4/part-2-code/train_t5.py


In [28]:
%cd /content/hw4/part-2-code

!python train_t5.py \
  --finetune \
  --learning_rate 1e-4 \
  --weight_decay 0.01 \
  --scheduler_type linear \
  --num_warmup_epochs 1 \
  --max_n_epochs 10 \
  --patience_epochs 3 \
  --batch_size 8 \
  --test_batch_size 16 \
  --experiment_name my_experiment

/content/hw4/part-2-code
2025-11-15 22:17:47.555759: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-15 22:17:47.573681: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763245067.595419   54758 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763245067.601938   54758 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763245067.618626   54758 computation_placer.cc:177] computation placer already registered. Please check l

In [29]:
from google.colab import files

!cp results/t5_ft_my_experiment_test.sql results/t5_ft_experiment_test.sql
!cp records/t5_ft_my_experiment_test.pkl records/t5_ft_experiment_test.pkl

files.download('results/t5_ft_experiment_test.sql')
files.download('records/t5_ft_experiment_test.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
%cd /content/hw4/part-2-code

!python train_t5.py \
  --finetune \
  --learning_rate 3e-5 \
  --weight_decay 0.01 \
  --scheduler_type cosine \
  --num_warmup_epochs 3 \
  --max_n_epochs 30 \
  --patience_epochs 7 \
  --batch_size 32 \
  --test_batch_size 16 \
  --experiment_name strategy2_modified

/content/hw4/part-2-code
2025-11-16 00:28:56.598710: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-16 00:28:56.616486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763252936.638005   87681 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763252936.644534   87681 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763252936.661198   87681 computation_placer.cc:177] computation placer already registered. Please check l

In [36]:
%cd /content/hw4/part-2-code
!python train_t5.py \
  --finetune \
  --learning_rate 1e-4 \
  --weight_decay 0.01 \
  --scheduler_type cosine \
  --num_warmup_epochs 2 \
  --max_n_epochs 25 \
  --patience_epochs 8 \
  --batch_size 16 \
  --test_batch_size 16 \
  --experiment_name a100_no_clipping

/content/hw4/part-2-code
2025-11-16 00:44:22.762294: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-16 00:44:22.780294: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763253862.801840   91656 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763253862.808320   91656 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763253862.824960   91656 computation_placer.cc:177] computation placer already registered. Please check l

In [37]:
 !python train_t5.py \
   --finetune \
   --learning_rate 1e-4 \
   --weight_decay 0.05 \
   --scheduler_type cosine \
   --num_warmup_epochs 3 \
   --max_n_epochs 20 \
   --patience_epochs 12 \
   --batch_size 16 \
   --test_batch_size 16 \
   --experiment_name final_65plus


2025-11-16 04:33:54.305957: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-16 04:33:54.324510: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763267634.346094  150037 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763267634.352600  150037 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763267634.369492  150037 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [38]:
# Copy the best model checkpoint to Google Drive
!cp -r /content/hw4/part-2-code/checkpoints/ft_experiments/a100_no_clipping/best_model \
  /content/drive/MyDrive/hw4/saved_models/epoch11_58.2f1/

# Also save the latest model (epoch 19) just in case
!cp -r /content/hw4/part-2-code/checkpoints/ft_experiments/a100_no_clipping/latest_model \
  /content/drive/MyDrive/hw4/saved_models/epoch19_52.7f1/

cp: cannot create directory '/content/drive/MyDrive/hw4/saved_models/epoch11_58.2f1/': No such file or directory
cp: cannot create directory '/content/drive/MyDrive/hw4/saved_models/epoch19_52.7f1/': No such file or directory


In [39]:
  # Create the directory structure
  !mkdir -p /content/drive/MyDrive/hw4/saved_models/epoch11_58.2f1
  !mkdir -p /content/drive/MyDrive/hw4/saved_models/epoch19_52.7f1

In [40]:
  # Now copy the models
  !cp -r /content/hw4/part-2-code/checkpoints/ft_experiments/a100_no_clipping/best_model/* \
    /content/drive/MyDrive/hw4/saved_models/epoch11_58.2f1/

  !cp -r /content/hw4/part-2-code/checkpoints/ft_experiments/a100_no_clipping/latest_model/* \
    /content/drive/MyDrive/hw4/saved_models/epoch19_52.7f1/

In [41]:
  # Verify it worked
  !ls -lh /content/drive/MyDrive/hw4/saved_models/epoch11_58.2f1/

total 231M
-rw------- 1 root root 1.5K Nov 16 05:16 config.json
-rw------- 1 root root  142 Nov 16 05:16 generation_config.json
-rw------- 1 root root 231M Nov 16 05:16 model.safetensors


In [8]:
%cd /content/hw4/part-2-code
!python train_t5.py \
  --finetune \
  --learning_rate 1e-4 \
  --weight_decay 0.05 \
  --scheduler_type cosine \
  --num_warmup_epochs 3 \
  --max_n_epochs 25 \
  --patience_epochs 8 \
  --batch_size 16 \
  --test_batch_size 16 \
  --experiment_name final_optimized_65plus

/content/hw4/part-2-code
2025-11-17 22:17:26.680488: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-17 22:17:26.698751: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763417846.720854    3661 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763417846.727508    3661 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763417846.744320    3661 computation_placer.cc:177] computation placer already registered. Please check l

In [9]:
!python train_t5.py \
  --finetune \
  --learning_rate 1e-4 \
  --weight_decay 0.01 \
  --scheduler_type cosine \
  --max_n_epochs 25 \
  --patience_epochs 10 \
  --batch_size 16 \
  --test_batch_size 16 \
  --experiment_name t5_final_verified

2025-11-18 02:05:56.844544: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-18 02:05:56.862839: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763431556.885100   60276 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763431556.891871   60276 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763431556.908772   60276 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [1]:
!python train_t5.py \
  --finetune \
  --learning_rate 1e-3 \
  --weight_decay 0.01 \
  --scheduler_type cosine \
  --num_warmup_epochs 2 \
  --max_n_epochs 30 \
  --patience_epochs 10 \
  --batch_size 16 \
  --test_batch_size 16 \
  --experiment_name t5_final_complete

python3: can't open file '/content/train_t5.py': [Errno 2] No such file or directory
