In [2]:
# Standard library
import os
import gc
import random
import math

# Third-party
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset
from tqdm import tqdm
from huggingface_hub import login
import matplotlib.pyplot as plt
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    PeftModel,
)


# Local package imports
from rc_experiment.data_loading import raw_2_llm_data, torch_data_loader
from rc_experiment.model_loading import quanti_lora_md
from rc_experiment.training import casual_llm_train, plot_losses
from rc_experiment.eval import rc_eval

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable so that no credential is stored in the notebook; when the
# variable is unset, login() receives None and falls back to its own prompt.
# SECURITY: a literal access token was previously hard-coded on this line and
# must be considered leaked -- revoke it in the Hub account settings.
login(token=os.environ.get("HF_TOKEN"))

# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
# The MPS probe looks up torch.backends.mps (the object actually queried) and
# tolerates PyTorch builds that do not ship that backend at all.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")



########################################################
# Define Experiment Configuration
########################################################
# Reproducibility: seed the Python and PyTorch global RNGs once, up front, so
# that any stochastic step driven by them repeats between runs.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Data file paths, keyed by split name
TRAINING_FILES = {
    "train": "pipeline_test_data/all_prompts_train.jsonl",
    "validation": "pipeline_test_data/validation_prompts.jsonl",
}

# Test file paths, keyed by test-set name
TEST_FILES = {
    "p2d": "pipeline_test_data/p2d_prompts_test.jsonl",
    "d2p": "pipeline_test_data/d2p_prompts_test.jsonl"
}

# Hugging Face model identifiers of the causal LMs to fine-tune, one experiment each
MODELS = ["Gensyn/Qwen2.5-0.5B-Instruct", "meta-llama/Llama-3.2-1B"]
BEST_MODEL_DIR = []  # filled during the experiments with each model's save directory

# Define max sequence lengths for prompt and completion
MAX_INPUT_LENGTH = 128    # maximum tokens for the prompt
MAX_TARGET_LENGTH = 30    # maximum tokens for the completion/response
TOTAL_MAX_LENGTH = MAX_INPUT_LENGTH + MAX_TARGET_LENGTH

# LoRA Configuration
LORA_CONFIG_KWARGS = {
    "r": 2,               # LoRA rank
    "lora_alpha": 16,       # LoRA scaling factor
    "lora_dropout": 0.05,   # LoRA dropout
    "bias": "none",         # Bias handling
    "task_type": "CAUSAL_LM" # Task type
}

# Training config
BATCH_SIZE = 3
TRAIN_PORTION_RATE = 0.01  # passed to torch_data_loader as train_portion_rate
NUM_EPOCHS = 1  # you can adjust the number of fine-tuning epochs
PATIENCE = 3    # early stopping PATIENCE
MIN_DELTA = 0.0 # minimum change in val loss to qualify as an improvement


# Batch size used when building the test-set loaders for evaluation
EVAL_BATCH_SIZE = 2


def clear():
    """Release the current experiment's model and tokenizer.

    Removes the notebook-level ``model`` and ``tokenizer`` names if they exist,
    runs the garbage collector, and hands cached CUDA memory back to the driver
    when a CUDA device is available.
    """
    # pop() rather than `del` so that a repeated call (or a call after a failed
    # model load) does not raise NameError.
    namespace = globals()
    namespace.pop("model", None)
    namespace.pop("tokenizer", None)
    gc.collect()
    # Only touch the CUDA allocator when CUDA exists: torch.cuda.ipc_collect()
    # can fail on CPU- or MPS-only machines.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


# Loop over all the model names to conduct one experiment per model
for k, model_name in enumerate(MODELS):
    print("")
    print(f"*** Experiment start for {model_name} ***")
    print("")
    ########################################################
    # Data Loading & Pre-processing & Tokenization
    ########################################################
    # NOTE: this rebinds the notebook-level `device` to whatever the helper returns.
    tokenized_datasets, tokenizer, device = raw_2_llm_data(TRAINING_FILES, model_name, MAX_INPUT_LENGTH, MAX_TARGET_LENGTH)

    ########################################################
    # Set Up PyTorch Data Loader
    ########################################################
    # Obtain the DataLoader dictionary
    loader_dict = torch_data_loader(tokenized_datasets, batch_size=BATCH_SIZE, train_portion_rate=TRAIN_PORTION_RATE)
    # Unpack the loaders
    train_loader = loader_dict["train_loader"]
    val_loader = loader_dict["validation_loader"]

    ########################################################
    # Load LoRA Model
    ########################################################
    # Load the quantized LoRA model
    model = quanti_lora_md(LORA_CONFIG_KWARGS, model_name)
    # Move the model to the device
    model = model.to(device)

    ########################################################
    # Training (Finetuning) + Save the best model
    ########################################################
    # Define optimizer (AdamW) to update only trainable params (LoRA adapters)
    learning_rate = 5e-5
    optimizer = torch.optim.AdamW([p for p in model.parameters() if p.requires_grad], lr=learning_rate)

    # Train; the first returned value is recorded as this model's save directory
    saving_dir, train_losses, val_losses, val_accuracies = casual_llm_train(model_name, model, tokenizer, optimizer, train_loader, val_loader, device,
                                                                            MAX_TARGET_LENGTH, NUM_EPOCHS, PATIENCE, MIN_DELTA)
    BEST_MODEL_DIR.append(saving_dir)

    plot_losses(train_losses, val_losses, title=f"{model_name} Training and Validation Loss")

    ########################################################
    # Load in tuned model (Optional) -- uncomment to evaluate the saved adapter
    ########################################################
    # base_model = AutoModelForCausalLM.from_pretrained(model_name)
    # tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side='left')
    # if tokenizer.pad_token_id is None:
    #     tokenizer.pad_token_id = tokenizer.eos_token_id
    # lora_weights_path = BEST_MODEL_DIR[k]
    # model = PeftModel.from_pretrained(base_model, lora_weights_path)
    # model.to(device)

    ########################################################
    # Evaluation on all test sets
    ########################################################
    for test_name, path in TEST_FILES.items():
        # Load one test set at a time, keyed by its name
        test_path = {test_name: path}

        test_datasets, tokenizer, device = raw_2_llm_data(test_path, model_name, MAX_INPUT_LENGTH, MAX_TARGET_LENGTH)
        # Obtain the DataLoader dictionary (same `batch_size` keyword as the training call)
        test_loader_dict = torch_data_loader(test_datasets, batch_size=EVAL_BATCH_SIZE)

        # Get the data loader
        test_loader = test_loader_dict[f"{test_name}_loader"]

        pred_rslt_df = rc_eval(test_loader, model, tokenizer, device, MAX_INPUT_LENGTH, MAX_TARGET_LENGTH)

        # Save the data frame; model names containing "/" produce nested folders
        folder_path = f"experiment_rslt/{model_name}"
        os.makedirs(folder_path, exist_ok=True)
        save_path = os.path.join(folder_path, f"{test_name}_results.csv")
        pred_rslt_df.to_csv(save_path, index=False)

        print(f"DataFrame successfully saved to {save_path}")

    ########################################################
    # Clear the current model
    ########################################################
    clear()

*** Experiment start for Gensyn/Qwen2.5-0.5B-Instruct ***
DatasetDict({
    train: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 3600
    })
    validation: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 300
    })
})


Map:   0%|          | 0/300 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 3600
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 300
    })
})


config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 1,405,440 || all params: 495,438,208 || trainable%: 0.2837

Epoch 1/1


Training Epoch 1: 100%|██████████| 12/12 [00:06<00:00,  1.86batch/s]
Evaluating:   0%|          | 0/100 [00:00<?, ?batch/s]You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  test_elements = torch.tensor(test_elements)
Evaluating:  64%|██████▍   | 64/100 [01:56<01:05,  1.82s/batch]


KeyboardInterrupt: 

In [None]:
# Display the save directories that the experiment loop appended, one per model.
BEST_MODEL_DIR