## Libraries

In [20]:
!pip install peft
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Collecting scipy (from bitsandbytes)
  Using cached scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hUsing cached scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl (23.1 MB)
Installing collected packages: scipy, bitsandbytes
Successfully installed bitsandbytes-0.42.0 scipy-1.14.1


In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm


## Device

In [2]:
device = torch.device("mps")  # Set MPS as the device

## LoRa Fine-Tuning

In [None]:
lora_config = LoraConfig(
    r=16,  
    lora_alpha=32,  
    target_modules=["q_proj", "v_proj"],  
    lora_dropout=0.05,  
    bias="none",  
)

## Model and Tokenizer

In [None]:
# Load the model and tokenizer
model_name = "arcee-ai/Llama-3.1-SuperNova-Lite"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name)

Loading checkpoint shards: 100%|██████████| 4/4 [00:58<00:00, 14.61s/it]


## Applying and Verifying LoRa

In [5]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /Users/mehran/CodeSpaces/Testing/Table_Answering/env/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cpu.so
'NoneType' object has no attribute 'cadam32bit_grad_fp32'
CUDA SETUP: Loading binary /Users/mehran/CodeSpaces/Testing/Table_Answering/env/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cpu.so...
dlopen(/Users/mehran/CodeSpaces/Testing/Table_Answering/env/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cpu.so, 0x0006): tried: '/Users/mehran/CodeSpaces/Testing/Table_Answering/env/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (not a mach-o file), '/System/Volumes/Preboot/Cryptexes/OS/Users/mehran/CodeSpaces/Testing/Table_Answering/env/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (no such file), '/Users/mehran/CodeSpac

  warn("The installed version of bitsandbytes was compiled without GPU support. "


trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848


# Dataset

In [6]:
from datasets import load_dataset

train_dataset = load_dataset('csv', data_files="../data/queries_final.csv")
# val_dataset = load_dataset("path_to_val_file.csv")

In [7]:
train_dataset = train_dataset['train']


In [8]:

train_dataset['answer']

["df.loc[df['finalWorth'].idxmax(), 'selfMade']",
 "df.loc[df['age'].idxmin(), 'gender'] == 'male'",
 "df['city'].value_counts().idxmax() == 'United States'",
 "df.nlargest(5, 'rank')['selfMade'].eq(False).any()",
 "df.loc[df['age'] == df['age'].max(), 'philanthropyScore'].iloc[0] == 5",
 "df['age'].min()",
 "df[df['category'] == 'Technology'].shape[0]",
 "df[df['category'] == 'Automotive']['finalWorth'].sum()",
 "df[df['philanthropyScore'] > 3].shape[0]",
 "df.loc[(df['selfMade'] == False), 'rank'].idxmax()",
 "df.loc[df['finalWorth'] == df['finalWorth'].max(), 'category'].iloc[0]",
 "df.loc[df['age'] == df['age'].max(), 'country'].iloc[0]",
 "df.loc[df['philanthropyScore'] == df['philanthropyScore'].max(), 'gender'].iloc[0]",
 "df.loc[df['age'] == df['age'].min(), 'source'].iloc[0]",
 "df.loc[df['rank'].idxmin(), 'title']",
 "df['country'].value_counts().nlargest(3).index.tolist()",
 "df['source'].value_counts().nlargest(5).index.tolist()",
 "df.nsmallest(4, 'age')['city'].tolist()",

## Preprocessing

In [9]:
def preprocess_text(examples):
    inputs = [f"Question: {q} Answer: {a}" for q, a in zip(examples['question'], examples['answer'])]

    return tokenizer(inputs, padding='max_length', truncation=True, max_length=512)


In [10]:
# Set the eos_token as pad_token
tokenizer.pad_token = tokenizer.eos_token

In [11]:
train_tokenized = train_dataset.map(preprocess_text, batched=True)
# val_tokenized = val_dataset.map(prepocess_text, batched=True)

## Data Collator

In [12]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, padding=True)

# Training Arguments

In [83]:
model = model.to_empty(device=device)

RuntimeError: MPS backend out of memory (MPS allocated: 20.23 GB, other allocations: 464.00 KB, max allowed: 20.40 GB). Tried to allocate 1.96 GB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [24]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    learning_rate=1e-4,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    evaluation_strategy='no',
    eval_steps=500,
    gradient_accumulation_steps=4,
    fp16=False,
)




## Trainer

In [25]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # eval_dataset=val_dataset,   
    tokenizer=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(


RuntimeError: MPS backend out of memory (MPS allocated: 20.23 GB, other allocations: 464.00 KB, max allowed: 20.40 GB). Tried to allocate 224.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

## Training The Model

In [None]:
trainer.train()

# Manual Training Loop

In [None]:
from transformers import AdamW

optimizer = AdamW(model.parameters(), lr=1e-4).to(device)
model.train()

for epoch in range(3):  # Number of epochs
    for batch in train_dataloader:
        inputs = tokenizer(batch["text"], return_tensors="pt", padding=True, truncation=True).to(device)
        labels = inputs["input_ids"].to(device)
        outputs = model(**inputs, labels=labels).to(device)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch}, Loss: {loss.item()}")


# Saving model and Tokenizer    

In [None]:
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")


# Inference

TODO: Add Testing and Evaluation

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

device = torch.device("mps")  # Use MPS

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_model")
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model").to(device)

# Generate text
input_text = "Your prompt here"
inputs = tokenizer(input_text, return_tensors="pt").to(device)
outputs = model.generate(**inputs)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
