In [1]:
!pip install datasets bitsandbytes transformers tqdm accelerate

Collecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[K     |████████████████████████████████| 105.0 MB 12.5 MB/s eta 0:00:01
Collecting scipy
  Downloading scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl (30.3 MB)
[K     |████████████████████████████████| 30.3 MB 643 kB/s eta 0:00:012
Installing collected packages: scipy, bitsandbytes
Successfully installed bitsandbytes-0.42.0 scipy-1.13.1
You should consider upgrading via the '/Users/abhinavpandey/Developer/ProjectAI/venv/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch import optim
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset
from tqdm import tqdm
import bitsandbytes
from huggingface_hub import login
from accelerate import init_empty_weights, infer_auto_device_map
from accelerate import load_checkpoint_and_dispatch

  from .autonotebook import tqdm as notebook_tqdm


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [3]:
# Set device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [4]:
print(device)

mps


In [5]:
# Load and subset dataset
dataset = load_dataset("andythetechnerd03/AI-human-text")
dataset['train'] = dataset['train'].select(range(500))
dataset['test'] = dataset['test'].select(range(10))

Generating train split: 100%|██████████| 462873/462873 [00:00<00:00, 530711.17 examples/s]
Generating test split: 100%|██████████| 24362/24362 [00:00<00:00, 534310.99 examples/s]


In [6]:
# Set device
login(token="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", add_to_git_credential=True)
access_token = "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"

Token is valid (permission: fineGrained).
Your token has been saved in your configured git credential helpers (osxkeychain).
Your token has been saved to /Users/abhinavpandey/.cache/huggingface/token
Login successful


In [7]:
# # Load the tokenizer and model
# model_name = "meta-llama/Llama-2-7b-hf"
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_compute_dtype='float16',
#     bnb_4bit_use_double_quant=False
# )

# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     token=access_token,
#     quantization_config = bnb_config,
#     device_map="auto")

# Load the tokenizer and model
# model_name = "meta-llama/Llama-2-7b-hf"
# model_name = "openai-community/gpt2"
model_name = "meta-llama/Llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=access_token,
    device_map="auto"
)

Some parameters are on the meta device because they were offloaded to the disk.


In [8]:
# Set special tokens for padding
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

In [9]:
# Dataset and DataLoader for training
class HuggingFaceDataset(Dataset):
    def __init__(self, hf_dataset, split='train'):
        self.dataset = hf_dataset[split]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]['text'], self.dataset[idx]['generated']

In [10]:
def collate_fn(batch):
    texts, responses = zip(*batch)
    return list(texts), list(responses)

In [11]:
train_dataset = HuggingFaceDataset(dataset, split='train')
train_dataloader = DataLoader(train_dataset, batch_size=8, collate_fn=collate_fn, shuffle=True)


In [12]:
# Define the optimizer
optimizer = optim.AdamW(model.parameters(), lr=5e-5)

In [13]:
# Reward calculation and PPO update functions
def compute_reward(feedback):
    return 10 if feedback == 0 else -10

def tokenize_and_get_rewards(batch):
    texts, feedbacks = batch
    tokenized_inputs = tokenizer(list(texts), return_tensors="pt", padding=True, truncation=True, max_length=512)
    rewards = torch.tensor([compute_reward(fb) for fb in feedbacks])
    return tokenized_inputs, rewards


In [14]:
@torch.no_grad()
def whiten(values, shift_mean=True):
    values = values.float()
    mean, var = torch.mean(values), torch.var(values, unbiased=False)
    whitened = (values - mean) / torch.sqrt(var + 1e-8)
    if not shift_mean:
        whitened += mean
    return whitened

def ppo_update(states, actions, rewards, advantages, old_log_probs, epsilon_clip=0.2, beta=0.1):
    outputs = model(**states, labels=actions)
    new_log_probs = outputs.logits.log_softmax(-1)

    # KL penalty and reward adjustment
    kl_penalty = beta * (new_log_probs - old_log_probs).sum(-1).mean(-1)
    rewards = rewards + kl_penalty

    ratios = (new_log_probs - old_log_probs).exp().sum(-1).mean(-1)

    surr1 = ratios * advantages
    surr2 = torch.clamp(ratios, 1 - epsilon_clip, 1 + epsilon_clip) * advantages
    loss = -torch.min(surr1, surr2).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

In [15]:
for batch in tqdm(train_dataloader):
  texts, feedbacks = batch
  print(len(texts))
  tokenized_inputs, rewards = tokenize_and_get_rewards(batch)
  print(tokenized_inputs['input_ids'].shape)
  break

  0%|          | 0/63 [00:00<?, ?it/s]

8
torch.Size([8, 512])





In [16]:
# Train PPO with reward normalization and adaptive KL penalty
def train_ppo(epochs=2, epsilon_clip=0.2, beta=0.1):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            tokenized_inputs, rewards = tokenize_and_get_rewards(batch)
            tokenized_inputs = {k: v.to(device) for k, v in tokenized_inputs.items()}
            rewards = rewards.to(device)

            with torch.no_grad():
                outputs = model(**tokenized_inputs)
                log_probs = outputs.logits.log_softmax(-1)
                values = log_probs.mean(-1).mean(-1)  # Average over sequence length and vocabulary

            # Reward normalization
            rewards = whiten(rewards, shift_mean=False)
            advantages = rewards - values

            loss = ppo_update(tokenized_inputs, tokenized_inputs["input_ids"], rewards, advantages, log_probs, epsilon_clip, beta)
            total_loss += loss

        print(f"Epoch {epoch+1}/{epochs}, Average Loss: {total_loss / len(train_dataloader):.4f}")

    # Save the model and tokenizer
    save_directory = "ppo_model_tokenizer"
    model.save_pretrained(save_directory)
    tokenizer.save_pretrained(save_directory)
    print("Model and tokenizer saved successfully!")

# Example usage for training
train_ppo(epochs=1)

Epoch 1/1:   0%|          | 0/63 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
Epoch 1/1:   0%|          | 0/63 [00:22<?, ?it/s]


RuntimeError: MPS backend out of memory (MPS allocated: 8.71 GB, other allocations: 194.66 MB, max allowed: 9.07 GB). Tried to allocate 256.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [None]:
@torch.no_grad()
def generate_response(input_text):
    model.eval()
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

    # Create attention_mask (1 for non-pad tokens, 0 for pad tokens)
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(device)

    # Debugging: Check for any NaN values in input
    if torch.isnan(input_ids).any():
        print("NaN detected in input_ids")

    # Generate the output, with logits clamping to avoid numerical instability
    try:
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=400,  # Reduce this if necessary for testing
            num_return_sequences=1,
            do_sample=True,  # Set to False to test without sampling
            pad_token_id=tokenizer.pad_token_id,
            temperature=1.0,  # Set temperature to 1 for more stable generation
            top_k=50,         # Add top_k to control randomness
            top_p=0.95        # Add top_p for nucleus sampling
        )
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

    except RuntimeError as e:
        print(f"RuntimeError during generation: {e}")
        # Optionally, re-raise the error if needed for further debugging
        raise e

# Example usage
response = generate_response("Explain the importance of forests.")
print(f"Generated response: {response}")


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the tokenizer and model from the fine-tuned directory
model_path = "ppo_model_tokenizer"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set the model to evaluation mode and move it to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

def generate_response(prompt, max_length=100, temperature=1.0, top_k=50, top_p=0.9):
    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate text using the model
    with torch.no_grad():
        output = model.generate(
            inputs.input_ids,
            max_length=max_length,
            temperature=temperature,  # Adjusted
            top_k=top_k,
            top_p=top_p,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode the output and return the response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Example usage
prompt = "Explain the significance of artificial intelligence in modern education."
response = generate_response(prompt)
print(response)
