In [None]:
!pip install transformers datasets
!pip install accelerate -U




In [None]:
# transformers ships no DistilGPT2-specific class; DistilGPT2 checkpoints are
# loaded through the regular GPT-2 classes, so import GPT2ForSequenceClassification.
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset, DatasetDict
from sklearn.model_selection import train_test_split
import pandas as pd
import torch


In [None]:
!pip install trl


Collecting trl
  Downloading trl-0.7.4-py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.9/133.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.5.14-py3-none-any.whl (99 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.0/100.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
Collecting docstring-parser>=0.14.1 (from tyro>=0.5.11->trl)
  Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.6.4-py3-none-any.whl (13 kB)
Installing collected packages: shtab, docstring-parser, tyro, trl
Successfully installed docstring-parser-0.15 shtab-1.6.4 trl-0.7.4 tyro-0.5.14


In [None]:
!pip install transformers datasets torch wandb

import dataclasses
import time

import numpy as np
import torch
import wandb
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# `trl.gpt2` does not exist in the installed TRL release; the trainer, its
# config class and the value-head model wrapper are exported from the package root.
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

# Configuration
# Single mapping of run settings: it is recorded by W&B, read by the length
# samplers and the data loader, and used to configure the PPO trainer.
config = dict(
    # Checkpoints for the policy model and the sentiment classifier.
    model_name="lvwerra/gpt2-imdb",
    cls_model_name="lvwerra/distilbert-imdb",
    # Run length and batching.
    steps=20000,
    batch_size=256,
    forward_batch_size=16,
    ppo_epochs=4,
    # Bounds for the sampled query (in) and response (out) token counts.
    txt_in_min_len=2,
    txt_in_max_len=8,
    txt_out_min_len=4,
    txt_out_max_len=16,
    # Optimiser / PPO hyperparameters.
    lr=1.41e-5,
    init_kl_coef=0.2,
    target=6,
    horizon=10000,
    gamma=1,
    lam=0.95,
    cliprange=0.2,
    cliprange_value=0.2,
    vf_coef=0.1,
)

# Start the W&B run that receives the training logs; the config dict is
# attached to the run.
wandb.init(project='gpt2-test', name='run-42', config=config)

# IMDB training split: rename the columns to review/sentiment, then keep only
# reviews whose text is longer than 200 characters.
ds = (
    load_dataset('imdb', split='train')
    .rename_columns({'text': 'review', 'label': 'sentiment'})
    .filter(lambda row: len(row["review"]) > 200, batched=False)
)

# Pick the compute device first so the classifier and both GPT-2 copies
# end up on the same one.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load BERT classifier
# Call-time options for the sentiment pipeline: a score for every label,
# no activation applied to the model output, and batched inference.
sent_kwargs = {
    "return_all_scores": True,
    "function_to_apply": "none",
    "batch_size": config["forward_batch_size"]
}
# Use the checkpoint named in the config (instead of repeating the literal)
# and run the classifier on the selected device rather than the CPU default.
sentiment_pipe = pipeline("sentiment-analysis", model=config["cls_model_name"], device=device)

# Load GPT2 model
# Two copies of the same checkpoint: the policy being optimised and the
# reference model passed to the PPO trainer alongside it.
gpt2_model = AutoModelForCausalLMWithValueHead.from_pretrained(config['model_name'])
gpt2_model_ref = AutoModelForCausalLMWithValueHead.from_pretrained(config['model_name'])
gpt2_tokenizer = AutoTokenizer.from_pretrained(config['model_name'])
# Reuse the EOS token as the padding token.
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

# Move models to GPU
gpt2_model.to(device)
gpt2_model_ref.to(device)

# Tokenize IMDB reviews
class LengthSampler:
    """Callable that draws a random integer length from a half-open range.

    The instance stores every integer in ``[min_value, max_value)`` and, each
    time it is called, returns one of them chosen by ``np.random.choice``.
    """

    def __init__(self, min_value, max_value):
        # Materialise the candidates once; max_value itself is excluded.
        self.values = [*range(min_value, max_value)]

    def __call__(self):
        candidates = self.values
        return np.random.choice(candidates)

# Samplers for the query and response token counts, bounded by the config.
input_size = LengthSampler(
    min_value=config["txt_in_min_len"],
    max_value=config["txt_in_max_len"],
)
output_size = LengthSampler(
    min_value=config["txt_out_min_len"],
    max_value=config["txt_out_max_len"],
)


def tokenize(sample):
    """Attach a truncated token prefix and its decoded text to one example.

    Encodes ``sample["review"]``, keeps the first ``input_size()`` token ids
    under ``"tokens"``, stores their decoded text under ``"query"``, and
    returns the same (mutated) sample.
    """
    token_ids = gpt2_tokenizer.encode(sample["review"])
    prefix_len = input_size()
    sample["tokens"] = token_ids[:prefix_len]
    sample["query"] = gpt2_tokenizer.decode(sample["tokens"])
    return sample


# Apply the tokenisation example by example.
ds = ds.map(tokenize, batched=False)

# Generation settings
# Keyword arguments forwarded to every `generate` call in the training loop;
# padding uses the tokenizer's EOS token id.
gen_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=gpt2_tokenizer.eos_token_id,
)

# Optimize model
def collator(data):
    """Turn a list of per-example dicts into a dict of per-key lists.

    The keys are taken from the first example; every example is expected to
    provide each of those keys.
    """
    first = data[0]
    return {key: [example[key] for example in data] for key in first}

# PPOTrainer.step expects exactly `batch_size` examples, so a trailing
# partial batch is dropped instead of being handed to the trainer.
dataloader = torch.utils.data.DataLoader(
    ds, batch_size=config['batch_size'], collate_fn=collator, drop_last=True
)

# The installed TRL release configures the trainer through a PPOConfig object
# rather than loose keyword arguments. Translate the two keys that were
# renamed, then forward only the entries PPOConfig actually declares (the
# text-length and classifier settings in `config` are not trainer options).
ppo_kwargs = dict(config)
ppo_kwargs["learning_rate"] = ppo_kwargs.pop("lr")
ppo_kwargs["mini_batch_size"] = ppo_kwargs.pop("forward_batch_size")
ppo_field_names = {field.name for field in dataclasses.fields(PPOConfig)}
ppo_config = PPOConfig(
    **{key: value for key, value in ppo_kwargs.items() if key in ppo_field_names}
)
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=gpt2_model,
    ref_model=gpt2_model_ref,
    tokenizer=gpt2_tokenizer,
)

# Number of optimisation steps: total sample budget divided by the batch size.
total_ppo_epochs = int(np.ceil(config["steps"] / config['batch_size']))

# PPO training loop: sample continuations, score them with the sentiment
# classifier, take one PPO step, and log timings/statistics to W&B.
# zip() ends at whichever runs out first, so tell tqdm the real step count.
n_steps = min(total_ppo_epochs, len(dataloader))
for epoch, batch in tqdm(zip(range(total_ppo_epochs), iter(dataloader)), total=n_steps):
    logs, timing = dict(), dict()
    t0 = time.time()
    query_tensors = [torch.tensor(ids).long().to(device) for ids in batch["tokens"]]

    # Get response from GPT-2
    t = time.time()
    response_tensors = []
    # Iterate over the queries actually present instead of assuming the batch
    # holds exactly config['batch_size'] items.
    for query in query_tensors:
        gen_len = int(output_size())
        response = gpt2_model.generate(query.unsqueeze(dim=0),
                                       max_new_tokens=gen_len, **gen_kwargs)
        # Keep only the newly generated tokens. Slicing by the query length
        # (rather than taking the last gen_len tokens) stays correct when
        # sampling stops early and fewer than gen_len tokens were produced.
        response_tensors.append(response.squeeze(0)[query.shape[0]:])
    batch['response'] = [gpt2_tokenizer.decode(r) for r in response_tensors]
    timing['time/get_response'] = time.time()-t

    # Compute sentiment score
    t = time.time()
    texts = [q + r for q, r in zip(batch['query'], batch['response'])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    # PPOTrainer.step wants one scalar tensor per example, passed as a list.
    # NOTE(review): index 1 is presumably the positive class — confirm against
    # the classifier's label order.
    rewards = [torch.tensor(output[1]["score"], device=device) for output in pipe_outputs]
    timing['time/get_sentiment_preds'] = time.time()-t

    # Run PPO step
    t = time.time()
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    timing['time/optimization'] = time.time()-t

    # Log everything
    timing['time/epoch'] = time.time()-t0
    # Single 1-D tensor view of the rewards for the table and summary stats.
    reward_values = torch.stack(rewards)
    table_rows = [list(r) for r in zip(batch['query'], batch['response'], reward_values.cpu().tolist())]
    logs.update({'game_log': wandb.Table(columns=['query', 'response', 'reward'], rows=table_rows)})
    logs.update(timing)
    logs.update(stats)
    logs['env/reward_mean'] = torch.mean(reward_values).cpu().numpy()
    logs['env/reward_std'] = torch.std(reward_values).cpu().numpy()
    logs['env/reward_dist'] = reward_values.cpu().numpy()
    wandb.log(logs)







ModuleNotFoundError: ignored

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import pipeline
from tqdm import tqdm

# Evaluation settings
IMDB_CSV_PATH = '/path/to/IMDB Dataset.csv'  # placeholder: point this at the real CSV
EVAL_SEED = 42                 # fixes the train/test split so the score is reproducible
EVAL_PROMPT_MAX_TOKENS = 512   # prompt is truncated to this many tokens before generation
EVAL_MAX_NEW_TOKENS = 16       # tokens generated per review

# Load your dataset
df = pd.read_csv(IMDB_CSV_PATH)
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Split the dataset into training and test sets
train_df, test_df = train_test_split(df, test_size=0.25, random_state=EVAL_SEED)

# Load your fine-tuned GPT-2 model and the BERT sentiment classifier
# `gpt2_model` and `gpt2_tokenizer` from the training cell are evaluated as-is;
# do not rebind `gpt2_model` to a placeholder here, that discards the trained
# policy. To evaluate a saved checkpoint instead, load it into `gpt2_model`
# before running this cell.
sentiment_classifier = pipeline("sentiment-analysis", model="lvwerra/distilbert-imdb")
eval_device = next(gpt2_model.parameters()).device


def generate_text(review):
    """Return the review prompt plus GPT-2's continuation as decoded text.

    `generate` works on token ids, so the review is tokenised (and truncated
    to EVAL_PROMPT_MAX_TOKENS) first, and the generated ids are decoded back
    to a string before being returned.
    """
    encoded = gpt2_tokenizer(
        review,
        return_tensors="pt",
        truncation=True,
        max_length=EVAL_PROMPT_MAX_TOKENS,
    )
    output_ids = gpt2_model.generate(
        encoded["input_ids"].to(eval_device),
        attention_mask=encoded["attention_mask"].to(eval_device),
        max_new_tokens=EVAL_MAX_NEW_TOKENS,
        pad_token_id=gpt2_tokenizer.eos_token_id,
    )
    return gpt2_tokenizer.decode(output_ids[0], skip_special_tokens=True)


# Evaluate the model on the test set
correct_predictions = 0
for _, row in tqdm(test_df.iterrows(), total=test_df.shape[0]):
    # Generate a response using GPT-2
    response = generate_text(row['review'])

    # Predict sentiment using BERT classifier
    # truncation=True keeps long texts within the classifier's input limit.
    sentiment_prediction = sentiment_classifier(response, truncation=True)
    predicted_label = 1 if sentiment_prediction[0]['label'] == 'POSITIVE' else 0

    # Compare with actual label
    correct_predictions += int(predicted_label == row['sentiment'])

# Calculate accuracy
accuracy = correct_predictions / test_df.shape[0]
print(f"Accuracy on test set: {accuracy:.2f}")
