In [27]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecNormalize
import gym

In [28]:
def load_data_from_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    prompts = []
    labels = []
    for category, actors_data in data.items():
        for actor, descriptions in actors_data.items():
            for description in descriptions:
                prompts.append(description)
                labels.append(category)
    return prompts, labels

In [29]:
# Load your fine-tuning data
X_finetune, y_finetune = load_data_from_file('gender_prompt.json')
label_encoder = LabelEncoder()
# Convert string labels to numerical labels using LabelEncoder
y_finetune_numeric = label_encoder.fit_transform(y_finetune)

In [30]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(set(y_finetune_numeric)))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [31]:
# Tokenize and encode the fine-tuning data
X_finetune_encoding = tokenizer(X_finetune, truncation=True, padding=True, return_tensors='pt')
y_finetune_tensor = torch.tensor(y_finetune_numeric)

In [32]:
class FineTuneDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {'input_ids': self.encodings['input_ids'][idx],
                'attention_mask': self.encodings['attention_mask'][idx],
                'labels': torch.tensor(self.labels[idx])}


In [33]:
# Create dataloaders for fine-tuning
finetune_dataset = FineTuneDataset(X_finetune_encoding, y_finetune_tensor)
finetune_loader = DataLoader(finetune_dataset, batch_size=4, shuffle=True)

In [34]:
# Fine-tune the model
fine_tune_optimizer = AdamW(model.parameters(), lr=1e-5)  # Use a smaller learning rate for fine-tuning



In [None]:
# Fine-tuning loop
num_finetune_epochs = 3
for epoch in range(num_finetune_epochs):
    model.train()
    for batch in finetune_loader:
        fine_tune_optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        fine_tune_optimizer.step()

  'labels': torch.tensor(self.labels[idx])}


In [None]:
# Save the fine-tuned model
model.save_pretrained('fine_tuned_model')

In [None]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, classification_report

# Assuming you have a DataLoader for your validation dataset named 'val_dataloader'
# The validation dataset should consist of pairs (input_text, label), where 'label' is the gender label (0 or 1)

# Load the fine-tuned BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained('fine_tuned_model')
tokenizer = BertTokenizer.from_pretrained('fine_tuned_model')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

model.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for batch in val_dataloader:
        inputs = tokenizer(batch['input_text'], return_tensors="pt", padding=True, truncation=True)
        labels = batch['label'].to(device)
        inputs = {key: val.to(device) for key, val in inputs.items()}

        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predictions.cpu().numpy())

# Calculate accuracy and other metrics
accuracy = accuracy_score(y_true, y_pred)
classification_report_result = classification_report(y_true, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_report_result)

In [None]:
# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    print(epoch)
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

In [None]:
# Evaluate the model on the test set
model.eval()
all_preds = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=1).cpu().numpy()
        all_preds.extend(predictions)

In [None]:
# Evaluate the model
accuracy = accuracy_score(y_test_tensor.numpy(), all_preds)
report = classification_report(y_test_tensor.numpy(), all_preds)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)

In [26]:
from stable_baselines3.common.vec_env import VecNormalize
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import gym
import numpy as np

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Using cpu device


IndexError: too many indices for tensor of dimension 2

In [None]:
# Assuming 'load_data_from_file' and 'FineTuneDataset' classes are defined as before

class LanguageModelEnv(gym.Env):
    def __init__(self, model, tokenizer, data_loader):
        super(LanguageModelEnv, self).__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.data_loader = data_loader
        self.action_space = gym.spaces.Discrete(tokenizer.vocab_size)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)  # Use Box space instead of Discrete
        self.current_step = 0
        self.max_steps = len(data_loader)
        self.observation = None  # Initialize the observation attribute

    def reset(self):
        self.current_step = 0
        self.observation = np.zeros(1, dtype=np.float32)  # Use np.float32 for the observation
        return self.observation

    def step(self, action):
        # Your existing step implementation
        token = self.tokenizer.convert_ids_to_tokens([action.item()])[0]
        
        # Generate next_token without converting it to a tensor
        with torch.no_grad():
            next_token_tensor = self.model.generate(
                torch.tensor(self.observation, dtype=torch.long), max_length=1, pad_token_id=self.tokenizer.pad_token_id
            )[0][:, -1]
        
        # Use next_token as indices for embedding
        with torch.no_grad():
            embedded_next_token = self.model.transformer.wte(next_token_tensor.unsqueeze(0))
        
        reward = len(token)
        self.observation = np.array([next_token_tensor.item()])
        done = self.current_step >= self.max_steps
        self.current_step += 1
        return self.observation, reward, done, {}

In [None]:
#define gpt2_model and tokenizer
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [None]:
# Instantiate the RL environment
rl_env = DummyVecEnv([lambda: LanguageModelEnv(gpt2_model, tokenizer, finetune_loader)])

In [None]:
# Instantiate PPO agent
ppo_model = PPO("MlpPolicy", rl_env, verbose=1)

In [None]:
# Train the PPO agent
ppo_model.learn(total_timesteps=10000)

In [None]:
# Save the trained PPO model
ppo_model.save("ppo_language_model")

In [None]:
# Load the trained PPO model for evaluation or further training
ppo_model = PPO.load("ppo_language_model")