In [72]:
import subprocess
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

LM_MODEL = "august66/qwen2-sft-final"
SENTIMENT_MODEL = "siebert/sentiment-roberta-large-english"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sft_model = AutoModelForCausalLM.from_pretrained(
    LM_MODEL,
    torch_dtype = torch.float16
)
sft_tokenizer = AutoTokenizer.from_pretrained("august66/qwen2-sft-final") 
sft_tokenizer.pad_token = sft_tokenizer.eos_token
sft_model.config.pad_token_id = sft_model.config.eos_token_id
sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL)

dataset_test = load_dataset("stanfordnlp/imdb", split="test")
def prompt_completion_preprocess(example):
    words = example['text'].split()
    prompt = ' '.join(words[:5])
    completion = ' '.join(words[5:])
    return {'prompt': prompt, 'completion': completion}
dataset_test = dataset_test.map(prompt_completion_preprocess, remove_columns=['text', 'label'])


print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"Reserved : {torch.cuda.memory_reserved()  / 1024**3:.2f} GB")

Allocated: 0.01 GB
Reserved : 3.28 GB


In [19]:
from tqdm import tqdm
def tokenize(batch):

    inputs = sft_tokenizer(
        batch['prompt'],
        padding = 'max_length',
        truncation = True,
        max_length = 128,
        padding_side = 'left',
        add_special_tokens = True, 
        return_tensors = 'pt'
    )

    return {k:v for k, v in inputs.items()}

tokenized_inputs = dataset_test.map(
    tokenize, 
    batched = True,
    batch_size = 32, 
    remove_columns = ['completion']
).with_format('torch', columns = ['prompt', 'input_ids', 'attention_mask'])


tokenized_inputs_dataloader = DataLoader(
    tokenized_inputs,
    batch_size = 32, 
    shuffle = True,
    pin_memory = True 
)

prompt_completion_data = []
with torch.inference_mode():
    sft_model = sft_model.to(device)
    for input_batch in tqdm(tokenized_inputs_dataloader):
        prompts = input_batch.pop('prompt')
        input_batch = {k:v.to(device) for k, v in input_batch.items()}
        batch_size = input_batch['input_ids'].shape[0]

        outputs = sft_model.generate(
            **input_batch,
            num_return_sequences = 2, 
            do_sample = True,
            top_k=50,
            top_p=0.95,
            temperature=1.0,
            use_cache = True,
            pad_token_id = sft_model.config.eos_token_id,
            max_new_tokens=10
        ).view(batch_size * 2, -1).cpu()
        
        decoded_outputs_batch = sft_tokenizer.batch_decode(
            outputs,
            skip_special_tokens = True,
            clean_up_tokenization_spaces=True
        )

        prompt_completion_data_batch = [{
            'prompt':prompt,
            'completion_1':' '.join(decoded_outputs_batch[2*index].split()[5:]).strip(),
            'completion_2':' '.join(decoded_outputs_batch[2*index+1].split()[5:]).strip(),
        } for index, prompt in enumerate(prompts)]
        
        prompt_completion_data.extend(prompt_completion_data_batch)


prompt_completion_dataset = Dataset.from_list(prompt_completion_data)






  1%|          | 5/782 [00:01<03:22,  3.83it/s]

100%|██████████| 782/782 [03:22<00:00,  3.86it/s]


In [None]:
def sentiment_tokenize(batch):
    
    prompts = batch['prompt']
    prompts = [prompt for prompt in prompts for _ in range(2)]
    completions_flat = [c for pair in zip(batch['completion_1'], batch['completion_2']) for c in pair]
    sentiment_inputs = sentiment_tokenizer(
        text = completions_flat,
        max_length = 128,
        add_special_tokens = True,
        return_tensors = 'pt',
        padding = 'max_length', 
        truncation = True,
        padding_side = 'left'
    )
    return {'prompt':prompts, 'completion':completions_flat, 'input_ids':sentiment_inputs['input_ids'], 'attention_mask':sentiment_inputs['attention_mask']}

tokenized_sentiments = prompt_completion_dataset.map(
    sentiment_tokenize, 
    batched = True,
    batch_size = 32,
    remove_columns=["completion_1", "completion_2"]
).with_format('torch', columns = ['prompt', 'completion', 'input_ids', 'attention_mask'])


tokenized_sentiment_inputs_dataloader = DataLoader(
    tokenized_sentiments, 
    batch_size = 32,
    shuffle = True,
    pin_memory = True
)

reward_dict = []
with torch.no_grad():
    sentiment_model = sentiment_model.to(device)
    for batch in tqdm(tokenized_sentiment_inputs_dataloader):
        prompts = batch.pop('prompt')
        completions = batch.pop('completion')
        inputs = {k:v.to(device) for k, v in batch.items()}
        outputs = sentiment_model(**inputs)
        probs_batch = F.softmax(outputs.logits, dim = -1)
        pred_batch = torch.argmax(probs_batch, dim = -1)
        probs_batch = probs_batch.cpu().tolist()
        pred_batch = pred_batch.cpu().tolist()
        reward_batch = probs_batch[:, 1].cpu.tolist()

        reward_batch = [{'prompt':p, 'completion':c, 'reward':reward, 'prob':prob, 'label':label} for p, c, prob, label, reward in zip(prompts, completions, probs_batch, pred_batch, reward_batch)]
        reward_dict.extend(reward_batch)



Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

100%|██████████| 1563/1563 [02:28<00:00, 10.51it/s]


In [80]:
reward_dict

[{'prompt': 'I saw this film shortly',
  'completion': 'after its release here in the States, and even',
  'prob': [0.0023950121831148863, 0.9976049661636353],
  'label': 1},
 {'prompt': 'Was a college acting class',
  'completion': 'project, I must admit. Not very memorable.',
  'prob': [0.9994891881942749, 0.0005107722827233374],
  'label': 0},
 {'prompt': 'When I think of a',
  'completion': 'horror movie, usually what comes to my mind is',
  'prob': [0.9711613059043884, 0.0288386233150959],
  'label': 0},
 {'prompt': "Sigh. I'm baffled when I",
  'completion': 'see so many comments that criticize the film - I',
  'prob': [0.9979631900787354, 0.0020367701072245836],
  'label': 0},
 {'prompt': 'This was honestly the worst',
  'completion': "movie I've ever seen. It really wasn't",
  'prob': [0.9994882345199585, 0.0005117710679769516],
  'label': 0},
 {'prompt': "There's a thin line between",
  'completion': 'good action/adventure movies and schlock. They',
  'prob': [0.00151073443703

In [14]:
import torch.nn.functional as F
def get_sentiment(text: str):
    # Tokenize and prepare inputs
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Forward pass
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits
    # Convert to probabilities
    probs = F.softmax(logits, dim=-1)[0]
    # Decode prediction
    label = "positive" if probs[1] > probs[0] else "negative"
    return label, probs.cpu().tolist()

# 🔍 Example:
print(get_sentiment(completions[1]))

('positive', [0.0011193063110113144, 0.9988806843757629])


In [75]:
logits_batch = outputs.logits.cpu()
F.softmax(logits_batch, dim = -1)

tensor([[0.0025, 0.9975],
        [0.0060, 0.9940],
        [0.0017, 0.9983],
        [0.0027, 0.9973],
        [0.0011, 0.9989],
        [0.0050, 0.9950],
        [0.0013, 0.9987],
        [0.9912, 0.0088],
        [0.0021, 0.9979],
        [0.0035, 0.9965],
        [0.0151, 0.9849],
        [0.0055, 0.9945],
        [0.0111, 0.9889],
        [0.0013, 0.9987],
        [0.0035, 0.9965],
        [0.9971, 0.0029]])