In [1]:
import subprocess
import os

# Source /etc/network_turbo inside a bash subshell, list the resulting
# environment entries whose line contains "proxy", and copy each NAME=value
# pair into this process's environment.
# NOTE(review): /etc/network_turbo is presumably a host-provided script that
# exports proxy settings — confirm on the target machine.
result = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
)
output = result.stdout
for env_line in output.splitlines():
    # Split on the first '=' only, so values that contain '=' stay intact;
    # lines without any '=' are ignored.
    name, separator, setting = env_line.partition('=')
    if separator:
        os.environ[name] = setting
        

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# The causal-LM weights and the tokenizer are both read from the same local
# checkpoint path.
SFT_CHECKPOINT_PATH = "./qwen_imdb_final"
SFT_tokenizer = AutoTokenizer.from_pretrained(SFT_CHECKPOINT_PATH)
SFT_model = AutoModelForCausalLM.from_pretrained(SFT_CHECKPOINT_PATH)

In [3]:
from datasets import load_dataset
# Load only the "test" split of the "stanfordnlp/imdb" dataset.
dataset_test = load_dataset(
    "stanfordnlp/imdb",
    split="test",
)

def prompt_completion_preprocess(example, num_prompt_words=5):
    """Split one example's text into a short prompt and the remaining completion.

    The text is split on runs of whitespace. The first ``num_prompt_words``
    words become the prompt and everything after them becomes the completion.
    Both parts are re-joined with single spaces, so the original spacing and
    line breaks are not preserved.

    Args:
        example: Mapping with a ``'text'`` string entry.
        num_prompt_words: Number of leading words that make up the prompt.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A dict with the keys ``'prompt'`` and ``'completion'``. When the text
        has ``num_prompt_words`` words or fewer, the prompt holds all of them
        and the completion is the empty string.

    Raises:
        ValueError: If ``num_prompt_words`` is negative (a negative slice bound
            would silently take words from the wrong end of the text).
    """
    if num_prompt_words < 0:
        raise ValueError(
            f"num_prompt_words must be non-negative, got {num_prompt_words}"
        )
    words = example['text'].split()
    prompt = ' '.join(words[:num_prompt_words])
    completion = ' '.join(words[num_prompt_words:])
    return {'prompt': prompt, 'completion': completion}

# Apply the prompt/completion split to every example and drop the original
# 'text' and 'label' columns, leaving only the two new fields.
dataset_test = dataset_test.map(
    prompt_completion_preprocess,
    remove_columns=['text', 'label'],
)

In [8]:
# Display the processed dataset's summary (feature names and row count).
dataset_test

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 25000
})

In [7]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1️⃣ Move model
SFT_model = SFT_model.to(device)

# 2️⃣ Ensure a pad token exists BEFORE tokenizing: the tokenizer call below uses
# padding=True, which needs a pad token, so the fallback to EOS has to run first.
if SFT_tokenizer.pad_token_id is None:
    SFT_tokenizer.pad_token = SFT_tokenizer.eos_token
    SFT_model.config.pad_token_id = SFT_tokenizer.eos_token_id

# 3️⃣ Tokenize the first test prompt and move the tensors to the model's device
prefix = dataset_test[0]['prompt']
inputs = SFT_tokenizer(prefix, return_tensors="pt", padding=True, truncation=True)
inputs = {k: v.to(device) for k, v in inputs.items()}

# 4️⃣ Sample two continuations of the same prompt.
# pad_token_id is taken from the tokenizer rather than the model config: the
# config value can be None even when the tokenizer defines a pad token, in which
# case generate() falls back to eos_token_id and logs a warning.
outputs = SFT_model.generate(
    **inputs,
    num_return_sequences=2,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=1.0,
    pad_token_id=SFT_tokenizer.pad_token_id,
)

# Decode each returned sequence in full. The sequences contain the prompt tokens
# followed by the sampled continuation, so every decoded text starts with the
# prompt (slice with seq[inputs["input_ids"].shape[1]:] to drop it if needed).
completions = [
    SFT_tokenizer.decode(seq, skip_special_tokens=True)
    for seq in outputs
]

print("A:", completions[0])
print("B:", completions[1])

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


A: I love sci-fi and am always happy to see a good movie every once in a while. I was in a bad mood, because I had to drive to Chicago, and this movie just happened to be up on TV.<br /><br />I will not give away any plot details here, except that the movie is set in 2022 and revolves around a nuclear-powered car. Unfortunately, the car gets into a terrible accident that shuts it off and makes it possible for a very small group of people to get on board in the hope that it will be repaired. However, some people died before it was fixed, and so everyone looks like a casualty of the accident. The accident makes the humans' face shorter and bulvier, so in order to walk normally, they have to use their necks and arms like a bunch of gymnasts, and in order to climb the rocks, their legs have to kick with all their strength (something that the non-mechanical humans don't have to worry about). <br /><br />You know that this is going to end up being bad right away, but I expected much more whe

In [5]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# 1️⃣ Identifier passed to from_pretrained for both the tokenizer and the model.
# Kept in its own constant so that `sentiment_model` only ever refers to the
# model object, never to a string.
SENTIMENT_MODEL_ID = "siebert/sentiment-roberta-large-english"

# 2️⃣ Load its tokenizer
sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_ID)

# 3️⃣ Load the model itself
sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_ID)

# 4️⃣ (Optional) Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# As the last expression of the cell, this also displays the model's repr.
sentiment_model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=Tru

In [14]:
import torch.nn.functional as F
def get_sentiment(text: str):
    """Classify ``text`` with the notebook-level sentiment model.

    The text is tokenized with ``sentiment_tokenizer`` (truncation and padding
    enabled), the resulting tensors are moved to ``device``, and
    ``sentiment_model`` is run without gradient tracking. The logits of the
    first row are turned into probabilities with a softmax over the last axis.

    Args:
        text: The text to score.

    Returns:
        A ``(label, probs)`` tuple. ``probs`` is the list of probabilities for
        the first row; ``label`` is ``"positive"`` when ``probs[1]`` is strictly
        greater than ``probs[0]`` and ``"negative"`` otherwise (ties included).
    """
    encoded = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        scores = sentiment_model(**encoded).logits

    class_probs = F.softmax(scores, dim=-1)[0]

    if class_probs[1] > class_probs[0]:
        verdict = "positive"
    else:
        verdict = "negative"
    return verdict, class_probs.cpu().tolist()

# 🔍 Example: score the second sampled completion and print (label, probabilities).
example_sentiment = get_sentiment(completions[1])
print(example_sentiment)

('positive', [0.0011193063110113144, 0.9988806843757629])


In [12]:
# Display the list of decoded generations produced by the sampling cell.
completions

["I love sci-fi and am always happy to see a good movie every once in a while. I was in a bad mood, because I had to drive to Chicago, and this movie just happened to be up on TV.<br /><br />I will not give away any plot details here, except that the movie is set in 2022 and revolves around a nuclear-powered car. Unfortunately, the car gets into a terrible accident that shuts it off and makes it possible for a very small group of people to get on board in the hope that it will be repaired. However, some people died before it was fixed, and so everyone looks like a casualty of the accident. The accident makes the humans' face shorter and bulvier, so in order to walk normally, they have to use their necks and arms like a bunch of gymnasts, and in order to climb the rocks, their legs have to kick with all their strength (something that the non-mechanical humans don't have to worry about). <br /><br />You know that this is going to end up being bad right away, but I expected much more when