In [None]:
!pip install trl > /dev/null

One can easily fine-tune your SFT model using SFTTrainer from TRL. Let us assume your dataset is imdb, the text you want to predict is inside the text field of the dataset, and you want to fine-tune the facebook/opt-350m model.

In [None]:
import trl
trl.__version__

In [None]:
import os
os.getpid()

In [None]:
proc = os.getpid()
# os.kill(proc,9)

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

dataset = load_dataset("imdb", split="train")
dataset = dataset.train_test_split(test_size=0.2)
dataset = dataset['test'].train_test_split(test_size=0.1)
# dataset

In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=3,
    per_device_train_batch_size=4,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='epoch',
    num_train_epochs=1
)

In [None]:
dataset['train'][0]

In [None]:
trainer = SFTTrainer(
    "facebook/opt-350m",
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    max_seq_length=512,
    args=args
)
# may be a seperate training_arg object has to be passed

- facebook/opt-350m is 663MB on hdd and loads to 1450 MB inside GPU, why?
- train_bs = 1 and test_bs = 1 ==> training okay
- train_ys = 2 and test_bs = 1 ==> training okay
- train_ys = 2 and test_bs = 2 ==> training okay (8.6GB)
- train_ys = 3 and test_bs = 3 ==> training okay (9.2GB)
- train_ys = 3 and test_bs = 4 ==> training okay (10.8GB)
- train_bs = 4 and test_bs = 4 ==> training fail (11.96GB)

In [None]:
trainer.train()

You can use the DataCollatorForCompletionOnlyLM to train your model on the **generated prompts only**. Note that this works only in the case when packing=False. To instantiate that collator for instruction data, pass a response template and the tokenizer.

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments
)
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

In [None]:
dataset = load_dataset("lucasmccabe-lmi/CodeAlpaca-20k", split="train")

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

dataset = dataset.train_test_split(test_size=0.3)['test'].train_test_split(test_size=0.2)
dataset

In [None]:
dataset['train'][0]

In [None]:
def formatting_prompts_func(example):
    output_texts = []
    # traverse the batches 
    for i in range(len(example['instruction'])):
        # and make the batches as Question and output as answers
        text = f"### Question: {example['instruction'][i]}\n ### Answer: {example['output'][i]}"
        output_texts.append(text)
    return output_texts

response_template = " ### Answer:"
# create a DataCollator that is imported from trl for CompletionLM
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)


In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)

In [None]:
trainer = SFTTrainer(
    model,
    args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

- train_bs = 3 and test_bs = 4 ==> training fail (11.8GB)
- train_bs = 1 and test_bs = 4 ==> training fail (11.9GB)
- train_bs = 1 and test_bs = 1 ==> training okay (9.6GB)

In [None]:
trainer.train()

To instantiate that collator for assistant style conversation data, pass a response template, an instruction template and the tokenizer. Here is an example of how it would work to fine-tune opt-350m **on assistant completions** only on the Open Assistant Guanaco dataset:

In [None]:
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer,
    TrainingArguments
)
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

In [None]:
dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
dataset = dataset.train_test_split(test_size=0.3)['test'].train_test_split(test_size=0.2)

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset

In [None]:
instruction_template = "### Human:"
response_template = "### Assistant:"
collator = DataCollatorForCompletionOnlyLM(instruction_template=instruction_template,
                                           response_template=response_template,
                                           tokenizer=tokenizer,
                                           mlm=False)


In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)
# - train_bs = 1 and test_bs = 1 ==> training okay (9.6GB)

In [None]:
trainer = SFTTrainer(
    model,
    args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    data_collator=collator,
)

trainer.train()

In [None]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

def print_tokens_with_ids(txt):
    tokens = tokenizer.tokenize(txt, add_special_tokens=False)  # only tokenize the text 
    token_ids = tokenizer.encode(txt, add_special_tokens=False) # encode the text
    print(list(zip(tokens, token_ids)))  # zip them together and return

prompt = """### User: Hello\n\n### Assistant: Hi, how can I help you?"""

print_tokens_with_ids(prompt) 
# [..., ('▁Hello', 15043), ('<0x0A>', 13), ('<0x0A>', 13), ('##', 2277), ('#', 29937), ('▁Ass', 4007), ('istant', 22137), (':', 29901), ...]

response_template = "### Assistant:"

print_tokens_with_ids(response_template) 

The **setup_chat_format() function** in trl easily sets up a model and tokenizer for conversational AI tasks. This function:

- Adds special tokens to the tokenizer, e.g. <|im_start|> and <|im_end|>, to indicate the start and end of a conversation.

- Resizes the model’s embedding layer to accommodate the new tokens.

- Sets the chat_template of the tokenizer, which is used to format the input data into a chat-like format. The default is chatml from OpenAI.

- optionally you can pass resize_to_multiple_of to resize the embedding layer to a multiple of the resize_to_multiple_of argument, e.g. 64. If you want to see more formats being supported in the future, please open a GitHub issue on trl

In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments
)
from datasets import load_dataset
from trl import setup_chat_format, SFTTrainer

In [None]:
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

# Set up the chat format with default 'chatml' format
model, tokenizer = setup_chat_format(model, tokenizer)

In [None]:
# load jsonl dataset
# dataset = load_dataset("json", data_files="path/to/dataset.jsonl", split="train")
# load dataset from the HuggingFace Hub
dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")
dataset = dataset.train_test_split(test_size=0.3)['test'].train_test_split(test_size=0.3)
dataset

In [None]:
dataset[0]

In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)
# - train_bs = 1 and test_bs = 1 ==> training okay (9.8GB)

In [None]:
# You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024
# No chat template is defined for this tokenizer - using the default template for the 
# GPT2TokenizerFast class.

In [None]:
from rich import print

In [None]:
print(tokenizer.chat_template)

In [None]:
trainer = SFTTrainer(
    "facebook/opt-350m",
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    packing=True,
)

In [None]:
trainer.train()  # training goes through

The following is very powerful way of tackling dataset loading

In [None]:
from trl import SFTTrainer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments
)
from datasets import load_dataset

In [None]:
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

In [None]:
# dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")
dataset = load_dataset("lucasmccabe-lmi/CodeAlpaca-20k", split="train")
dataset = dataset.train_test_split(test_size=0.3)['test'].train_test_split(test_size=0.3)
dataset

In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)
# - train_bs = 1 and test_bs = 1 ==> training okay (9.8GB)

In [None]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        text = f"### Question: {example['instruction'][i]}\n ### Answer: {example['output'][i]}"
        output_texts.append(text)
    return output_texts

trainer = SFTTrainer(
    model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    formatting_func=formatting_prompts_func,
)

SFTTrainer **supports example packing**, where multiple short examples are packed in the same input sequence to increase training efficiency. This is done with the ConstantLengthDataset utility class that returns constant length chunks of tokens from a stream of examples. 

To enable the usage of this dataset class, simply pass packing=True to the SFTTrainer constructor.

In [None]:
def formatting_func(example):
    text = f"### Question: {example['instruction']}\n ### Answer: {example['output']}"
    return text

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    packing=True,
    formatting_func=formatting_func
)

# trainer.train()

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments
)

dataset = load_dataset("imdb", split="train")
dataset = dataset.train_test_split(test_size=0.3)['test'].train_test_split(test_size=0.3)
dataset

In [None]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)
# - train_bs = 1 and test_bs = 1 ==> training okay (9.8GB)

In [None]:
# "EleutherAI/gpt-neo-125m" is 586 MB on hdd
trainer = SFTTrainer(
    "EleutherAI/gpt-neo-125m",
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    peft_config=peft_config,
    args=args
)
# with peft takes 3.8GB for training
trainer.train()

In [None]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# trainig adapter with 8-bit model
model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-neo-125m",
    load_in_8bit=True,
    device_map="auto",
)

trainer = SFTTrainer(
    model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    peft_config=peft_config,
)  # takes 4.3GB for training

In [None]:
trainer.train()

In [None]:
# this enables use of flash_attention1

with torch.backends.cuda.sdp_kernel(enable_flash=True,
                                    enable_math=False,
                                    enable_mem_efficient=False):
    trainer.train()

To use Flash Attention 2, first install the latest flash-attn package:

pip install flash_attention

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,
    attn_implementation="flash_attention_2"
)

### Using model creation utility

In [None]:
from trl import (
    ModelConfig,
    SFTTrainer,
    get_kbit_device_map,
    get_peft_config, 
    get_quantization_config
)
from transformers import TrainingArguments

In [None]:
model_config = ModelConfig(
    model_name_or_path="facebook/opt-350m",
    attn_implementation=None, # or "flash_attention_2"
)

In [None]:
torch_dtype = (
    model_config.torch_dtype
    if model_config.torch_dtype in ["auto", None]
    else getattr(torch, model_config.torch_dtype)
)

In [None]:
quantization_config = get_quantization_config(model_config)
quantization_config

In [None]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/sftt_opt/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)

In [None]:
model_kwargs = dict(
    revision=model_config.model_revision,
    trust_remote_code=model_config.trust_remote_code,
    attn_implementation=model_config.attn_implementation,
    torch_dtype=torch_dtype,
    use_cache=False if args.gradient_checkpointing else True,
    device_map=get_kbit_device_map() if quantization_config is not None else None,
    quantization_config=quantization_config,
)

In [None]:
model_kwargs

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path,
                                             **model_kwargs)

trainer = SFTTrainer(
    ...,
    model=model_config.model_name_or_path,
    peft_config=get_peft_config(model_config),
)

### Reward Trainer

**Objective**: To classify whether the generated statement is well formed, grammatically acceptable, and rules following. 

**Rules Following**: These rules are taught using do's and don't via Reward Trainer

The reward model should be trained on a dataset of paired examples, where each example is a tuple of two sequences. The reward model should be trained to predict which example in the pair is more relevant to the task at hand.

The reward trainer expects a very specific format for the dataset. The dataset should contain two 4 entries at least if you don’t use the default RewardDataCollatorWithPadding data collator. 

Therefore the final dataset object should contain two 4 entries at least if you use 
the default RewardDataCollatorWithPadding data collator. The entries should be named:

input_ids_chosen

attention_mask_chosen

input_ids_rejected

attention_mask_rejected

You should pass an **AutoModelForSequenceClassification model** to the RewardTrainer, along with a RewardConfig which configures the hyperparameters of the training.

Two Passes:

In the first pass, we feed in prompt and chosen response to the Reward Model, the output is Rchosen. In the second pass, we feed in the same prompt along with the rejected response. The output, in this case, is Rrejected

For a very high reward score for chosen response and a low reward score for rejected response, the loss would be 0.

loss = -log(sig(RCho - RRej))

https://github.com/ibm-ecosystem-engineering/SuperKnowa/blob/main/7.%20RLHF%20Model/notebooks/RLHFImplementation.ipynb

In [1]:
from peft import LoraConfig, TaskType
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments
)
from trl import RewardTrainer, RewardConfig
from datasets import load_dataset
import inspect
from rich import print

In [None]:
dataset = load_dataset("Anthropic/hh-rlhf",
                       data_dir="harmless-base")
dataset = load_dataset("Anthropic/hh-rlhf",
                       data_dir="red-team-attempts")

In [2]:
dataset = load_dataset("Anthropic/hh-rlhf")
dataset = dataset['test'].train_test_split(test_size=0.3)
dataset

DatasetDict({
    train: Dataset({
        features: ['chosen', 'rejected'],
        num_rows: 5986
    })
    test: Dataset({
        features: ['chosen', 'rejected'],
        num_rows: 2566
    })
})

In [3]:
print(dataset['train'][0]['rejected'])

#### Dataset prep

- Questions that requires answers are listed first

- Answers are "generated" for the questions from models 

- These answers are annotated with feedback using other models / humans 

- Answers with higher feedback are chosen and lesser feedback rejected

- Both rejected and chosen answers along with questions are collated into dataset

In [4]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token 

In [5]:
def pre_process(row):
    chosen = tokenizer(row['chosen'], max_length=512, truncation=True)
    # here the chosen text is being tokenized
    rejected = tokenizer(row['rejected'], max_length=512, truncation=True)
    # here the rejected text is being tokenized
    final = {}
    final['input_ids_chosen'] = chosen['input_ids']
    final['attention_mask_chosen'] = chosen['attention_mask']
    final['input_ids_rejected'] = rejected['input_ids'] 
    final['attention_mask_rejected'] = rejected['attention_mask']
    return final

In [6]:
dataset = dataset.map(pre_process,
                      remove_columns=['chosen','rejected'],)
dataset

Map:   0%|          | 0/5986 [00:00<?, ? examples/s]

Map:   0%|          | 0/2566 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids_chosen', 'attention_mask_chosen', 'input_ids_rejected', 'attention_mask_rejected'],
        num_rows: 5986
    })
    test: Dataset({
        features: ['input_ids_chosen', 'attention_mask_chosen', 'input_ids_rejected', 'attention_mask_rejected'],
        num_rows: 2566
    })
})

In [None]:
dataset['train'][0]

#### Working on dataset creation

In [None]:
# How to create the dataset into chosen / rejected format
#  ['ax', 'cola', 'mnli', 'mnli_matched', 
# 'mnli_mismatched', 'mrpc', 'qnli', 'qqp', 'rte', 'sst2', 'stsb', 'wnli']
glue_cola = load_dataset("glue", 'cola')

In [None]:
glue_cola['train'].features['label']

In [None]:
import pandas as pd
from operator import itemgetter

df = pd.read_csv('reward_trainer_feedback.csv',
                 encoding="ISO-8859-1")
# https://stackoverflow.com/questions/19699367/for-line-in-results-in-unicodedecodeerror-utf-8-codec-cant-decode-byte

In [None]:
df['tup'] = list(zip(df['answer'], df['feedback']))
df.head(1)

In [None]:
#grouping together all the answers for a given question along with its feedback
df_g = df.groupby('question')['tup'].apply(list).reset_index()
df_g.head(1)

In [None]:
df_g['tup'][0]

In [None]:
df_g['question'][0]

In [None]:
# sort each group based on the feedback score
df_g["sorted_tup"] = df_g["tup"].apply(lambda x :sorted(x,key=itemgetter(0)))

In [None]:
df_g

In [None]:
# answer with highest feedback score is "chosen"
df_g["chosen"] = df_g["sorted_tup"].apply(lambda x: x[-1][0])
df_g["chosen_score"] = df_g["sorted_tup"].apply(lambda x: x[-1][1])

# answer with highest feedback score is "rejected"
df_g["rejected"] = df_g["sorted_tup"].apply(lambda x: x[0][0])
df_g["rejected_score"] = df_g["sorted_tup"].apply(lambda x: x[0][1])

In [None]:
df_g = df_g.dropna()

df_g = df_g[(df_g['chosen_score']>=4.0) & (df_g['rejected_score']<4.0)]

df_g

#### Training models

In [20]:
model = AutoModelForSequenceClassification.from_pretrained("gpt2")

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
model.num_labels

2

In [None]:
def add_margin(row):
    # Assume you have a score_chosen and score_rejected columns that you want to use to compute the margin
    return {'margin': row['chosen'] - row['rejected']}

# dataset = dataset.map(add_margin)

# Code adds a margin to the loss in the margin column to the dataset. 
# The reward collator will automatically pass it through and the 
# loss will be computed accordingly.

# https://huggingface.co/papers/2307.09288

In [21]:
args = TrainingArguments(
    output_dir='/home/aicoder/training/reward_trainer/',
    push_to_hub=False,
    report_to="none",
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    evaluation_strategy='steps',
    eval_steps=200,
    save_strategy='steps',
    save_steps=200,
    num_train_epochs=1
)

In [None]:
reward_source = inspect.getsource(RewardTrainer)
print(reward_source)

In [22]:
trainer = RewardTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    peft_config=peft_config,
)



In [None]:
trainer.train()

In [None]:
tokenizer = AutoTokenizer.from_pretrained("/home/aicoder/training/reward_trainer/checkpoint-2400/")
model = AutoModelForSequenceClassification.from_pretrained("/home/aicoder/training/reward_trainer/checkpoint-2400/")

In [14]:
testing_stmt = "There is superb park in the vicinity"
# negative statement for test
nega_stmt = "This is not a very good place to spend time"
# non statement
not_stmt = 'make wsork nedo theko orga fuaga'

In [None]:
output = model(**token_stmt)

In [None]:
model.config

In [None]:
output.logits[0]

In [8]:
from transformers import pipeline

reward_tokenizer = AutoTokenizer.from_pretrained("lvwerra/distilbert-imdb")
reward_model = AutoModelForSequenceClassification.from_pretrained("lvwerra/distilbert-imdb")

In [33]:
print(reward_model.config)

In [30]:
reward_model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [9]:
tokenized_stmt = tokenizer(testing_stmt, return_tensors='pt')
reward_output = reward_model(**tokenized_stmt)
reward_output.logits

tensor([[ 0.4468, -0.6826]], grad_fn=<AddmmBackward0>)

In [12]:
tokenized_stmt = tokenizer(nega_stmt, return_tensors='pt')
reward_output = reward_model(**tokenized_stmt)
reward_output.logits

tensor([[ 0.4381, -0.8219]], grad_fn=<AddmmBackward0>)

In [16]:
tokenized_stmt = tokenizer(not_stmt, return_tensors='pt')
reward_output = reward_model(**tokenized_stmt)
reward_output.logits

tensor([[ 0.5013, -0.9232]], grad_fn=<AddmmBackward0>)

In [None]:
pipe = pipeline(task='text-classification', model="lvwerra/distilbert-imdb")
pipe(testing_stmt)

In [None]:
pipe = pipeline(task='text-classification', model="lvwerra/distilbert-imdb")
pipe(nega_stmt)

In [10]:
test_model1 = "bigscience/bloomz-560m"
test_model2 = "/home/aicoder/training/reward_trainer/checkpoint-2400/"

In [11]:
reward_cp_tokenizer = AutoTokenizer.from_pretrained(test_model2)
reward_cp_model = AutoModelForSequenceClassification.from_pretrained(test_model2)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
print(reward_cp_model.config)

In [32]:
reward_cp_model.config.id2label

{0: 'LABEL_0', 1: 'LABEL_1'}

In [17]:
tokenized_stmt = reward_cp_tokenizer(testing_stmt, return_tensors='pt')
reward_output = reward_cp_model(**tokenized_stmt)
reward_output.logits

tensor([[1.9391, 2.3142]])

In [18]:
tokenized_stmt = reward_cp_tokenizer(nega_stmt, return_tensors='pt')
reward_output = reward_cp_model(**tokenized_stmt)
reward_output.logits

tensor([[2.6312, 2.7609]])

In [19]:
tokenized_stmt = reward_cp_tokenizer(not_stmt, return_tensors='pt')
reward_output = reward_cp_model(**tokenized_stmt)
reward_output.logits

tensor([[2.6594, 3.8369]])

In [None]:


pipe = pipeline(task='text-classification', model=test_model2)
pipe(not_stmt)