<a href="https://colab.research.google.com/github/Approach-Analytics/Emotion-Classifier/blob/main/SM_EmotionAnalysisRLModel_Nov18%2C2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Emotion Analysis Reinforcement Learning Model (GPU)

In [None]:
!pip install -qq transformers stable-baselines3[extra]

import tensorflow as tf
import pandas as pd
import numpy as np
import gcsfs
from transformers import TFAutoModelForCausalLM, TFRobertaModel, AutoTokenizer
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Dict
from stable_baselines3 import DQN, PPO
# from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from collections import OrderedDict
from google.colab import auth
# Authenticate the Colab user for this session. The environment defined below
# reads gs:// paths; presumably this call is what grants that access — confirm.
auth.authenticate_user()
# Restrict TensorFlow's Python logger to messages at ERROR level.
tf.get_logger().setLevel('ERROR')

class EmotionAnalysisEnv(Env):
    """Gymnasium environment whose single control is a sampling temperature.

    Observation: a length-1 float32 array holding the current temperature.
    Actions: ``1`` raises the temperature by ``TEMPERATURE_STEP``; any other
    action lowers it by the same amount. The result is clipped to
    ``[MIN_TEMPERATURE, MAX_TEMPERATURE]``.

    On every step the causal language model generates a continuation for each
    text in ``self.text_list`` at the current temperature, the multitask
    classifier labels prompts and continuations on a fear axis and an anger
    axis, and the reward is ``+1`` when the mean (input level - output level)
    on either axis exceeds the baseline computed from the ``gpt-3.5-turbo`` /
    ``gpt-4`` rows of the reference dataframe, otherwise ``-1``.
    """

    # Temperature grid: reset() draws from 0.1, 0.2, ..., 1.0, so each action
    # moves one grid cell. (A step of 1 would jump straight to a bound on
    # every action and make the intermediate values unreachable.)
    MIN_TEMPERATURE = 0.1
    MAX_TEMPERATURE = 1.0
    TEMPERATURE_STEP = 0.1
    # Number of steps in one episode.
    EPISODE_LENGTH = 20
    # Generated continuations are cut to this many whitespace-separated words.
    MAX_OUTPUT_WORDS = 80

    def __init__(self, model_name, max_token_length, batch_size, single_batch):
        """Load the data and both models, then compute the fixed baselines.

        Args:
            model_name: Hugging Face identifier of the causal LM used for
                text generation (and of its tokenizer).
            max_token_length: Token length used for truncation/padding of
                classifier inputs and for truncation of generation prompts.
            batch_size: Batch size for generation and classification; also
                the sample size when ``single_batch`` is true.
            single_batch: When true, work on one random sample of
                ``batch_size`` input texts instead of the whole dataframe.
        """
        super().__init__()
        self.single_batch = single_batch
        self.aligned_df = self.get_aligned_dataframe()
        # NOTE: the strategy is created but not referenced anywhere else in
        # this class; it is kept because it is a public attribute.
        self.strategy = tf.distribute.MirroredStrategy() # Strategy for GPU
        self.model_name = model_name
        self.max_token_length = max_token_length
        self.batch_size = batch_size
        self.model = TFAutoModelForCausalLM.from_pretrained(self.model_name)
        # Decoder-only models must be left-padded for batched generation.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
        if self.tokenizer.pad_token is None:
            # Tokenizers without a pad token (e.g. GPT-2) reuse EOS so that
            # the pad id is a real row of the model's embedding table.
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.multitask_model = self.load_multitask_model()
        self.multitask_tokenizer = AutoTokenizer.from_pretrained('roberta-base')
        self.action_space = Discrete(2)
        self.observation_space = Box(low=0, high=1, shape=(1,), dtype=np.float32)
        self.rng = np.random.default_rng()  # Local random number generator
        self.reset()
        self.text_list = self.get_text_list()
        self.fear_emotion_axis = self.get_fear_emotion_axis()
        self.anger_emotion_axis = self.get_anger_emotion_axis()
        # Classifier output for the prompts never changes, so compute it once.
        self.input_probabilities = self.infer(self.text_list)
        baseline_rows = self.aligned_df['model_name'].str.lower().isin(['gpt-3.5-turbo', 'gpt-4'])
        self.aligned_fifo_mean, self.aligned_aiao_mean = self.calculate_fifo_aiao(self.aligned_df.loc[baseline_rows])

    def _get_observation(self):
        """Return the current temperature as a float32 array of shape (1,)."""
        return np.array([self.temperature], dtype=np.float32)

    def step(self, action):
        """Apply one temperature adjustment, regenerate text and score it.

        Args:
            action: ``1`` to raise the temperature, anything else to lower it.

        Returns:
            ``(observation, reward, done, truncated, info)``. ``done`` and
            ``truncated`` both become true once the episode's step budget is
            used up.
        """
        delta = self.TEMPERATURE_STEP if action == 1 else -self.TEMPERATURE_STEP
        # Round before clipping so repeated 0.1 steps do not drift off the grid.
        self.temperature = float(
            np.clip(round(self.temperature + delta, 1), self.MIN_TEMPERATURE, self.MAX_TEMPERATURE)
        )
        self.episode_length -= 1

        generated_text = self.generate_text_gpt2_xla()
        output_probabilities = self.infer(generated_text)

        # Side-by-side frame: input_* columns for prompts, output_* for continuations.
        df = pd.concat([pd.DataFrame(self.input_probabilities).add_prefix('input_'),
                        pd.DataFrame(output_probabilities).add_prefix('output_')], axis=1)
        df['model_name'] = self.model_name

        fifo_mean, aiao_mean = self.calculate_fifo_aiao(df)

        # Reward beating the baseline on either axis, penalise everything else.
        reward = -1
        if fifo_mean > self.aligned_fifo_mean or aiao_mean > self.aligned_aiao_mean:
            reward = 1

        done = self.episode_length <= 0
        info = {}  # Can be used to provide additional information
        # NOTE: the episode only ever ends on the step budget. The third
        # return value is still set so loops that watch only `done` finish.
        truncated = done # Required for Gymnasium

        return self._get_observation(), reward, done, truncated, info

    def reset(self, *, seed=None, options=None, **kwargs):
        """Start a new episode at a random temperature on the 0.1 grid.

        Args:
            seed: Optional seed; when given it reseeds both the base-class
                RNG and this environment's local generator.
            options: Accepted for Gymnasium API compatibility; unused.
            **kwargs: Accepted for backward compatibility; unused.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)
        if seed is not None:
            self.rng = np.random.default_rng(seed)
        # Initialize temperature within the defined bounds
        self.temperature = int(self.rng.integers(1, 11)) / 10.0
        self.episode_length = self.EPISODE_LENGTH
        return self._get_observation(), {}

    def render(self):
        """Rendering is not implemented for this environment."""
        pass

    def calculate_fifo_aiao(self, df):
        """Return the mean input-minus-output level for fear and for anger.

        Args:
            df: DataFrame with ``input_fear_label``, ``output_fear_label``,
                ``input_anger_label`` and ``output_anger_label`` columns
                holding the label strings of the two axes.

        Returns:
            ``(fifo_mean, aiao_mean)``: the mean difference of the ordinal
            codes (0 = calm ... 5 = most intense) on the fear and anger axes.
            Labels missing from a mapping become NaN, which ``mean`` skips.
        """
        # Ordinal codes for both emotion axes.
        mapping_ang = {"calm": 0, "annoyed": 1, "agitated": 2, "frustrated": 3, "angry": 4, "rage": 5}
        mapping_fear = {"calm": 0, "concern": 1, "worry": 2, "anxiety": 3, "fear": 4, "dread": 5}

        fear_drop = df['input_fear_label'].map(mapping_fear) - df['output_fear_label'].map(mapping_fear)
        anger_drop = df['input_anger_label'].map(mapping_ang) - df['output_anger_label'].map(mapping_ang)

        return fear_drop.mean(), anger_drop.mean()

    def get_aligned_dataframe(self):
        """Read the reference parquet file from Google Cloud Storage."""
        gcs_path = 'gs://gpt-conversation-datasets/reddit-six-models.parquet'
        fs = gcsfs.GCSFileSystem()
        # Open the file as a file-like object
        with fs.open(gcs_path) as f:
            return pd.read_parquet(f)

    def get_text_list(self):
        """Return the prompts: one random batch, or every ``input_text`` row."""
        input_texts = self.aligned_df['input_text']
        if self.single_batch:
            return input_texts.sample(self.batch_size).tolist() # 170 x 77 = 13,090
        return input_texts.tolist()

    def get_label_mapping(self, emo_axis):
        """Map class index -> label, with index 0 being the axis's last key."""
        return dict(enumerate(reversed(list(emo_axis.keys()))))

    def get_fear_emotion_axis(self):
        """Return the fear axis, ordered from most to least intense label."""
        return OrderedDict([
            ("dread", ['panic', 'dread', 'horror', 'horrified', 'horrifying', 'terror', 'terrified', 'terrifying']),
            ("fear", ["fear", "fearful", "fright", "frightening", "afraid", "frightful", "frightfully", "frightened", 'scared', 'scary', 'scare']),
            ("anxiety", ["anxious", "anxiety", 'angst', 'anxiousness']),
            ("worry", ["worry", "worried", "worrying", "worries"]),
            ("concern", ["concern", "concerning", "concerned", "concerns"]),
            ("calm", ["calm", "peaceful", "serene", "serenity", "relax", "relaxing", "relaxed", "untroubled", "content", "contented", "composed", "tranquil"])
        ])

    def get_anger_emotion_axis(self):
        """Return the anger axis, ordered from most to least intense label."""
        return OrderedDict([
            ("rage", ['rage', 'raging', 'raged', 'fury', 'wrath', 'wrathful', 'furious', 'enraged', 'enraging', 'incensed', 'outraged']),
            ("angry", ["angry", "anger", "angered", "mad", "maddening", "maddened"]),
            ("frustrated", ["frustration", "frustrate", "frustrated", "frustrating", "exasperated", "exasperating", "discontented", "vexed", "vexing", "bothered", "bothersome"]),
            ("agitated", ["agitated", "agitating", "agitation", "aggravated", "aggravation", "aggravating", "upset", 'upsetting', 'irritate', 'irritating', 'irritable', 'irritated']),
            ("annoyed", ["annoying", "annoy", "annoyed", "pestered", "pestering", "pester", "troubled", "troubling", "disturbed", "disturbing", "harassed", "harassing", "nagged", "nagging"]),
            ("calm", ["calm", "peaceful", "serene", "serenity", "relax", "relaxing", "relaxed", "untroubled", "content", "contented", "composed", "tranquil"])
        ])

    def batch_tokenize(self, texts):
        """Tokenize ``texts`` for the classifier, padded to ``max_token_length``.

        Returns:
            ``(input_ids, attention_mask)`` as TensorFlow tensors.
        """
        encodings = self.multitask_tokenizer(texts, truncation=True, padding='max_length', max_length=self.max_token_length, return_tensors='tf')
        return encodings['input_ids'], encodings['attention_mask']

    def generate_inference_dataset(self, input_ids, attention_mask):
        """Wrap the encoded inputs in a batched, prefetching ``tf.data.Dataset``."""
        dataset = tf.data.Dataset.from_tensor_slices(({'input_ids': input_ids, 'attention_mask': attention_mask}))
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        return dataset

    @staticmethod
    def _score_head(logits, label_mapping):
        """Turn one head's logits into (softmax probabilities, label names)."""
        probabilities = tf.nn.softmax(logits, axis=1).numpy()
        class_indices = tf.argmax(logits, axis=1).numpy()
        return probabilities, [label_mapping[index] for index in class_indices]

    def infer(self, texts):
        """Classify ``texts`` on the fear and anger axes.

        Args:
            texts: List of strings to classify.

        Returns:
            One dict per text with the decoded (possibly truncated) ``text``,
            the predicted ``fear_label`` / ``anger_label`` and one
            ``fear_<label>`` / ``anger_<label>`` probability per class,
            rounded to three decimals.

        Raises:
            ValueError: If the number of predictions does not match the
                number of input texts.
        """
        # Create dataset for inference
        input_ids, attention_mask = self.batch_tokenize(texts)
        dataset = self.generate_inference_dataset(input_ids, attention_mask)
        decoded_texts = [self.multitask_tokenizer.decode(ids, skip_special_tokens=True) for ids in input_ids]

        # Predict
        fear_outputs, anger_outputs = self.multitask_model.predict(dataset)

        fear_label_mapping = self.get_label_mapping(self.fear_emotion_axis)
        anger_label_mapping = self.get_label_mapping(self.anger_emotion_axis)
        probabilities_fear, predicted_labels_fear = self._score_head(fear_outputs, fear_label_mapping)
        probabilities_anger, predicted_labels_anger = self._score_head(anger_outputs, anger_label_mapping)

        # Check data consistency
        if not (len(texts) == len(decoded_texts) ==  len(predicted_labels_fear) == len(predicted_labels_anger) == len(probabilities_fear) == len(probabilities_anger)):
            raise ValueError("Mismatch in lengths of input lists.")

        # Pair each text with its predicted labels and class probabilities
        result = [
            {
                "text": text,
                "fear_label": label_fear,
                "anger_label": label_anger,
                **{f"fear_{key}": round(value, 3) for key, value in zip(fear_label_mapping.values(), prob_fear)},
                **{f"anger_{key}": round(value, 3) for key, value in zip(anger_label_mapping.values(), prob_anger)}
            }
            for text, label_fear, label_anger, prob_fear, prob_anger in zip(decoded_texts, predicted_labels_fear, predicted_labels_anger, probabilities_fear, probabilities_anger)
        ]

        return result

    def load_multitask_model(self):
        """Build the two-head RoBERTa classifier and load its trained weights.

        The model takes ``input_ids`` and ``attention_mask`` of length
        ``max_token_length`` and returns two 6-way logit tensors (fear,
        anger), each computed from the first token's last hidden state.
        """
        base_model = TFRobertaModel.from_pretrained('roberta-base')

        input_ids = tf.keras.layers.Input(shape=(self.max_token_length,), dtype=tf.int32, name="input_ids")
        attention_mask = tf.keras.layers.Input(shape=(self.max_token_length,), dtype=tf.int32, name="attention_mask")

        # Get the last hidden state (embeddings)
        embeddings = base_model(input_ids, attention_mask=attention_mask)[0]

        # Output for the fear set of labels
        classifier_fear = tf.keras.layers.Dense(6, name="fear")(embeddings[:,0,:])

        # Output for the anger set of labels
        classifier_anger = tf.keras.layers.Dense(6, name="anger")(embeddings[:,0,:])

        model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=[classifier_fear, classifier_anger])

        # Load the weights
        weights_path = "gs://emotion-model-checkpoints/fear-anger/multitask_checkpoint_02_20230930"
        model.load_weights(weights_path)

        return model

    def generate_text_gpt2_xla(self):
        """Generate one continuation per prompt at the current temperature.

        Returns:
            A list aligned with ``self.text_list``; each entry is the newly
            generated text only (prompt removed), cut to
            ``MAX_OUTPUT_WORDS`` words.
        """
        # Build the XLA-compiled generate function once and reuse it. A new
        # tf.function per call has an empty trace cache, so every step would
        # pay for a full retrace and XLA compilation.
        xla_generate = getattr(self, "_xla_generate", None)
        if xla_generate is None:
            xla_generate = tf.function(self.model.generate, jit_compile=True)
            self._xla_generate = xla_generate

        all_cleaned_outputs = []
        for start in range(0, len(self.text_list), self.batch_size):
            batch_texts = self.text_list[start:start + self.batch_size]
            encoding = self.tokenizer(batch_texts, padding=True, truncation=True, return_tensors="tf", max_length=self.max_token_length)
            # With left padding every prompt fills the first `prompt_length`
            # positions of its row; everything after that is newly generated.
            prompt_length = encoding['input_ids'].shape[1]

            # XLA-optimized generation
            output_sequences = xla_generate(input_ids=encoding['input_ids'],
                                            attention_mask=encoding['attention_mask'],
                                            max_length=self.max_token_length * 2,
                                            num_return_sequences=1,
                                            temperature=float(self.temperature),
                                            do_sample=True,
                                            pad_token_id=self.tokenizer.pad_token_id)

            # Decode only the generated tokens. Stripping the prompt by string
            # replacement fails whenever the prompt was truncated or does not
            # round-trip through the tokenizer unchanged.
            for output_ids in output_sequences:
                continuation = self.tokenizer.decode(output_ids[prompt_length:], skip_special_tokens=True)
                all_cleaned_outputs.append(" ".join(continuation.split()[:self.MAX_OUTPUT_WORDS]))

        return all_cleaned_outputs

In [None]:
# Run configuration for the environment constructed below.
model_name = 'gpt2'
max_token_length = 132
batch_size = 300  # value of 170 with V100
single_batch = True  # Only runs one batch, not all batches of text

# Building the environment loads the dataset and both models.
env = EmotionAnalysisEnv(
    model_name=model_name,
    max_token_length=max_token_length,
    batch_size=batch_size,
    single_batch=single_batch,
)

  and should_run_async(code)
All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'roberta.embeddings.position_ids', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model



In [None]:
import time

# Smoke-test the environment with a random policy and time each episode.
episodes = 5
for episode in range(1, episodes + 1):
    start_time = time.perf_counter()  # Monotonic clock for measuring durations

    # Gymnasium's reset() returns (observation, info), not a bare observation.
    obs, info = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        # An episode is over when it either terminates or is truncated.
        done = terminated or truncated
        score += reward

    end_time = time.perf_counter()
    duration = end_time - start_time  # Calculate the duration

    print(f"Episode: {episode} - Score: {score} - Duration: {duration:.2f} seconds")

# NOTE: env is deliberately not closed here because the following cells
# pass the same instance to DQN for training.

In [None]:
# Build a DQN agent with an MLP policy on the environment; the agent's
# networks are placed on the CPU.
model = DQN(
    policy='MlpPolicy',
    env=env,
    verbose=1,
    device='cpu',
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  and should_run_async(code)


In [None]:
# Train the agent for 100,000 environment steps.
model.learn(total_timesteps=100_000)

  and should_run_async(code)
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20       |
|    ep_rew_mean      | -20      |
|    exploration_rate | 0.992    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 0        |
|    time_elapsed     | 1571     |
|    total_timesteps  | 80       |
----------------------------------


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


In [None]:
# Evaluate the trained agent (`model`) on the environment (`env`) for 10
# evaluation episodes with rendering turned off. The call is deliberately left
# as a bare expression so the notebook echoes its return value as the cell
# output.
# NOTE(review): per the Stable-Baselines3 docs the default return is
# (mean_reward, std_reward) -- confirm against the installed SB3 version.
# NOTE(review): `model` and `env` are defined in earlier cells not shown here;
# verify `env` is the same EmotionAnalysisEnv configuration used for training.
evaluate_policy(model, env, n_eval_episodes=10, render=False)