In [1]:
import torch
import bitsandbytes
import accelerate
import transformers
import optimum
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training,
)
from trl import DPOTrainer
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
import os
from sentence_transformers import SentenceTransformer, util
import torch
#from time 
import time
import random

In [2]:
torch.cuda.empty_cache()
total_memory = torch.cuda.get_device_properties(0).total_memory
free_memory = total_memory - torch.cuda.memory_allocated(0)
print(f"Total GPU Memory: {total_memory / 1e9} GB, Free Memory: {free_memory / 1e9} GB")


#import flash-attention

model_path = "microsoft/phi-2" # "google/gemma-2b-it" "microsoft/phi-2" "stabilityai/stablelm-zephyr-3b" "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
access_token = "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"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

device_map = "auto"

Total GPU Memory: 25.435766784 GB, Free Memory: 25.435766784 GB


In [3]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    attn_implementation="flash_attention_2",
    device_map=device_map,
    trust_remote_code=True,
    token=access_token
)
base_model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, token=access_token) #microsoft/phi-2
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.truncation_side = "left"
output_dir = "/llm_recovery/"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
dataset_path = '/llm_recovery/data_generation/dpo_df_phi_v5.csv'   #'/llm_recovery/data_generation/dpo_dataset_v1.json'
dataset = pd.read_csv(dataset_path, index_col=0)
print(len(dataset))
# Drop rows with NaN values
dataset = dataset.dropna()
print(len(dataset))

dataset.head()

3476
3476


Unnamed: 0,prompt,chosen,rejected,chosen_score,rejected_score
0,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze this text and evaluate its streng...,5,3
1,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze and provide feedback on the stren...,5,2
2,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze this text for its strengths and w...,5,3
3,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze these two texts and provide an an...,5,3
4,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,"[rp] Analyze and improve this text, highlighti...",5,1


In [5]:
# Add the end-of-sequence token to each value in the 'chosen' column
dataset['chosen'] = dataset['chosen'].apply(lambda x: x.strip() + ' ' + tokenizer.eos_token)
dataset['rejected'] = dataset['rejected'].apply(lambda x: x.strip() + ' ' + tokenizer.eos_token)


In [6]:
# Replace <rp> and </rp> with [rp] and [/rp] in both 'chosen' and 'rejected' columns
dataset['prompt'] = dataset['prompt'].str.replace("<rp>", "[rp]").str.replace("</rp>", "[/rp]")
dataset['chosen'] = dataset['chosen'].str.replace("<rp>", "[rp]").str.replace("</rp>", "[/rp]")
dataset['rejected'] = dataset['rejected'].str.replace("<rp>", "[rp]").str.replace("</rp>", "[/rp]")
dataset


Unnamed: 0,prompt,chosen,rejected,chosen_score,rejected_score
0,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze this text and evaluate its streng...,5,3
1,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze and provide feedback on the stren...,5,2
2,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze this text for its strengths and w...,5,3
3,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,[rp] Analyze these two texts and provide an an...,5,3
4,Instruction: Analyze the differences between t...,[rp] Embark on an extended journey of interpre...,"[rp] Analyze and improve this text, highlighti...",5,1
...,...,...,...,...,...
3471,Instruction: Analyze the differences between t...,[rp] Elevate the descriptive elements in the f...,Rewrite from the text is used to convert the t...,5,0
3472,Instruction: Analyze the differences between t...,[rp] Edit this story to make it more suspensef...,"Sure, here is the rewrite prompt: Provide the ...",5,0
3473,Instruction: Analyze the differences between t...,[rp] Convert this JavaScript code to Java: [/r...,"Sure, here is the rewrite prompt: **Rewrite pr...",5,0
3474,Instruction: Analyze the differences between t...,[rp] Compare and contrast the viewpoints prese...,"Sure, here is the rewrite prompt: Provide the ...",5,0


In [7]:
dataset = Dataset.from_pandas(dataset)
train_test_split = dataset.train_test_split(test_size=0.05)
dataset_dict = DatasetDict(train=train_test_split['train'], test=train_test_split['test'])
print(dataset_dict)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', '__index_level_0__'],
        num_rows: 3302
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', '__index_level_0__'],
        num_rows: 174
    })
})


In [8]:

# Load a sentence transformer model for embedding calculation
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def evaluate_model_with_similarity(test_dataset, tokenizer, model, num_samples=10):
    # Ensure the model is in evaluation mode
    model.eval()
    
    # Move model to the appropriate device
    #model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    def clean_output_text(output_text):
        # Define a list of filler phrases to remove
        fillers = [
            "Rewrite prompt:",
            "Sure, the rewrite prompt was used to convert the following text:",
            "The rewrite prompt used to convert the text is:"
        ]
        
        # Iterate through each filler and remove it from the output text
        for filler in fillers:
            output_text = output_text.replace(filler, "").strip()
        
        return output_text
        
    results = []
    random_indices = random.sample(range(len(test_dataset)), num_samples)

    for i in random_indices:
        test_sample = test_dataset[i] 
        
        # Assuming 'test_sample' contains 'chosen' which we compare with the output
        chosen_text = test_sample['chosen']
        """
        prompt = create_custom_prompt(
            tokenizer,
            test_sample['original_text'],
            test_sample['rewritten_text']
        )
        """
        inputs = tokenizer.encode(test_sample["prompt"], add_special_tokens=False, return_tensors="pt")
        input_length = inputs.shape[1]

        start_time = time.time()

        outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)
        new_tokens = outputs[0, input_length:]
        generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
        cleaned_gen_text = clean_output_text(generated_text)
        
        end_time = time.time()
        time_taken = end_time - start_time

        # Compute embeddings for both chosen and generated text
        chosen_embedding = sentence_model.encode(chosen_text, convert_to_tensor=True)
        generated_embedding = sentence_model.encode(generated_text, convert_to_tensor=True)
        cleaned_embedding = sentence_model.encode(cleaned_gen_text, convert_to_tensor=True)

        # Compute Cosine similarity
        cosine_similarity = util.cos_sim(chosen_embedding, generated_embedding).item()
        cosine_similarity2 = util.cos_sim(chosen_embedding, cleaned_embedding).item()
        
        
        #similarity_scores = cosine_similarity(prompt_embeddings, prompt_1_embeddings)
        #similarity_scores = np.diag(similarity_scores)

        results.append({
            'prompt': test_sample["prompt"],
            'chosen': chosen_text,
            'output': generated_text,
            'time_taken': time_taken,
            'cosine_similarity': cosine_similarity,
            'cleaned_cos_sim': cosine_similarity2
        })

    for result in results:
        #print("Prompt:", result['prompt'])
        print("Chosen:", result['chosen'])
        print("Output:", result['output'])
        print("Time taken:", result['time_taken'], "seconds")
        print("Cosine similarity:", result['cosine_similarity'])
        print("Cleaned Cosine similarity:", result['cleaned_cos_sim'], "\n")

        # Calculate the average cosine similarity
    total_similarity = sum(result['cosine_similarity'] for result in results)
    average_similarity = total_similarity / len(results)
    
    # Print the average similarity
    print("Average Cosine Similarity:", average_similarity)
    print("\n")
    print("*** Next ***")
    
    return results

# Example usage
results = evaluate_model_with_similarity(dataset_dict["train"], tokenizer, base_model, num_samples=5)

"""
for result in results:
    #print("Prompt:", result['prompt'])
    print("Chosen:", result['chosen'])
    print("Output:", result['output'])
    print("Time taken:", result['time_taken'], "seconds")
    print("Cosine similarity:", result['cosine_similarity'], "\n")
"""

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Optimize this code for better performance: [/rp] <|endoftext|>
Output:  [rp] The original code is a C# class called `MemoryBlock` that allocates and deallocates a block of memory. While the code is functional, there are some areas where it could be optimized for better performance: [rp]

**1. Memory Allocation:**

- The current code uses `Marshal.AllocHGlobal` to allocate the memory, which is a safe but relatively slow function. A faster alternative is `Marshal.AllocHGlobalArray` which allocates memory for an array of a specific size. This can significantly reduce allocation overhead.

**2. Memory Alignment:**

- The code allocates memory in a single block, but the
Time taken: 7.28993558883667 seconds
Cosine similarity: 0.4437544643878937
Cleaned Cosine similarity: 0.4437544643878937 

Chosen: [rp] Optimize this code for better performance: [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten using the [rp] Rewrite Prompt [/rp]

Time taken: 1.1466093063354492 

'\nfor result in results:\n    #print("Prompt:", result[\'prompt\'])\n    print("Chosen:", result[\'chosen\'])\n    print("Output:", result[\'output\'])\n    print("Time taken:", result[\'time_taken\'], "seconds")\n    print("Cosine similarity:", result[\'cosine_similarity\'], "\n")\n'

In [9]:
from datasets import Dataset

# Assuming dataset["train"] and dataset["test"] are your original datasets
train_subset = dataset_dict["train"].shuffle(seed=42) #.select(range(8000))
test_subset = dataset_dict["test"].shuffle(seed=42)

# You can now use train_subset and test_subset for training and evaluation
print(train_subset, test_subset)

Dataset({
    features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', '__index_level_0__'],
    num_rows: 3302
}) Dataset({
    features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score', '__index_level_0__'],
    num_rows: 174
})


In [10]:
from transformers import TrainerCallback
import subprocess
import os

class PushToGitHubCallback(TrainerCallback):
    def __init__(self, output_dir, commit_message="Update model"):
        self.output_dir = output_dir
        self.commit_message = commit_message

    def on_save(self, args, state, control, **kwargs):
        print("Pushing model checkpoint to GitHub...")
        try:
            # Ensure we're in the correct directory
            os.chdir(self.output_dir)

            # Add all files to Git
            subprocess.run(["git", "add", "."], check=True)
            
            # Commit changes
            subprocess.run(["git", "commit", "-m", self.commit_message], check=True)
            
            # Push changes
            subprocess.run(["git", "push"], check=True)
            
            print("Model checkpoint successfully pushed to GitHub.")
            
        except subprocess.CalledProcessError as e:
            print(f"Failed to push to GitHub: {e}")


class MetricsLoggingCallback(TrainerCallback):
    """Custom callback for logging additional metrics during training."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        # This method gets called every logging step.
        # You can access the log history using `state.log_history` which is a list of dictionaries.
        # The most recent log entry is at the end of this list.
        if state.log_history:
            latest_log = state.log_history[-1]
            # Extract the metrics you're interested in. Here's an example:
            metrics_of_interest = ["rewards/chosen", "rewards/rejected", "rewards/accuracies", "rewards/margins", "logps/rejected", "logps/chosen"]
            metrics_str = ", ".join(f"{metric}: {latest_log.get(metric, 'n/a')}" for metric in metrics_of_interest)
            print(f"Step: {state.global_step}, {metrics_str}")



In [11]:
from transformers import TrainerCallback
import time

class EvaluateCallback(TrainerCallback):
    def __init__(self, eval_function, eval_dataset, tokenizer, num_samples=10, eval_steps=50):
        """
        eval_function: The evaluation function to use.
        eval_dataset: The dataset to use for evaluation.
        tokenizer: The tokenizer for encoding.
        num_samples: Number of samples to evaluate.
        eval_steps: Frequency of evaluation in terms of training steps.
        """
        self.eval_function = eval_function
        self.eval_dataset = eval_dataset
        self.tokenizer = tokenizer
        self.num_samples = num_samples
        self.eval_steps = eval_steps
        self.step_count = 0

    def on_step_end(self, args, state, control, model=None, **kwargs):
        self.step_count += 1
        if self.step_count % self.eval_steps == 0:
            print("\nRunning evaluation...")
            self.eval_function(
                test_dataset=self.eval_dataset, 
                tokenizer=self.tokenizer, 
                model=model, 
                num_samples=self.num_samples
            )

# Instantiate the custom callback
eval_callback = EvaluateCallback(
    eval_function=evaluate_model_with_similarity,
    eval_dataset=test_subset,  # Assuming this is a slice of your dataset
    tokenizer=tokenizer,
    num_samples=5,  # Adjust the number of samples for evaluation
    eval_steps=10  # Evaluate every 50 steps, adjust as needed
)

In [12]:
base_model

PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2560)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x PhiDecoderLayer(
        (self_attn): PhiFlashAttention2(
          (q_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
          (k_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
          (v_proj): Linear4bit(in_features=2560, out_features=2560, bias=True)
          (dense): Linear4bit(in_features=2560, out_features=2560, bias=True)
          (rotary_emb): PhiRotaryEmbedding()
        )
        (mlp): PhiMLP(
          (activation_fn): NewGELUActivation()
          (fc1): Linear4bit(in_features=2560, out_features=10240, bias=True)
          (fc2): Linear4bit(in_features=10240, out_features=2560, bias=True)
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (final_

In [13]:

# from https://github.com/mlabonne/llm-course/blob/main/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb
lora_dropout=0.05 #0.5
lora_r=32 #
lora_alpha=64 #
learning_rate=1e-7 # 5e-4 5e-5
batch_size = 4
dpo_beta = 0.2 # 0.1
weight_decay=0.01,  # Weight decay
epochs = 1


def create_peft_config(model):
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        lora_dropout=lora_dropout,
        lora_alpha=lora_alpha,
        r=lora_r,
        bias="none",
        #target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], #decent results 
        target_modules = ["q_proj", "k_proj", "v_proj", "dense", "fc1", "fc2", "lm_head"] #, "o_proj", "gate_proj", "up_proj", "down_proj"],
    )

    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, peft_config)

    model.print_trainable_parameters()

    return model, peft_config

model, lora_config = create_peft_config(base_model)

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=batch_size,
    learning_rate=learning_rate,
    #weight_decay=weight_decay,
    fp16=True,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    warmup_steps=5,  #50
    logging_steps=5,
    log_level='debug',
    num_train_epochs=epochs,
    save_steps=1000,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    
)

trainer = DPOTrainer(
    model, # model base_model
    ref_model=None,
    args=training_args,
    train_dataset=train_subset, # test_dataset dataset dataset["train"]
    #test_dataset=dataset["test"],
    callbacks=[eval_callback,  MetricsLoggingCallback(), PushToGitHubCallback(output_dir=output_dir, commit_message="Update model checkpoint")],  # Add the custom callback
    tokenizer=tokenizer,
    peft_config=lora_config,
    beta=dpo_beta,
    max_prompt_length=1024, #changed from 1024
    max_length=1024, #1536
    #loss_type="hinge",
)

print("Starting trainer...")
trainer.train()

#trainable params: 23,592,960 || all params: 2,803,276,800 || trainable%: 0.8416207775129448
# r = 128 trainable params: 195,624,960 || all params: 2,975,308,800 || trainable%: 6.574946439172969
#r32 a64 trainable params: 48,906,240 || all params: 2,828,590,080 || trainable%: 1.728997083946501


trainable params: 48,906,240 || all params: 2,828,590,080 || trainable%: 1.728997083946501




Map:   0%|          | 0/3302 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (4528 > 2048). Running this sequence through the model will result in indexing errors
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Using auto half precision backend
Currently training with a batch size of: 4
***** Running training *****
  Num examples = 3,302
  Num Epochs = 1
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 8
  Total optimization steps = 103
  Number of trainable parameters = 48,906,240
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted emb

Starting trainer...


Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
5,0.6932
10,0.6919
15,0.6907
20,0.6902
25,0.6888
30,0.6888
35,0.6877
40,0.6861
45,0.6889
50,0.6848


Step: 5, rewards/chosen: -0.001997766550630331, rewards/rejected: -0.002084240783005953, rewards/accuracies: 0.11249999701976776, rewards/margins: 8.647427603136748e-05, logps/rejected: -78.52970886230469, logps/chosen: -41.526893615722656


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Make this into a haiku [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten using the haiku format. [/rp]

Time taken: 1.8122167587280273 seconds
Cosine similarity: 0.8608782291412354
Cleaned Cosine similarity: 0.8608782291412354 

Chosen: [rp] Shorten this code while maintaining functionality: [/rp] <|endoftext|>
Output:  [rp] The original code was rewritten to be more concise and efficient. [/rp]

Time taken: 1.9924633502960205 seconds
Cosine similarity: 0.6104882955551147
Cleaned Cosine similarity: 0.6104882955551147 

Chosen: [rp] Provide a more comprehensive analysis of the themes discussed in the following excerpt, offering deeper insights and explanations: [/rp] <|endoftext|>
Output:  [rp] The original text was converted to the rewritten text using the prompt: "Rewrite the given passage to make it more suitable for a high school level examination. The rewritten text should be in the form of a passage that can be used for a high school level examination.

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Improve this code by enhancing readability and applying best practices: [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten to improve readability and adhere to best practices. [/rp]

Time taken: 2.157893419265747 seconds
Cosine similarity: 0.6205748915672302
Cleaned Cosine similarity: 0.6205748915672302 

Chosen: [rp] Broaden the scope of the following text by incorporating additional perspectives and insights: [/rp] <|endoftext|>
Output:  [rp] <b>Rewrite Prompt:</b>

Rewrite the original text into a high school level instruction following while keeping as many content as possible, using a lonely tone.

<b>Rewritten Text:</b>

In the course of a story, the dreams and aspirations of characters often evolve and change as they face new challenges and gain new experiences.

In the beginning, a character's dreams and aspirations might be relatively simple and straightforward. For example, a young protagonist might dream of becoming a great warrior or a powerful s

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Provide a more comprehensive analysis of the themes discussed in the following excerpt, offering deeper insights and explanations: [/rp] <|endoftext|>
Output:  [rp] The original text was converted to the rewritten text using the prompt: "Rewrite the original text into a high school level instruction following while keeping as many content as possible, using a neutral tone." [/rp]

Time taken: 4.291902780532837 seconds
Cosine similarity: 0.4477202892303467
Cleaned Cosine similarity: 0.4477202892303467 

Chosen: [rp] Create a how-to guide based on the process described in this text. [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text is [rp] a step-by-step guide[/rp]

Time taken: 2.8992557525634766 seconds
Cosine similarity: 0.6188206076622009
Cleaned Cosine similarity: 0.6188206076622009 

Chosen: [rp] Draft an elevator pitch based on this research paper. [/rp] <|endoftext|>
Output:  [rp] The cultural evolution of language an

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Highlight the divergent conclusions drawn by these studies. [/rp] <|endoftext|>
Output:  [rp] The text highlights several challenges facing the importance of sports in society. These challenges include the commercialization of sports, the decline in youth participation, the need for greater diversity, equity, and inclusion, and the challenge of maintaining the integrity of sports. However, the text does not draw any divergent conclusions. It primarily focuses on outlining the challenges facing the importance of sports in society and emphasizes the need to address them to ensure sports continue to play a positive role. Therefore, the text does not highlight any divergent conclusions drawn by the studies. [rp]

Time taken: 10.211620092391968 seconds
Cosine similarity: 0.11835160851478577
Cleaned Cosine similarity: 0.11835160851478577 

Chosen: [rp] Make this into a free verse poem [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten using the prompt "AI and Ethi

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Provide a more comprehensive analysis of the themes discussed in the following excerpt, offering deeper insights and explanations: [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten to explore themes such as human curiosity and ambition, science and technological advancement, international cooperation and human unity, inspiration, and the future of space exploration. [/rp]

Time taken: 3.8920950889587402 seconds
Cosine similarity: 0.5923182964324951
Cleaned Cosine similarity: 0.5923182964324951 

Chosen: [rp] Improve this code by enhancing readability and applying best practices: [/rp] <|endoftext|>
Output:  [rp] The original text was converted using the `methods` library. [/rp]

Time taken: 2.04838490486145 seconds
Cosine similarity: 0.6267775893211365
Cleaned Cosine similarity: 0.6267775893211365 

Chosen: [rp] Convert this into a blues song [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text is "Climate Cha

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Bring the setting to life in the following passage by using descriptive language to create a vivid atmosphere: [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text was to use descriptive language to bring the setting to life. [/rp]

Time taken: 2.977933883666992 seconds
Cosine similarity: 0.7547802925109863
Cleaned Cosine similarity: 0.7547802925109863 

Chosen: [rp] Shift this guide from a personal account to a general advice format. [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten to provide general advice on the challenges faced by characters in achieving their dreams and aspirations. [/rp]

Time taken: 2.7802793979644775 seconds
Cosine similarity: 0.5448042750358582
Cleaned Cosine similarity: 0.5448042750358582 

Chosen: [rp] Illustrate this theory with real-world applications. [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text is [rp] the prompt used to c

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Make this into a limerick [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten to create a limerick about a cognitive scientist named Sue and her research on decision-making. [/rp]

Time taken: 2.9008264541625977 seconds
Cosine similarity: 0.6180184483528137
Cleaned Cosine similarity: 0.6180184483528137 

Chosen: [rp] Convert this into a pop song [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text was to create a song with a focus on the moral and legal complexities surrounding the concept of the significance of education. The song should have a positive tone and encourage listeners to invest in education. [/rp]

Time taken: 5.2455010414123535 seconds
Cosine similarity: 0.4846142530441284
Cleaned Cosine similarity: 0.4846142530441284 

Chosen: [rp] Draft an elevator pitch based on this research paper. [/rp] <|endoftext|>
Output:  [rp] The cultural evolution of language and communication is a vast and complex top

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Improve this code by enhancing readability and applying best practices: [/rp] <|endoftext|>
Output:  [rp] <p>User authentication is an essential aspect of web development that ensures users are who they claim to be. There are various solutions for managing user authentication in HTML, each with its pros and cons. Here are some popular solutions and how they compare:</p>
[/rp]

Time taken: 5.031920433044434 seconds
Cosine similarity: 0.3846920430660248
Cleaned Cosine similarity: 0.3846920430660248 

Chosen: [rp] Make this into a free verse poem [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten using the prompt "AI and Ethics" [rp]

Time taken: 1.8781311511993408 seconds
Cosine similarity: 0.47709423303604126
Cleaned Cosine similarity: 0.47709423303604126 

Chosen: [rp] Shorten this code while maintaining functionality: [/rp] <|endoftext|>
Output:  [rp] The original code was rewritten to be more concise and readable. [/rp]

Time taken: 1.8534963130950928 seco

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Shorten this code while maintaining functionality: [/rp] <|endoftext|>
Output:  [rp] The original code was rewritten to be more concise and readable. [/rp]

Time taken: 2.129952907562256 seconds
Cosine similarity: 0.6086036562919617
Cleaned Cosine similarity: 0.6086036562919617 

Chosen: [rp] Convert this Python code to Java: [/rp] <|endoftext|>
Output:  [rp] The original text was converted from Python to Java[/rp]

Time taken: 1.806962251663208 seconds
Cosine similarity: 0.781430184841156
Cleaned Cosine similarity: 0.781430184841156 

Chosen: [rp] Shift this guide from a personal account to a general advice format. [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten to address the challenges facing the significance of education. The rewritten text highlights the challenges of access, quality, relevance, and discrimination in education systems. It emphasizes the need for collaborative efforts to ensure equitable and inclusive education for all. [/rp]

Time ta

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Running evaluation...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Chosen: [rp] Convert this into a rock song [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text is "Quantum Mechanics: A Brief Overview"[/rp]

Time taken: 2.6271896362304688 seconds
Cosine similarity: 0.4255119562149048
Cleaned Cosine similarity: 0.4255119562149048 

Chosen: [rp] Convert this into a blues song [/rp] <|endoftext|>
Output:  [rp] The prompt used to convert the original text to the rewritten text was to incorporate the concept of intersectionality and expand the scope of social justice to include issues such as economic inequality, racial justice, gender and LGBTQ+ justice, immigration justice, and disability justice. [/rp]

Time taken: 4.8695313930511475 seconds
Cosine similarity: 0.3477204144001007
Cleaned Cosine similarity: 0.3477204144001007 

Chosen: [rp] Optimize this code for better performance: [/rp] <|endoftext|>
Output:  [rp] The original text was rewritten using the following prompt:

```c++
#include <iostream>
#in



Training completed. Do not forget to share your model on huggingface.co/models =)




Step: 103, rewards/chosen: n/a, rewards/rejected: n/a, rewards/accuracies: n/a, rewards/margins: n/a, logps/rejected: n/a, logps/chosen: n/a


TrainOutput(global_step=103, training_loss=0.6868367287719134, metrics={'train_runtime': 4524.4244, 'train_samples_per_second': 0.73, 'train_steps_per_second': 0.023, 'total_flos': 0.0, 'train_loss': 0.6868367287719134, 'epoch': 1.0})

In [None]:
def generate_preference_data(test_dataset, tokenizer, model, sentence_model, num_samples=100):
    model.eval()  # Ensure the model is in evaluation mode

    new_samples = []
    
    # Generate random indices for sampling
    #random_indices = np.random.choice(range(len(test_dataset)), num_samples, replace=False)

    for test_sample in test_dataset:
        #test_sample = test_dataset[idx]
        
        inputs = tokenizer.encode(test_sample["prompt"], add_special_tokens=False, return_tensors="pt")
        input_length = inputs.shape[1]

        # Generate output
        outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)
        new_tokens = outputs[0, input_length:]
        generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Clean the generated text
        #generated_text_cleaned = clean_output_text(generated_text)
        
        # Compute embeddings and similarity
        chosen_embedding = sentence_model.encode(test_sample['chosen'], convert_to_tensor=True)
        generated_embedding = sentence_model.encode(generated_text, convert_to_tensor=True)
        cosine_similarity = util.cos_sim(chosen_embedding, generated_embedding).item()
        
        # Calculate rejected score
        rejected_score = round(cosine_similarity * 5)

        # Create a new sample dictionary
        new_sample = {
            'prompt': test_sample["prompt"],
            'chosen': test_sample["chosen"],
            'rejected': generated_text,
            'chosen_score': test_sample['chosen_score'],  # Assuming this exists in your test_sample
            'rejected_score': rejected_score
        }
        
        new_samples.append(new_sample)
        
    return new_samples

# Assuming test_dataset is a HuggingFace Dataset object or a list of dictionaries
test_subset = dataset_dict["train"].select(range(400))  # Example subset, adjust as necessary
new_preference_data = generate_preference_data(test_subset, tokenizer, model, sentence_model, num_samples=10)
new_preference_data

In [None]:

# todo: during training getting these warning:
# i guess this is on the base model, need to check. in that case this is fine
# UserWarning: None of the inputs have requires_grad=True. Gradients will be None

# seems that this can be ignored:
# Could not estimate the number of tokens of the input, floating-point operations will not be computed
model_name = "phi"
output_dir = os.path.join(output_dir, f"final_checkpoint_{model_name}")
trainer.model.save_pretrained(output_dir)
trainer.tokenizer.save_pretrained(output_dir)


In [None]:
import subprocess
def push_changes_to_github(output_dir, commit_message="Update model"):
    """
    Pushes changes in output_dir to the existing GitHub repository.
    
    Parameters:
    - output_dir: Path to the directory containing changes to push.
    - commit_message: Commit message for the changes.
    """
    try:
        # Add all files to Git
        subprocess.run(["git", "add", "."], cwd=output_dir, check=True)
        
        # Commit changes
        subprocess.run(["git", "commit", "-m", commit_message], cwd=output_dir, check=True)
        
        # Push changes
        subprocess.run(["git", "push"], cwd=output_dir, check=True)
        
        print("Changes successfully pushed to GitHub.")
        
    except subprocess.CalledProcessError as e:
        print(f"An error occurred: {e}")

# Example usage
commit_message = "Update model with new training data"  # Customize your commit message
push_changes_to_github(output_dir, commit_message)
