## INSTALLATIONS AND MOUNTING GOOGLE DRIVE

In [None]:
import torch

# Install profanity check (provides the `profanity_check` module imported by the inference cell)
!pip install alt-profanity-check

# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install only the packages listed here, without pulling in their dependencies
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes

In [None]:
from google.colab import drive
# Mount Google Drive; every dataset/model path below lives under this mount point
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# DATASET

# PREPROCESS DATASET

Run this cell only once, and only if the DPO dataset files have not been preprocessed yet.

In [None]:
import pandas as pd
import os

# Preprocess DPO Training Data - Remove EOS tokens from jokes
#
# Every CSV file directly inside `dataset_directory` is loaded, each entry of
# its 'losing_joke' column is truncated at the first end-of-sequence marker,
# and the result is written under the same file name into `output_dir`.
dataset_directory = '/content/drive/MyDrive/JOKEGPT_FOLDER/Datasets/DPO_Dataset'
output_dir = os.path.join(dataset_directory, 'After_preprocessing')

# End-of-sequence markers, applied in this order.
EOS_TOKENS = ("<eos>", "</s>", "<|end_of_text|>")


def strip_eos_tokens(joke, eos_tokens=EOS_TOKENS):
  """Return `joke` cut off at the first occurrence of each EOS marker.

  Args:
    joke: Cell value from the 'losing_joke' column.
    eos_tokens: Markers to cut at, applied one after another.

  Returns:
    The truncated string. Non-string values (e.g. NaN for an empty cell)
    are returned unchanged instead of raising a TypeError.
  """
  if not isinstance(joke, str):
    return joke
  for eos_token in eos_tokens:
    # str.split returns the whole string as element 0 when the marker is absent
    joke = joke.split(eos_token)[0]
  return joke


# to_csv does not create missing parent directories
os.makedirs(output_dir, exist_ok=True)

dataset_files = os.listdir(dataset_directory)

for dataset_file in dataset_files:

  input_path = os.path.join(dataset_directory, dataset_file)

  # The output folder lives inside the input folder, so listdir also returns
  # it (and any other sub-directory); only regular files can be read as CSV.
  if not os.path.isfile(input_path):
    continue

  # Load File to preprocess
  dpo_dataset = pd.read_csv(input_path)
  print(f"Loaded {dataset_file}")

  # Remove end of sequence token from each joke. Assigning the whole column
  # avoids chained `df[col].iloc[i] = ...` writes, which may not reach the frame.
  dpo_dataset['losing_joke'] = dpo_dataset['losing_joke'].map(strip_eos_tokens)

  print("Preprocessed jokes:\n", dpo_dataset['losing_joke'][:4])

  # Save to new directory
  dpo_dataset.to_csv(os.path.join(output_dir, dataset_file), index=False)
  print(f"Saved {dataset_file} to {output_dir}")

# LOAD PREPROCESSED DATASET

In [None]:
import pandas as pd
from datasets import Dataset

# Preprocess DPO training data

# Map the dataset to the desired format.
def return_prompt_and_responses(samples):
  """Rename the joke columns of a batch to the prompt/chosen/rejected layout.

  Args:
    samples: Mapping with at least the keys "prompt", "winning_joke" and
      "losing_joke".

  Returns:
    A new dict with keys "prompt", "chosen" and "rejected" holding the
    corresponding values from `samples`.
  """
  source_column_for = {
    "prompt": "prompt",
    "chosen": "winning_joke",
    "rejected": "losing_joke",
  }
  return {target: samples[source] for target, source in source_column_for.items()}

# Choose the dataset to train
DPO_TRAIN_CSV = "/content/drive/MyDrive/JOKEGPT_FOLDER/Datasets/DPO_Dataset/After_preprocessing/results_dpo_llama3_04-55-37.csv"

# Source columns that are dropped once they have been copied to chosen/rejected
column_names = ['winning_joke', 'losing_joke']

# CSV -> pandas -> Hugging Face Dataset
dpo_dataset_hf = Dataset.from_pandas(pd.read_csv(DPO_TRAIN_CSV))

# Batched map: the mapping function receives a dict of column lists
dpo_dataset = dpo_dataset_hf.map(
  return_prompt_and_responses,
  remove_columns = column_names,
  batched = True,
)

# Show the dataset summary and its first record
dpo_dataset, dpo_dataset[0]

# DPO TRAINING

In [None]:
# Patch DPO Trainer (the patch call is kept ahead of the `trl` import below)
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

from transformers import TrainingArguments
from trl import DPOTrainer
from google.colab import drive
from numba import cuda
from unsloth import FastLanguageModel

# Choose model to train
model_path = "/content/drive/MyDrive/JOKEGPT_FOLDER/Models/llama3"

# Load model and tokenizer (4-bit weights, dtype left to the loader)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_path,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

# Mixed precision: bf16 when the GPU supports it, fp16 otherwise.
# `torch` is imported in the installation cell at the top of the notebook.
use_bf16 = torch.cuda.is_bf16_supported()

# Set hyper-parameters for DPO Training
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 42,
    num_train_epochs = 2,
    per_device_train_batch_size = 5,
    gradient_accumulation_steps = 10,
    learning_rate = 2e-7,
    lr_scheduler_type = "linear",
    warmup_ratio = 0.1,
    optim = "adamw_8bit",
    # weight_decay = 1e-3,
    # max_grad_norm = 3,
    bf16 = use_bf16,
    fp16 = not use_bf16,
    logging_steps = 1,
)

dpo_trainer = DPOTrainer(
    model = model,
    ref_model = None,
    args = training_args,
    train_dataset = dpo_dataset,
    tokenizer = tokenizer,
    beta = 0.1,
    max_length = 1024,
    max_prompt_length = 512,
)

# Training
dpo_trainer.train()

**SAVE MODEL**

In [None]:
import os
from datetime import datetime

# Destination folder for the DPO-trained model: <After_DPO>/dpo_llama3.
# The folder name is fixed; the timestamp suffix is currently disabled:
# timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
save_to_dir = os.path.join(
    "/content/drive/MyDrive/JOKEGPT_FOLDER/Models/After_DPO",
    "dpo_llama3",
)

# Save the model and tokenizer side by side in the same directory
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_to_dir)

# INFERENCE

In [None]:
from numba import cuda
from unsloth import FastLanguageModel

# Choose model to perform inference.
# This is the directory written by the SAVE MODEL cell (After_DPO/dpo_llama3);
# the previous value pointed at "dpo_llama2", which is not the model trained in
# this notebook even though the results are saved as llama3.csv.
model_path = "/content/drive/MyDrive/JOKEGPT_FOLDER/Models/After_DPO/dpo_llama3"

# Load the DPO-trained model and its tokenizer with the same settings as in training
dpo_fine_model, dpo_fine_tokenizer = FastLanguageModel.from_pretrained(
        model_path,
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True
    )

In [None]:
import pandas as pd
from profanity_check import predict, predict_prob

# Load test dataset (no header row: column 0 is the reference joke, column 1 the prompt)
test_dataset = pd.read_csv("/content/drive/MyDrive/JOKEGPT_FOLDER/Datasets/jokes_test_dataset.csv", header=None)

test_dataset = test_dataset.rename(columns={0: 'joke', 1: 'prompt'})
prompts = [element for element in test_dataset['prompt'].values]

FastLanguageModel.for_inference(dpo_fine_model)
joke_prompt ="""### Prompt:{} ### Joke:{}"""

# A joke is regenerated while it scores at or above the threshold, for at most
# MAX_ATTEMPTS generations; the last attempt is kept regardless of its score.
MAX_ATTEMPTS = 3
PROFANITY_THRESHOLD = 0.5

# Generate results for each prompt (exactly one entry is appended per prompt,
# so the list stays aligned with the rows of test_dataset)
dpo_fineTunedJokes = []
for prompt in prompts:
  inputs = dpo_fine_tokenizer(
          [
              joke_prompt.format(
                  prompt,
                  "",
              )
          ], return_tensors = "pt").to("cuda")

  joke = ""  # fallback if every attempt yields an empty decode
  for attempt in range(MAX_ATTEMPTS):
    # Generate output
    outputs = dpo_fine_model.generate(**inputs, max_new_tokens=64, use_cache=True, do_sample=True)
    # Get response
    res = dpo_fine_tokenizer.batch_decode(outputs)
    if not res:
      # Counts as an attempt so this can no longer retry forever
      print("res is empty: trying again")
      continue

    # predict_prob returns one probability per input text; take the single score
    profane_score = float(predict_prob(res)[0])

    # Extract joke only from text
    joke = res[0].split("### Joke:")[1]

    # Check that answer is appropriate
    if profane_score < PROFANITY_THRESHOLD:
      break
    if attempt < MAX_ATTEMPTS - 1:
      print(f"Too profane! Trying again.\nScore: {profane_score}, Response: {res}")

  dpo_fineTunedJokes.append(joke)


In [None]:
# Remove EOS tokens from outputs.
# Uses the same markers the preprocessing cell treats as end-of-sequence;
# stripping only '</s>' leaves '<eos>' and '<|end_of_text|>' in the results.
GENERATION_EOS_TOKENS = ('<eos>', '</s>', '<|end_of_text|>')


def remove_eos_tokens(text, eos_tokens=GENERATION_EOS_TOKENS):
  """Return `text` with every occurrence of each EOS marker removed."""
  for eos_token in eos_tokens:
    text = text.replace(eos_token, '')
  return text


test_dataset['dpo_joke'] = [remove_eos_tokens(s) for s in dpo_fineTunedJokes]

In [None]:
# Save to result folder
import os

results_csv_path = '/content/drive/MyDrive/JOKEGPT_FOLDER/Results/SFT_DPO/llama3.csv'

# to_csv does not create missing parent directories, so make sure the folder
# exists before writing (otherwise the generated jokes are lost to an OSError)
os.makedirs(os.path.dirname(results_csv_path), exist_ok=True)
test_dataset.to_csv(results_csv_path, index=False)