# ===========================================================

## Installs and imports

In [1]:
# %pip install -q -U transformers trl peft bitsandbytes openai numpy==1.26.4 pandas==2.2.2 torch==2.4.0
# %pip install -q -U transformers==4.44.0 trl==0.9.6 peft==0.12.0 bitsandbytes openai numpy==1.26.4 pandas==2.2.2 torch==2.4.0

In [2]:
# %pip install -qqq flash-attn

In [9]:
from openai import OpenAI
from system_prompts_builder import generate_all_permutations, CounselorPersonality
from questionnaires import get_prompt_eval_questionnaire, get_prompt_eval_questionnaire_partial_conv
import textwrap
import json
import os
import tqdm 
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, PeftModel
from trl import setup_chat_format
import pandas as pd
import numpy as np
import re
import pickle

# Report the versions of the core numerical libraries used by this notebook.
for _lib_label, _lib_module in (("numpy", np), ("pandas", pd), ("torch", torch)):
    print(f"{_lib_label} version:", _lib_module.__version__)
# To also report peft / trl / transformers, import the packages and print their __version__:
# import peft
# import trl
# import transformers
# print("peft version:", peft.__version__)
# print("trl version:", trl.__version__)
# print("transformers version:", transformers.__version__)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device}")
print("cuda version:", torch.version.cuda)

numpy version: 1.26.4
pandas version: 2.2.2
torch version: 2.4.0+cu118
Using device: cuda
cuda version: 11.8


In [None]:
# Choose the attention implementation and compute dtype from the GPU's compute capability.
# The availability check comes first because torch.cuda.get_device_capability() raises when
# no CUDA device is present; a CPU-only session therefore takes the "eager" branch.
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
    print("Using flash_attention_2")
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16
    print("Using eager")


## Load OpenAI client to access the API

In [3]:
# Read the OpenAI API key from a local file so the secret is never written into the notebook.
# Keep "openai_key.txt" out of version control.
with open("openai_key.txt", "r") as _key_file:  # the context manager closes the handle
    OpenAI_API_KEY = _key_file.read().strip()

# Create a client instance with your API key
client = OpenAI(
    api_key=OpenAI_API_KEY
)


## Load the therapist model and tokenizer

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub interactively (prompts for an access token).
# Required for private/gated datasets and models.
# NOTE(review): the Llama-2 base model loaded later is presumably gated — confirm the
# account used here has been granted access.
notebook_login()

In [None]:
# Load the 7b llama-2 model
therapist_model_id = "meta-llama/Llama-2-7b-hf"

# Load tokenizer.
# No `device_map` is passed here: a tokenizer has no weights to place on a device, and extra
# keyword arguments are forwarded to the tokenizer's __init__ instead of being used for loading.
tokenizer = AutoTokenizer.from_pretrained(therapist_model_id, trust_remote_code=True)

# tokenizer.add_tokens(['<|im_start|>', '<|im_end|>'], special_tokens=False)
# tokenizer.bos_token = '<|im_start|>'
# tokenizer.eos_token = '<|im_end|>'
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Chat format: every message is rendered as "<|im_start|>{role}\n{content}<|im_end|>\n", and an
# open "<|im_start|>assistant\n" header is appended when add_generation_prompt is true.
# The add_tokens call above is commented out, so the markers are not registered as added tokens here.
tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"

tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training


########################################################################################
# prints, can delete later
#print(f"tokenizer.chat_template: {tokenizer.chat_template}")
print("Spacial tokens: ", tokenizer.special_tokens_map)


In [None]:
# Smoke test for the chat template: render a short system/user/assistant exchange and
# inspect the prompt string the therapist model would receive.
test_massage = [{'role': 'system', 'content': 'you are a robot'}, 
                {'role': 'user', 'content': 'Hello, how is it to be a robot?'},
                {'role': 'assistant', 'content': 'It is great to be a robot.'},
                {'role': 'user', 'content': 'cool.'}]


# Render to text (not token ids) and leave an open assistant header at the end.
chat = tokenizer.apply_chat_template(test_massage, tokenize=False, add_generation_prompt=True)
# Encode without extra special tokens, then map ids to token strings and decode them again
# so the round trip can be inspected with the commented-out prints below.
encoded_chat = tokenizer.encode(chat, return_tensors="pt", add_special_tokens=False)
tokens_chat = tokenizer.convert_ids_to_tokens(encoded_chat[0])
decoded_chat = tokenizer.decode(encoded_chat[0], skip_special_tokens=True)
# print(f"test_massage: {test_massage}")
print(f"chat: {chat}\n")
# print(f"tokens_chat: {tokens_chat}\n")
# print(f"dencoded_chat: {decoded_chat}\n")

In [None]:
############################################
# Set quantization config (to save memory):
# 4-bit NF4 weights, with the compute dtype chosen earlier from the GPU capability (torch_dtype).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_quant_type="nf4"
)

############################################
# Load model, quantized, onto `device` using the attention implementation chosen earlier.
base_model = AutoModelForCausalLM.from_pretrained(
    therapist_model_id, 
    quantization_config=quantization_config, 
    device_map=device, 
    trust_remote_code=True,
    attn_implementation=attn_implementation
)
# NOTE(review): use_cache=False is typically a training-time setting. The cells visible here only
# generate text, so confirm that disabling the KV cache is intended (it may slow generation).
base_model.config.use_cache = False
# base_model.resize_token_embeddings(len(tokenizer)) # Resize model embeddings to include new tokens



# # match the tokenizer's special tokens to the model config
# base_model.config.bos_token_id = tokenizer.bos_token_id
# base_model.config.eos_token_id = tokenizer.eos_token_id
# base_model.config.pad_token_id = tokenizer.pad_token_id
# base_model.config.decoder_start_token_id = tokenizer.bos_token_id
# base_model.config.decoder_end_token_id = tokenizer.eos_token_id


## Choose and add adapters

In [7]:
# Look-ahead depth encoded in the adapter repository names below.
lookAhead = 5

########################################### New Adapters ############################################
# Hub ids of the DPO adapters to apply, in order (V1 first, then V2).
# The name suffix (TTree / TT / TP / TE) presumably records the sampling temperatures
# used when the training data was generated — TODO confirm.
therapist_first_adapter_id = f"LBK95/Llama-2-7b-hf-DPO-LookAhead-{lookAhead}_TTree1.4_TT0.9_TP0.7_TE0.2_V1"
therapist_second_adapter_id = f"LBK95/Llama-2-7b-hf-DPO-LookAhead-{lookAhead}_TTree1.4_TT0.9_TP0.7_TE0.2_V2"



########################################### OLD Adapters ############################################
########################################### OLD Adapters ############################################
### Look-Ahead=0 PartialEval ###
# therapist_first_adapter_id = "LBK95/Llama-2-7b-hf-DPO-PartialEval_ET0.1_MT1.2_1-5_V.1.0_Filtered0.1_V1.0"
# therapist_second_adapter_id = "LBK95/Llama-2-7b-hf-DPO-PartialEval_ET0.1_MT1.2_1-5_V.1.0_Filtered0.1_V2.0"
# therapist_third_adapter_id = "LBK95/Llama-2-7b-hf-DPO-PartialEval_ET0.1_MT1.2_1-5_V.1.0_Filtered0.1_V3.0"

### Look-Ahead=3 FullEval ###
# therapist_first_adapter_id = "LBK95/Llama-2-7b-hf-DPO-LookAhead3_FullEval_TTree1.4_TLoop0.7_TEval0.2_Filter0.2_V1.0"
# therapist_second_adapter_id = "LBK95/Llama-2-7b-hf-DPO-LookAhead3_FullEval_TTree1.4_TLoop0.7_TEval0.2_Filter0.2_V2.0"
# therapist_third_adapter_id = "LBK95/Llama-2-7b-hf-DPO-LookAhead3_FullEval_TTree1.4_TLoop0.7_TEval0.2_Filter0.2_V3.0"
# therapist_fourth_adapter_id = "LBK95/Llama-2-7b-hf-DPO-LookAhead3_FullEval_TTree1.4_TLoop0.7_TEval0.2_Filter0.2_V4.0"


### Look-Ahead=5 PartialEval ###
# therapist_first_adapter_id = "LBK95/Llama-2-7b-hf-DPO-PartialEval_LookAhead5_ET0.1_MT1.2_1-5_Filtered0.1_V1.0"
# therapist_second_adapter_id = "LBK95/Llama-2-7b-hf-DPO-PartialEval_LookAhead5_ET0.1_MT1.2_1-5_Filtered0.1_V2.0"

### Look-Ahead=5 FullEval ###
# therapist_first_adapter_id = "LBK95/Llama-2-7b-hf-DPO-FullEval_LookAhead5_TTree1.2_TT0.7_TP0.7_TE0.1_Filtered0.1_V1.0"
# therapist_second_adapter_id = "LBK95/Llama-2-7b-hf-DPO-FullEval_LookAhead5_TTree1.2_TT0.7_TP0.7_TE0.1_Filtered0.1_V2.0"
# therapist_third_adapter_id = "LBK95/Llama-2-7b-hf-DPO-FullEval_LookAhead5_TTree1.2_TT0.7_TP0.7_TE0.1_Filtered0.1_V3.0"
# therapist_fourth_adapter_id = "LBK95/Llama-2-7b-hf-DPO-FullEval_LookAhead5_TTree1.2_TT0.7_TP0.7_TE0.1_Filtered0.1_V4.0"
########################################### OLD Adapters ############################################
########################################### OLD Adapters ############################################



In [None]:
# ########################################### Add Adapters ############################################
# Apply the selected adapters one after another: attach the adapter to the current model,
# merge its weights in, and drop the PEFT wrapper before attaching the next one.
# Uncomment an entry (and its id in the selection cell) to merge additional adapters.
_adapters_to_merge = (
    ("first", therapist_first_adapter_id),
    ("second", therapist_second_adapter_id),
    # ("third", therapist_third_adapter_id),
    # ("fourth", therapist_fourth_adapter_id),
)

for _ordinal, _adapter_id in _adapters_to_merge:
    base_model = PeftModel.from_pretrained(base_model, _adapter_id)
    base_model = base_model.merge_and_unload()
    print(f"Model loaded with {_ordinal} adapter")
    print("Adapter ID: ", _adapter_id)
# ########################################### Add Adapters ############################################

## Helper functions
- concat_conversation
- print_conversation

In [12]:
def concat_conversation(conversation):
    """
    Render a conversation as one labelled transcript string.

    Parameters:
        - conversation: A list of utterance strings. Even positions are labelled
          [THERAPIST], odd positions [PATIENT] (the therapist speaks first).

    Returns:
        - A single string in which every utterance appears as
          "<label>: \\n<utterance> \\n\\n"; empty for an empty conversation.
    """
    speaker_tags = ("[THERAPIST]", "[PATIENT]")
    return "".join(
        f"{speaker_tags[position % 2]}: \n{utterance} \n\n"
        for position, utterance in enumerate(conversation)
    )

def print_conversation(conversation, max_width=80):
    """
    Pretty-print a conversation, wrapping each utterance to a fixed width.

    Parameters:
        - conversation: A list of utterance strings. Even positions are labelled
          [THERAPIST], odd positions [PATIENT] (the therapist speaks first).
        - max_width: Column at which utterances are wrapped. Default is 80.
    """
    speaker_tags = ("[THERAPIST]", "[PATIENT]")
    for position, utterance in enumerate(conversation):
        wrapped = textwrap.fill(utterance, width=max_width)
        print(f"{speaker_tags[position % 2]}: \n{wrapped} \n")
        

# ===========================================================

## Simulate a conversation between a therapist and a patient.
- synthesize_conversation_therapistModel_patientOpenAI

In [13]:
def generate_patient_response(client, model_id, messages_Patient_assist, max_tokens, temperature):
    """
    Ask the chat-completions API for the patient's next utterance.

    Parameters:
        - client: Object exposing `chat.completions.create` (the OpenAI client).
        - model_id: Name of the chat model to query.
        - messages_Patient_assist: Message history in which the patient is the assistant.
        - max_tokens: Upper bound on generated tokens.
        - temperature: Sampling temperature.

    Returns:
        - The text content of the first returned choice.
    """
    request = {
        "model": model_id,
        "messages": messages_Patient_assist,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "seed": 42,  # fixed seed, sent with every request
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

def generate_therapist_responses(therapist_model, therapis_tokenizer, messages_Therapist_assist, max_tokens, temperature, num_responses=2, max_attempts=10):
    """
    Sample `num_responses` non-empty therapist replies from the local model.

    Each attempt asks the model only for the replies that are still missing. A reply is
    cut at the first chat-template marker ("<|im_end|>", "<|im_start|>", "<|" or "|>")
    and discarded if nothing but whitespace remains.

    Parameters:
        - therapist_model: Model exposing `.device` and `.generate(...)`.
        - therapis_tokenizer: Tokenizer exposing `apply_chat_template`, `encode`, `decode`
          and `eos_token_id`.
        - messages_Therapist_assist: Message history in which the therapist is the assistant.
        - max_tokens: Maximum number of new tokens per reply.
        - temperature: Sampling temperature.
        - num_responses: Number of valid replies required. Default is 2.
        - max_attempts: Maximum number of `generate` calls. Default is 10.

    Returns:
        - A list with `num_responses` reply strings, or None when the attempts run out
          before enough valid replies were collected.
    """
    # The prompt depends only on the inputs, so template and encode it once rather than per attempt.
    prompt = therapis_tokenizer.apply_chat_template(messages_Therapist_assist, tokenize=False, add_generation_prompt=True)
    input_ids = therapis_tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False).to(therapist_model.device)
    prompt_length = len(input_ids[0])

    valid_responses = []
    attempts_left = max_attempts

    while len(valid_responses) < num_responses and attempts_left > 0:
        attempts_left -= 1

        # Generate only as many sequences as are still missing.
        responses = therapist_model.generate(
            input_ids,
            do_sample=True,
            max_new_tokens=max_tokens,
            pad_token_id=therapis_tokenizer.eos_token_id,
            eos_token_id=therapis_tokenizer.eos_token_id,
            temperature=temperature,
            num_return_sequences=num_responses - len(valid_responses),
            stop_strings=["<|im_end|>"],  # Stop generation at "<|im_end|>"
            tokenizer=therapis_tokenizer
        )

        # Decode and filter responses
        for response in responses:
            # Drop the prompt tokens; only the continuation is the reply.
            decoded_response_original = therapis_tokenizer.decode(response[prompt_length:], skip_special_tokens=True)
            decoded_response = decoded_response_original
            # Keep only the text before the first (possibly partial) chat marker.
            for marker in ("<|im_end|>", "<|im_start|>", "<|", "|>"):
                decoded_response = decoded_response.split(marker)[0]
            # print length of original and filtered response
            print(f"Original, Filtered: {len(decoded_response_original)}, {len(decoded_response)}")
            if decoded_response.strip():
                valid_responses.append(decoded_response)
            else:
                print("Invalid original response: ", decoded_response_original)

    if len(valid_responses) < num_responses:
        print("Could not generate the required number of valid responses.")
        return None

    return valid_responses


def handle_session_end(response_content, turn_index):
    """
    Split an utterance at the "SESSION ENDED" marker (matched case-insensitively).

    Parameters:
        - response_content: The utterance containing the marker.
        - turn_index: Index of the turn that produced the utterance. An even index is
          attributed to the patient and an odd index to the therapist.

    Returns:
        - (session_ended_by, session_ended_explanation, response_content) where
          `session_ended_by` is "patient" or "therapist", the explanation is the text
          after the marker and `response_content` is the text before it.

    Raises:
        - ValueError: if the marker does not occur in `response_content`.
    """
    session_ended_keyword = "SESSION ENDED"

    # Search the original text directly. Looking the marker up in `.upper()` and slicing the
    # original with that index goes wrong when upper-casing changes the length (e.g. "ß" -> "SS").
    match = re.search(re.escape(session_ended_keyword), response_content, flags=re.IGNORECASE)
    if match is not None:
        start, end = match.span()
    else:
        # Fall back to the upper-case lookup so any text accepted by a caller's
        # `"SESSION ENDED" in text.upper()` check is still handled here.
        start = response_content.upper().find(session_ended_keyword)
        if start == -1:
            raise ValueError("SESSION ENDED keyword not found in response content")
        end = start + len(session_ended_keyword)

    session_ended_explanation = response_content[end:]
    response_content = response_content[:start]
    session_ended_by = "patient" if turn_index % 2 == 0 else "therapist"
    print("Response content (SESSION ENDED): ", response_content)
    print("Session ended by: ", session_ended_by)
    return session_ended_by, session_ended_explanation, response_content


def update_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, role_Patient, role_Therapist, response_content):
    """
    Append one utterance, in place, to the transcript and to both chat histories.

    The same text is recorded under `role_Patient` in the patient-side history and
    under `role_Therapist` in the therapist-side history. Returns None.
    """
    conversation.append(response_content)
    histories = (
        (messages_Patient_assist, role_Patient),
        (messages_Therapist_assist, role_Therapist),
    )
    for history, role in histories:
        history.append({"role": role, "content": response_content})

########################################################################################
def initialize_conversation(system_prompt_therapist, system_prompt_patient, therapist_init_utterance, patient_init_utterance):
    """
    Build the starting transcript and the two role-specific chat histories.

    Returns:
        - conversation: list holding only the therapist's opening utterance.
        - messages_Patient_assist: patient-side history (system prompt, then the
          therapist's opening utterance as the user message).
        - messages_Therapist_assist: therapist-side history (system prompt, the patient's
          opening utterance as the user message, then the therapist's opening utterance
          as the assistant message).
    """
    def _message(role, content):
        return {"role": role, "content": content}

    # Patient as the assistant: the therapist's opening line is the incoming user message.
    patient_side = [
        _message("system", system_prompt_patient),
        _message("user", therapist_init_utterance),
    ]

    # Therapist as the assistant: a user utterance comes first (llama model needs a user utterance to start).
    therapist_side = [
        _message("system", system_prompt_therapist),
        _message("user", patient_init_utterance),
        _message("assistant", therapist_init_utterance),
    ]

    transcript = [therapist_init_utterance]
    return transcript, patient_side, therapist_side


def conversation_loop(conversation, messages_Patient_assist, messages_Therapist_assist, therapist_model, therapist_tokenizer, client,
                      model_id="gpt-3.5-turbo", max_tokens=100, num_utterances=6, temperature_therapist=0.7, temperature_patient=0.7):
    """
    Continue a conversation for up to `num_utterances` turns, patient first.

    Even turns are produced by the patient (chat-completions API), odd turns by the
    therapist model. The transcript and both histories are extended in place. The loop
    stops early when an utterance contains "SESSION ENDED"; the text before the marker,
    if any, is still recorded.

    Returns:
        - (conversation, messages_Patient_assist, messages_Therapist_assist,
          session_endded_by, session_endded_explanation); the last two stay None when
          no utterance ended the session.
        - A tuple of five None values when the therapist model yields no valid reply.
    """
    ended_by = None
    ended_explanation = None

    for turn in range(num_utterances):
        patient_speaks = turn % 2 == 0

        if patient_speaks:
            # The patient's own words are "assistant" in its history and "user" in the therapist's.
            role_Patient, role_Therapist = "assistant", "user"
            response_content = generate_patient_response(client, model_id, messages_Patient_assist, max_tokens, temperature_patient)
            print("[Patient]: ", response_content)
        else:
            role_Patient, role_Therapist = "user", "assistant"
            candidates = generate_therapist_responses(therapist_model, therapist_tokenizer, messages_Therapist_assist, max_tokens, temperature_therapist, 1)
            if candidates is None:
                print("Could not generate a valid response for the therapist, returning None.")
                return None, None, None, None, None

            response_content = candidates[0]  # a single reply was requested
            print("-" * 50)
            print("[Therapist]: ", response_content)

        session_over = "SESSION ENDED" in response_content.upper()
        if session_over:
            ended_by, ended_explanation, response_content = handle_session_end(response_content, turn)

        # A closing utterance is recorded only when text precedes the marker;
        # every other utterance is always recorded.
        if response_content or not session_over:
            update_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, role_Patient, role_Therapist, response_content)

        if session_over:
            break

    return conversation, messages_Patient_assist, messages_Therapist_assist, ended_by, ended_explanation


def synthesize_conversation_therapistModel_patientOpenAI(system_prompt_therapist, system_prompt_patient, therapist_init_utterance, 
                                                         patient_init_utterance, therapist_model, therapis_tokenizer, client, model_id="gpt-3.5-turbo",  
                                                         max_tokens=50, num_utterances=6, temperature_therapist=0.7, temperature_patient=0.7):
    """
    Simulate a conversation between the therapist model and the API-backed patient.

    Builds the initial transcript and histories from the prompts and opening utterances,
    then hands them to `conversation_loop` and returns its result unchanged.
    """
    initial_state = initialize_conversation(system_prompt_therapist, system_prompt_patient, therapist_init_utterance, patient_init_utterance)
    print("Therapist's initial utterance: ", therapist_init_utterance)
    # initial_state is (conversation, patient-side history, therapist-side history).
    return conversation_loop(*initial_state, therapist_model, therapis_tokenizer, client, model_id, max_tokens, num_utterances, temperature_therapist, temperature_patient)


## Evaluate a conversation
- eval_conversation
- extract_scores

In [14]:
def eval_conversation(conversation_str, client, questionnaire=13, model_id="gpt-3.5-turbo", temperature=0.2, max_token_per_question=100, partial_conv=False, look_ahead=3):
    """
    Ask the evaluator model to fill in a questionnaire about a conversation.

    Parameters:
        - conversation_str: The conversation rendered as one string.
        - client: Object exposing `chat.completions.create` (the OpenAI client).
        - questionnaire: Questionnaire identifier passed to the prompt builder.
        - model_id: Name of the evaluator chat model.
        - temperature: Sampling temperature for the evaluation.
        - max_token_per_question: Token budget per question; the request limit is this
          value times the questionnaire's question count.
        - partial_conv: When true, use the partial-conversation prompt builder, which
          also receives `look_ahead`.
        - look_ahead: Forwarded to the partial-conversation prompt builder only.

    Returns:
        - (eval_results, eval_dict): the evaluator's raw text answer and the dict returned
          by the prompt builder (read here for "prompt" and "questions_count").
    """
    prompt_args = {"questionnaire": questionnaire, "conversation": conversation_str}
    if partial_conv:
        eval_dict = get_prompt_eval_questionnaire_partial_conv(look_ahead=look_ahead, **prompt_args)
    else:
        eval_dict = get_prompt_eval_questionnaire(**prompt_args)

    eval_prompt = eval_dict["prompt"]
    token_budget = max_token_per_question * eval_dict["questions_count"]

    # The whole questionnaire prompt is sent as a single user message.
    completion = client.chat.completions.create(
        model=model_id,
        messages=[{"role": "user", "content": eval_prompt}],
        max_tokens=token_budget,
        temperature=temperature,
        seed=42
    )

    return completion.choices[0].message.content, eval_dict


In [15]:
def extract_scores(evaluation_text):
    """
    Pull the per-question scores out of the evaluator's text answer.

    Every standalone integer in the text is collected in order. The integers are taken
    to alternate "question number, score", so only every second one (positions 1, 3,
    5, ...) is returned.
    """
    # Standalone runs of digits, in order of appearance (question numbers and scores interleaved).
    standalone_numbers = [int(token) for token in re.findall(r'\b\d+\b', evaluation_text)]
    # Skip the question numbers, keep the scores.
    return standalone_numbers[1::2]



## Functions to synthesize conversation trees

In [16]:
def prepare_lookahead_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, response):
    """
    Create one look-ahead branch by appending a candidate therapist reply.

    New lists are returned and the inputs are left untouched. The reply is recorded as a
    "user" message in the patient-side history and as an "assistant" message in the
    therapist-side history.
    """
    branch_conversation = [*conversation, response]
    branch_patient_messages = [*messages_Patient_assist, {"role": "user", "content": response}]
    branch_therapist_messages = [*messages_Therapist_assist, {"role": "assistant", "content": response}]
    return branch_conversation, branch_patient_messages, branch_therapist_messages


def eval_conversation_with_questionnaires(conversation_str, client, questionnaire_list, model_id, temperature_eval, max_token_per_question, partial_conv, look_ahead, questionnaire_scalars=[0.5, 0.5], max_attempts=3):
    """
    Score a conversation with several questionnaires and combine the results.

    Each questionnaire is evaluated until the number of extracted scores equals its
    question count, with up to `max_attempts` tries per questionnaire. (The budget used
    to be shared, so one questionnaire could use it up and leave later ones with no
    attempt at all.)

    Parameters:
        - conversation_str: The conversation rendered as one string.
        - client, model_id, temperature_eval, max_token_per_question, partial_conv,
          look_ahead: Forwarded to `eval_conversation`.
        - questionnaire_list: Questionnaire identifiers to evaluate, in order.
        - questionnaire_scalars: Weights, one per questionnaire, applied to the mean scores.
        - max_attempts: Maximum evaluation attempts per questionnaire. Default is 3.

    Returns:
        - (final_score, scores_avg_list, scores_list, results_list): the weighted sum of
          the per-questionnaire mean scores, those means, the raw score lists and the raw
          evaluator answers.
        - (None, None, None, None) if any questionnaire yields no valid score list.
    """
    scores_list = []
    scores_avg_list = []
    results_list = []

    for questionnaire in questionnaire_list:
        scores = None
        for _ in range(max_attempts):  # fresh attempt budget for every questionnaire
            eval_results, eval_dict = eval_conversation(
                conversation_str,
                client,
                questionnaire=questionnaire,
                model_id=model_id,
                temperature=temperature_eval,
                max_token_per_question=max_token_per_question,
                partial_conv=partial_conv,
                look_ahead=look_ahead
            )
            questions_count = eval_dict["questions_count"]
            candidate_scores = extract_scores(eval_results)
            # An answer is valid only when it yields exactly one score per question.
            if len(candidate_scores) == questions_count:
                scores = candidate_scores
                break
            print("questions_count, scores: ", questions_count, len(candidate_scores))
            print("Scores count does not match the questions count. Retrying...")

        if scores is None:
            print("Could not generate the required number of valid scores for the questionnaire: ", questionnaire)
            return None, None, None, None

        results_list.append(eval_results)
        scores_list.append(scores)
        scores_avg_list.append(np.mean(scores))

    # Weighted sum of the per-questionnaire means.
    final_score = np.dot(scores_avg_list, questionnaire_scalars)

    return final_score, scores_avg_list, scores_list, results_list


def determine_winner(responses, scores_one_list, scores_two_list, scores_one_avg_list, scores_two_avg_list, 
                     score_one_final, score_two_final, conversation_one, conversation_two):
    """
    Order two evaluated branches as (winner, loser); a tie goes to response one.

    Returns a 10-tuple with winner and loser values interleaved:
    (response, response, scores_list, scores_list, scores_avg_list, scores_avg_list,
     final_score, final_score, conversation, conversation).
    """
    branch_one = (responses[0], scores_one_list, scores_one_avg_list, score_one_final, conversation_one)
    branch_two = (responses[1], scores_two_list, scores_two_avg_list, score_two_final, conversation_two)

    if score_one_final >= score_two_final:
        winner, loser = branch_one, branch_two
    else:
        winner, loser = branch_two, branch_one

    # Pair each winner field with the matching loser field, then flatten the pairs.
    return tuple(field for pair in zip(winner, loser) for field in pair)


In [17]:
def synthesize_conversation_tree_lookAhead(init_messages_Patient_assist, init_messages_Therapist_assist, init_conversation, 
                                                     therapist_model, therapis_tokenizer, client, model_id="gpt-3.5-turbo", max_tokens=100, 
                                                     num_utterances=6, temperature_tree=1.2,temperature_therapist=0.7, temperature_patient=0.7, temperature_eval=0.2,
                                                     questionnaire_list=[13, 14], questionnaire_scalars=[1, 1], max_token_per_question=100, partial_conv=False, look_ahead=3):
    """
    Extend a conversation turn by turn and collect preference pairs for the therapist.

    The therapist speaks on even turns and the patient on odd turns. On each therapist
    turn two candidate replies are sampled, each candidate is continued for `look_ahead`
    turns, both continuations are scored with the questionnaires, and the better
    candidate becomes the actual reply. Every such comparison is stored as one record
    in the returned preference data.

    Parameters:
        - init_messages_Patient_assist, init_messages_Therapist_assist, init_conversation:
          Starting state; shallow copies are taken, so the input lists are not extended.
        - therapist_model, therapis_tokenizer: Local therapist model and its tokenizer.
        - client, model_id: Chat-completions client and model used for the patient and
          for the evaluation.
        - max_tokens: Token limit per generated utterance.
        - num_utterances: Number of turns to add (therapist and patient turns combined).
        - temperature_tree: Sampling temperature for the two branching candidates.
        - temperature_therapist, temperature_patient: Temperatures in the look-ahead
          continuation (the patient temperature is also used for the patient's real turns).
        - temperature_eval, questionnaire_list, questionnaire_scalars,
          max_token_per_question, partial_conv: Evaluation settings forwarded to
          `eval_conversation_with_questionnaires`.
        - look_ahead: Number of continuation turns per candidate.

    Returns:
        - (preference_data, messages_Patient_assist, messages_Therapist_assist,
          conversation, session_endded_by, session_endded_explanation). The last two are
          set only when the utterance actually added to the conversation ended the session.
        - A tuple of six None values if any initial input is None, or if generation or
          evaluation fails.
    """
    if init_messages_Patient_assist is None or init_messages_Therapist_assist is None or init_conversation is None:
        return None, None, None, None, None, None

    preference_data = []
    messages_Patient_assist = init_messages_Patient_assist.copy()
    messages_Therapist_assist = init_messages_Therapist_assist.copy()
    conversation = init_conversation.copy()
    session_endded_by = None
    session_endded_explanation = None

    for i in range(num_utterances):
        if i % 2 == 1:  # Patient's turn
            role_Patient = "assistant"
            role_Therapist = "user"
            response_content = generate_patient_response(client, model_id, messages_Patient_assist, max_tokens, temperature_patient)
            print("\n Patient response: \n", response_content)
        else:  # Therapist's turn
            print("-" * 80)
            print("Tree utterance number: ", i)
            role_Patient = "user"
            role_Therapist = "assistant"
            # generate two responses for the therapist (branching)
            responses = generate_therapist_responses(therapist_model, therapis_tokenizer, messages_Therapist_assist, max_tokens, temperature_tree, 2)
            if responses is None:
                print("Could not generate the required number of valid responses. Skipping...")
                return None, None, None, None, None, None

            # Log candidates that try to end the session. This is diagnostic only: the
            # candidates keep their text, and the session state is updated further down,
            # once the chosen reply is known, so a losing candidate cannot mark the
            # session as ended.
            # NOTE(review): a candidate containing the marker is still continued by the
            # look-ahead below — confirm that this is intended.
            for response in responses:
                if "SESSION ENDED" in response.upper():
                    print("Response with SESSION ENDED: ", response)
                    # i + 1: handle_session_end attributes odd indices to the therapist.
                    _, _, stripped_response = handle_session_end(response, i + 1)
                    print("Response after handling SESSION ENDED: ", stripped_response)

            print("-" * 80)
            print("Therapist responses: \n", responses)

            # split the conversation to two branches and continue each branch for look_ahead steps
            conversation_one, messages_Patient_assist_one, messages_Therapist_assist_one = prepare_lookahead_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, responses[0])
            conversation_two, messages_Patient_assist_two, messages_Therapist_assist_two = prepare_lookahead_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, responses[1])

            # continue the conversation for look_ahead steps
            print("-" * 40 + "Response one Look-Ahead" + "-" * 40)
            print("response one: ", responses[0])

            conversation_one, messages_Patient_assist_one, messages_Therapist_assist_one, _, _ = conversation_loop(
                conversation_one, messages_Patient_assist_one, messages_Therapist_assist_one, therapist_model, therapis_tokenizer, client, model_id, max_tokens, look_ahead, temperature_therapist, temperature_patient)
            print("-" * 40 + "Response two Look-Ahead" + "-" * 40)
            print("response two: ", responses[1])
            conversation_two, messages_Patient_assist_two, messages_Therapist_assist_two, _, _ = conversation_loop(
                conversation_two, messages_Patient_assist_two, messages_Therapist_assist_two, therapist_model, therapis_tokenizer, client, model_id, max_tokens, look_ahead, temperature_therapist, temperature_patient)

            # conversation_loop signals a generation failure with None values.
            if conversation_one is None or conversation_two is None:
                print("Could not generate the required number of valid responses for the look-ahead steps. Skipping...")
                return None, None, None, None, None, None

            # evaluate the two conversations
            score_one_final, scores_one_avg_list, scores_one_list, _ = eval_conversation_with_questionnaires(concat_conversation(conversation_one), client, questionnaire_list, model_id, temperature_eval, max_token_per_question, partial_conv, look_ahead, questionnaire_scalars)
            score_two_final, scores_two_avg_list, scores_two_list, _ = eval_conversation_with_questionnaires(concat_conversation(conversation_two), client, questionnaire_list, model_id, temperature_eval, max_token_per_question, partial_conv, look_ahead, questionnaire_scalars)

            if score_one_final is None or score_two_final is None:
                print("Could not generate the required number of valid scores for the conversations. Skipping...")
                return None, None, None, None, None, None

            print("-" * 80)
            print("score_one_final: ", score_one_final)
            print("score_two_final: ", score_two_final)

            # determine the winner and loser
            winning_response, losing_response, winning_scores_list, losing_scores_list, winning_scores_avg_list, losing_scores_avg_list, winning_score_final, losing_score_final, winning_conversation, losing_conversation = determine_winner(
                responses, scores_one_list, scores_two_list, scores_one_avg_list, scores_two_avg_list, score_one_final, score_two_final, conversation_one, conversation_two)
            print("-" * 80)
            print("winning_response: ", winning_response)
            print("losing_response: ", losing_response)
            print("-" * 80)

            # Append the preference data (state before the therapist's reply, plus both branches)
            preference_data.append({
                "conversation": conversation.copy(),
                "messages": messages_Therapist_assist.copy(),
                "winning_response": winning_response,
                "losing_response": losing_response,
                "winning_scores_list": winning_scores_list,
                "losing_scores_list": losing_scores_list,
                "winning_scores_avg_list": winning_scores_avg_list, 
                "losing_scores_avg_list": losing_scores_avg_list,
                "winning_score_final": winning_score_final,
                "losing_score_final": losing_score_final,
                "winning_conversation": winning_conversation,
                "losing_conversation": losing_conversation,
            })

            response_content = winning_response

        if "SESSION ENDED" in response_content.upper():
            # Turn parity here is the reverse of handle_session_end's convention (even index =
            # patient), so pass i + 1 to attribute the ending to the speaker of this turn.
            session_endded_by, session_endded_explanation, response_content = handle_session_end(response_content, i + 1)
            if response_content:
                update_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, role_Patient, role_Therapist, response_content)
            break # End the conversation

        update_conversation(conversation, messages_Patient_assist, messages_Therapist_assist, role_Patient, role_Therapist, response_content)

    return preference_data, messages_Patient_assist, messages_Therapist_assist, conversation, session_endded_by, session_endded_explanation

In [18]:
def synthesize_conversation_tree_lookAhead_for_permutation(
    system_prompt_therapist,
    system_prompt_patient,
    therapist_init_utterance,
    patient_init_utterance,
    therapist_model,
    therapis_tokenizer,
    client,
    model_id="gpt-3.5-turbo",
    max_tokens=50,
    num_init_utterances=2,
    num_tree_utterances=6,
    temperature_tree=1.2,
    temperature_conversation_Therapist=0.7,
    temperature_conversation_Patient=0.7,
    temperature_eval=0.1,
    questionnaire_list=[13, 14],
    questionnaire_scalars=[0.5, 0.5],
    max_token_per_question=100,
    partial_conv=False,
    look_ahead=3,
):
    """Seed a conversation, then grow the look-ahead preference tree from it.

    Stage 1 calls ``synthesize_conversation_therapistModel_patientOpenAI`` with
    the system prompts and initial utterances for ``num_init_utterances`` turns.
    Stage 2 passes the resulting conversation and message histories to
    ``synthesize_conversation_tree_lookAhead`` for ``num_tree_utterances`` turns.

    Returns:
        The 6-tuple ``(preference_data, messages_Patient_assist,
        messages_Therapist_assist, conversation, session_endded_by,
        session_endded_explanation)`` produced by stage 2, or six ``None``
        values when stage 1 returns no conversation.
    """
    # Arguments that both stages receive unchanged
    shared_kwargs = dict(
        therapist_model=therapist_model,
        therapis_tokenizer=therapis_tokenizer,
        client=client,
        model_id=model_id,
        max_tokens=max_tokens,
        temperature_therapist=temperature_conversation_Therapist,
        temperature_patient=temperature_conversation_Patient,
    )

    # Stage 1: the initial conversation (no tree). Its session-end information
    # is not used; stage 2 supplies the values that are returned.
    (
        seed_conversation,
        seed_patient_messages,
        seed_therapist_messages,
        _seed_ended_by,
        _seed_ended_explanation,
    ) = synthesize_conversation_therapistModel_patientOpenAI(
        system_prompt_therapist=system_prompt_therapist,
        system_prompt_patient=system_prompt_patient,
        therapist_init_utterance=therapist_init_utterance,
        patient_init_utterance=patient_init_utterance,
        num_utterances=num_init_utterances,
        **shared_kwargs,
    )

    # No seed conversation: report failure with the same arity as a normal result
    if seed_conversation is None:
        return (None,) * 6

    # Stage 2: the conversation tree (with look-ahead evaluation)
    (
        preference_data,
        patient_messages,
        therapist_messages,
        full_conversation,
        ended_by,
        ended_explanation,
    ) = synthesize_conversation_tree_lookAhead(
        init_messages_Patient_assist=seed_patient_messages,
        init_messages_Therapist_assist=seed_therapist_messages,
        init_conversation=seed_conversation,
        num_utterances=num_tree_utterances,
        temperature_tree=temperature_tree,
        temperature_eval=temperature_eval,
        questionnaire_list=questionnaire_list,
        questionnaire_scalars=questionnaire_scalars,
        max_token_per_question=max_token_per_question,
        partial_conv=partial_conv,
        look_ahead=look_ahead,
        **shared_kwargs,
    )

    return (
        preference_data,
        patient_messages,
        therapist_messages,
        full_conversation,
        ended_by,
        ended_explanation,
    )


# ===========================================================

## Generate permutations of patients

In [None]:
# Build the list of permutations (requested with only_expert_therapist=True)
permutations = generate_all_permutations(only_expert_therapist=True)
print("Number of permutations:", len(permutations))
print("perumtation keys:", permutations[0].keys())
# Each entry of `permutations` is a dictionary with the keys
# 'counselor_init_utterance', 'counselor_system_prompt', 'patient_system_prompt', 'args';
# the DataFrame below holds the same records in tabular form.
permutation_pd = pd.DataFrame(data=permutations)

In [None]:
# Personality levels a therapist system prompt can be built for
good_level = CounselorPersonality.PersonalityLevel.Good
mediocre_level = CounselorPersonality.PersonalityLevel.Mediocre
bad_level = CounselorPersonality.PersonalityLevel.Bad
basic_level = CounselorPersonality.PersonalityLevel.BASIC

# One therapist is picked here; its name is reused for every prompt and utterance below
therapist = CounselorPersonality.choose_random_therapist_name()
print(f"Therapist: {therapist} \n")
print("-" * 80)

# System prompt for each personality level
therapist_good_system_prompt = CounselorPersonality.build_system_prompt(name=therapist['name'], personality_level=good_level)
therapist_mediocre_system_prompt = CounselorPersonality.build_system_prompt(name=therapist['name'], personality_level=mediocre_level)
therapist_bad_system_prompt = CounselorPersonality.build_system_prompt(name=therapist['name'], personality_level=bad_level)
therapist_basic_system_prompt = CounselorPersonality.build_system_prompt(name=therapist['name'], personality_level=basic_level)

# Opening utterance for each personality level
# (the generation and evaluation cells of this notebook pass the *basic* variants)
therapist_good_init_utterance = CounselorPersonality.get_init_utterance(name=therapist['name'], personality_level=good_level)
therapist_mediocre_init_utterance = CounselorPersonality.get_init_utterance(name=therapist['name'], personality_level=mediocre_level)
therapist_bad_init_utterance = CounselorPersonality.get_init_utterance(name=therapist['name'], personality_level=bad_level)
therapist_basic_init_utterance = CounselorPersonality.get_init_utterance(name=therapist['name'], personality_level=basic_level)

# Show every system prompt, a separator, then every opening utterance
for _label, _text in (
    ("good", therapist_good_system_prompt),
    ("mediocre", therapist_mediocre_system_prompt),
    ("bad", therapist_bad_system_prompt),
    ("basic", therapist_basic_system_prompt),
):
    print(f"Therapist {_label} system prompt: \n{_text}\n")
print("-" * 80)
for _label, _text in (
    ("good", therapist_good_init_utterance),
    ("mediocre", therapist_mediocre_init_utterance),
    ("bad", therapist_bad_init_utterance),
    ("basic", therapist_basic_init_utterance),
):
    print(f"Therapist {_label} init utterance: \n{_text}\n")




# ===========================================================

## Create Preference-Tree

- Hyperparameters

In [25]:
# Model parameters (trailing comments list alternative values)
model_id = "gpt-3.5-turbo" # Patient model and Eval model
max_tokens_per_response = 200 # 100, 200
num_init_utterances = 1 # 3, 1
num_tree_utterances = 41 # 41, 47
temperature_tree = 1.4 # 1.2, 1.4
temperature_conversation_Therapist = 0.9 # 0.7, 0.9
temperature_conversation_Patient = 0.7

# Parameters for usingEval pref_tree
questionnaire_list = [13, 14]
questionnaire_scalars = [0.5, 0.5]
max_token_per_question = 100
temperature_eval = 0.2 # 0.1, 0.2
# `lookAhead` is interpolated into the save path below and passed as `look_ahead`
# by the generation cell. Keep a value already set by an earlier cell; otherwise
# fall back to 5 so this cell does not raise NameError on a fresh kernel.
# NOTE(review): confirm 5 is the intended default for this run.
lookAhead = globals().get("lookAhead", 5) # 0, 3, 5
partial_conv_eval = False

# Output directory for the per-permutation preference-data checkpoints (Google Drive mount)
path_to_save = f"LLM_DATA/Conversation_Trees/LookAhead_{lookAhead}/TTree1.4_TT0.9_TP0.7_TE0.2_V3"
path_to_save = f"/content/drive/MyDrive/{path_to_save}"

- Run synthesize_conversation_tree_lookAhead_for_permutation on every patient permutation and checkpoint the preference data to CSV


In [None]:
# DataFrame.to_csv does not create missing parent directories, so make sure the
# checkpoint directory exists before any expensive generation starts.
os.makedirs(path_to_save, exist_ok=True)

# Stays None when every permutation is skipped (checkpoint already on disk or
# generation failed), so the final preview line cannot raise a NameError.
pd_pref_data = None

for i in range(0, len(permutations)):
    print(f"Permutation {i}")
    print("=" * 160)

    # Resume support: a permutation whose checkpoint already exists is not regenerated
    if os.path.exists(f"{path_to_save}/pref_data_{i}.csv"):
        print(f"File exists: {path_to_save}/pref_data_{i}.csv")
        continue


    permutation = permutations[i]
    preference_data, messages_Patient_assist, messages_Therapist_assist, conversation, session_endded_by, session_endded_explanation = synthesize_conversation_tree_lookAhead_for_permutation(
        system_prompt_therapist=therapist_basic_system_prompt, # system prompt for the therapist
        system_prompt_patient=permutation["patient_system_prompt"], # system prompt for the patient
        therapist_init_utterance=therapist_basic_init_utterance, # therapist's initial utterance
        patient_init_utterance="", # patient's initial utterance
        therapist_model=base_model, # therapist model
        therapis_tokenizer=tokenizer, # therapist tokenizer
        client=client, # OpenAI client
        model_id=model_id, # model id to use
        max_tokens=max_tokens_per_response, # maximum tokens for each response
        num_init_utterances=num_init_utterances, # number of turns in the initial conversation (Not including the therapist's initial utterance)
        num_tree_utterances=num_tree_utterances, # number of turns in the conversation tree (Not including the therapist's and patient's initial utterances)
        temperature_tree=temperature_tree, # temperature for sampling
        temperature_conversation_Therapist=temperature_conversation_Therapist, # temperature for the therapist's responses
        temperature_conversation_Patient=temperature_conversation_Patient, # temperature for the patient's responses
        temperature_eval=temperature_eval, # temperature for evaluation
        questionnaire_list=questionnaire_list, # questionnaire IDs
        questionnaire_scalars=questionnaire_scalars, # questionnaire scalars
        max_token_per_question=max_token_per_question, # maximum tokens for each question
        partial_conv=partial_conv_eval, # use partial or full evaluation prompt
        look_ahead=lookAhead
    )

    # The wrapper returns None for every field when no initial conversation could be built
    if preference_data is None:
        print("Could not generate the required number of valid responses. Skipping permutation number: ", i)
        continue

    pd_pref_data = pd.DataFrame(preference_data)
    pd_pref_data.info()  # info() prints its report itself and returns None, so it is not wrapped in print()
    print_conversation(conversation, max_width=80)
    print("Conversation Length: ", len(conversation))
    print(pd_pref_data.tail())
    print("=" * 160 + "\n")

    # save a checkpoint after every permutation
    pd_pref_data.to_csv(f"{path_to_save}/pref_data_{i}.csv", index=False)


# Preview the most recently generated preference data (nothing is shown if none was generated in this run)
pd_pref_data.head() if pd_pref_data is not None else None

# ===========================================================

## Evaluate the model using our Oracle/Judge
- For each patient permutation, synthesize a conversation and evaluate it with the Oracle.

In [None]:
# Rebuild the permutation list for the evaluation run
permutations = generate_all_permutations(only_expert_therapist=True)
print("Number of permutations:", len(permutations))
print("perumtation keys:", permutations[0].keys())
# Each entry of `permutations` is a dictionary with the keys
# 'counselor_init_utterance', 'counselor_system_prompt', 'patient_system_prompt', 'args'.
permutation_pd = pd.DataFrame(data=permutations)

# Conversation settings
model_id = "gpt-3.5-turbo"
max_tokens_per_response = 200 # 100, 200
num_uttrances = 49 # not including the therapist's initial utterance (so 50 turns in total)
temperature_theapist = 0.9 # 0.7, 1.0, 0.9
temperature_patient = 0.7

# Questionnaire-based evaluation settings
questionnaire_list = [13, 14]
questionnaire_scalars = [0.5, 0.5]
max_token_per_question = 100
temperature_eval = 0.2

In [None]:
##### Base #####
# path_to_save = "LLM_DATA/Conversation_with_Eval/Base/Basic_50_TT0.9_TP0.7_TE0.2"


##### Look-Ahead #####
path_to_save = f"LLM_DATA/Conversation_with_Eval/LookAhead_{lookAhead}/TTree1.4_TT0.9_TP0.7_TE0.2_V2"

#####################
path_to_save = f"/content/drive/MyDrive/{path_to_save}" # save to google drive
#####################

# DataFrame.to_csv does not create missing parent directories, so make sure the
# output directory exists before any conversation is generated.
os.makedirs(path_to_save, exist_ok=True)

# iterate over all permutations
for i in range(0, len(permutations)):
    print(f"Permutation {i}")

    conversation_file = f"{path_to_save}/conversation_{i}.csv"
    scores_file = f"{path_to_save}/scores_{i}.csv"

    # A permutation is complete only once its scores were written (they are saved
    # last). A conversation file without scores is left over from an interrupted
    # run and is regenerated, matching the failed-evaluation branch below, which
    # also discards a conversation that has no scores.
    if os.path.exists(conversation_file) and os.path.exists(scores_file):
        print(f"File exists: {conversation_file}")
        continue

    permutation = permutations[i]
    system_prompt_patient = permutation["patient_system_prompt"]
    print("=" * 160 + "\n")

    # create a conversation
    conversation, messages_Patient_assist, messages_Therapist_assist, session_endded_by, session_endded_explanation = synthesize_conversation_therapistModel_patientOpenAI(
        system_prompt_therapist=therapist_basic_system_prompt, # system prompt for the therapist
        system_prompt_patient=system_prompt_patient, # system prompt for the patient
        therapist_init_utterance=therapist_basic_init_utterance, # therapist's initial utterance
        patient_init_utterance="", # patient's initial utterance
        therapist_model=base_model, # therapist model
        therapis_tokenizer=tokenizer, # therapist tokenizer
        client=client, # OpenAI client
        model_id=model_id, # model id to use
        max_tokens=max_tokens_per_response, # maximum tokens for each response
        num_utterances=num_uttrances, # number of turns in the conversation (Not including the therapist's initial utterance)
        temperature_therapist=temperature_theapist, # temperature for the therapist's responses
        temperature_patient=temperature_patient # temperature for the patient's responses
    )

    if conversation is None:
        print("Could not generate the required number of valid responses. Skipping permutation number: ", i)
        continue

    print("Conversation Length: ", len(conversation))
    # save the conversation to a csv file (the two session-end values are repeated on every row)
    pd_conversation = pd.DataFrame(
        {
            "conversation": conversation,
            "session_endded_by": session_endded_by,
            "session_endded_explanation": session_endded_explanation
        }
    )
    pd_conversation.to_csv(conversation_file, index=False)

    #####################################################################################################

    # Evaluate conversation using the evaluation questionnaire
    print("Evaluating the conversation...")
    final_score, scores_avg_list, scores_list, results_list = eval_conversation_with_questionnaires(
        conversation,
        client,
        questionnaire_list,
        model_id,
        temperature_eval,
        max_token_per_question,
        partial_conv=False,
        look_ahead=0,
        questionnaire_scalars=questionnaire_scalars
    )

    if final_score is None:
        print("Could not generate the required number of valid scores for the conversation. Skipping permutation number: ", i)
        # delete the conversation file so this permutation is retried on the next run
        os.remove(conversation_file)
        continue

    # Print the evaluation results
    # (indices 0 and 1 correspond to the two entries of questionnaire_list)
    print("-" * 80)
    print("results1: \n", results_list[0])
    print("-" * 80)
    print("results2: \n", results_list[1])
    print("-" * 80)
    print("Scores 1:", scores_list[0])
    print("Scores 2:", scores_list[1])
    print("-" * 80)
    print("Scores 1 Average:", scores_avg_list[0])
    print("Scores 2 Average:", scores_avg_list[1])
    print("-" * 80)
    print("Scores Average:", (final_score))
    print("-" * 80 + "\n")

    # collect the scores into a single-row table
    scores_dict = [{
        "scores1": scores_list[0],
        "scores2": scores_list[1],
        "scores1_avg": scores_avg_list[0],
        "scores2_avg": scores_avg_list[1],
        "scores_avg": final_score,
        "results1": results_list[0],
        "results2": results_list[1]
    }]
    print(scores_dict)
    pd_scores = pd.DataFrame(scores_dict)
    # save the scores (written last: their presence marks the permutation as complete)
    pd_scores.to_csv(scores_file, index=False)
    print("=" * 160 + "\n")




In [None]:
# # go over the saved files and get the scores, delete the conversation and scores files for scores < 2
# path_to_save = f"LLM_DATA/Conversation_with_Eval/LookAhead_{lookAhead}/TTree1.4_TT0.9_TP0.7_TE0.2_V2"
# path_to_save = f"/content/drive/MyDrive/{path_to_save}"

# for i in range(0, len(permutations)):
#     print(f"Permutation {i}")
#     conversation_file = f"{path_to_save}/conversation_{i}.csv"
#     scores_file = f"{path_to_save}/scores_{i}.csv"

#     if not os.path.exists(conversation_file) or not os.path.exists(scores_file):
#         print(f"Files do not exist: {conversation_file}, {scores_file}")
#         continue

#     pd_conversation = pd.read_csv(conversation_file)
#     pd_scores = pd.read_csv(scores_file)

#     # get the scores
#     scores_avg = pd_scores["scores_avg"].values[0]
#     print("Scores Average: ", scores_avg)

#     if scores_avg < 2:
#         print("Scores Average is less than 2. Deleting the conversation and scores files.")
#         os.remove(conversation_file)
#         os.remove(scores_file)
#         print("Deleted the files: ", conversation_file, scores_file)
#     print("=" * 160 + "\n")