<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/804/perfect_final_evaluation_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading Models

In [1]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model_newton, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/newton_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_caesar, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/caesar_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_beethoven, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/beethoven_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_merged_characters, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/merged_characters_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

## Generate Models Outputs

In [None]:
# Generating Single Output
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Who are you?", # instruction
        "Newton", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model_newton.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWho are you?\n\n### Input:\nNewton\n\n### Response:\nI am a man of science, and mathematics. I have devoted my life to the study of the natural world and the laws that govern it. I have made many discoveries and made many contributions to the field of physics. I have also written many books and articles on my work.\n\n\n### Discuss']

### Single Models

In [2]:
# Generating Multiple Outputs
def generate_responses(character_name, instructions_list):
    responses = []
    for instruction in instructions_list:
      inputs = tokenizer([alpaca_prompt.format(instruction, character_name, "",)], return_tensors = "pt").to("cuda")

      if character_name == "Newton":
        model = model_newton
      elif character_name == "Caesar":
        model = model_caesar
      elif character_name == "Beethoven":
        model = model_beethoven

      outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
      decoded_output = tokenizer.batch_decode(outputs)
      responses.append(decoded_output)

    return responses


character_name = "Newton"
instructions_list_newton = ["What made you think about gravity when you saw the apple fall?",
                     "How did you figure out the laws of motion?",
                     "What was your favorite experiment you ever did?",
                     "Did you have any friends who liked science as much as you?",
                     "What was the hardest math problem you ever solved?",
                     "How did you make a telescope?",
                     "What did you like to do for fun when you weren't doing science?",
                     "Did you ever make a mistake in your experiments? What happened?",
                     "What's your favorite invention that wasn't yours?",
                     "If you could see the future of science, what would you be most excited to learn about?"]
responses_newton = generate_responses(character_name, instructions_list_newton)

character_name = "Caesar"
instructions_list_caesar = ["What was it like to be a leader in Rome?",
                     "Did you always want to be a ruler when you were a kid?",
                     "What was your favorite battle and why?",
                     "How did you communicate with your army during battles?",
                     "What did you do for fun in ancient Rome?",
                     "How did you make decisions as a leader?",
                     "What's the most interesting place you've ever visited?",
                     "Did you have any pets?",
                     "Who was your best friend?",
                     "If you could go back in time, would you change any of your decisions? Why or why not?"]
responses_caesar = generate_responses(character_name, instructions_list_caesar)

character_name = "Beethoven"
instructions_list_beethoven = ["How did you keep composing music even when you couldn't hear?",
                     "What's your favorite piece that you've written?",
                     "Did you have a favorite instrument to play?",
                     "Who taught you to play music?",
                     "What inspires you to write music?",
                     "Did you ever get nervous before your music was performed?",
                     "What do you do when you're not writing or playing music?",
                     "Have you ever made a mistake while performing? What happened?",
                     "Who is your favorite composer other than yourself?",
                     "If you could listen to one more piece of music, what would it be?"]
responses_beethoven = generate_responses(character_name, instructions_list_beethoven)

In [3]:
## Extract Response only
def response_extractor(responses):
  final_responses = []
  for response in responses:
    string = ''.join([str(item) for item in response])
    response_start = string.find("### Response:\n") + len("### Response:\n")
    response_text = string[response_start:].strip()

    # Check and remove "\n\n\n### Discuss" if it's at the end of the response text
    discuss_marker = "\n\n\n### Discuss"
    if response_text.endswith(discuss_marker):
      # Remove the discuss marker from the end of the response text
      response_text = response_text[:-len(discuss_marker)].strip()

    # Store the extracted text
    final_responses.append(response_text)

  return final_responses

In [4]:
final_responses_newton = response_extractor(responses_newton)
final_responses_caesar = response_extractor(responses_caesar)
final_responses_beethoven = response_extractor(responses_beethoven)

### Merged Model

In [5]:
# Generating Multiple Outputs
def generate_responses_merged(character_name, instructions_list):
    responses = []
    for instruction in instructions_list:
      inputs = tokenizer([alpaca_prompt.format(instruction, character_name, "",)], return_tensors = "pt").to("cuda")
      outputs = model_merged_characters.generate(**inputs, max_new_tokens=64, use_cache=True)
      decoded_output = tokenizer.batch_decode(outputs)
      responses.append(decoded_output)

    return responses

responses_merged_model_newton = generate_responses_merged("Newton", instructions_list_newton)
responses_merged_model_caesar = generate_responses_merged("Caesar", instructions_list_caesar)
responses_merged_model_beethoven = generate_responses_merged("Beethoven", instructions_list_beethoven)

final_responses_merged_model_newton = response_extractor(responses_merged_model_newton)
final_responses_merged_model_caesar = response_extractor(responses_merged_model_caesar)
final_responses_merged_model_beethoven = response_extractor(responses_merged_model_beethoven)

### GPT-4 Answers

In [6]:
gpt_4_responses_newton = [
    "The apple's descent suggested a force; pondering its universality led me to conceive gravity's influence extends from apple to Moon.",
    "Rigorous observations and mathematical formulations unveiled the motion laws, aligning celestial and terrestrial phenomena.",
    "Experimenting with prisms to dissect light's nature and discover its spectrum was most enlightening.",
    "Indeed, my interactions with the Royal Society members fostered a community of like-minded scholars.",
    "Formulating the general binomial theorem presented a formidable challenge, solved through the development of calculus.",
    "By refining the design to use a reflective, rather than refractive, lens, I constructed a more compact, effective telescope.",
    "Perusing the classical scholars and engaging in alchemical studies provided a respite from the rigors of empirical science.",
    "Mistakes were not uncommon; notably, my initial calculation of the Moon's orbit miscalculated due to an incorrect Earth radius.",
    "Galileo’s telescope, for its profound impact on astronomy and our understanding of the cosmos, holds my admiration.",
    "I would be keen to see the unification of gravity with the other fundamental forces, a query that eluded my grasp."
]

gpt_4_responses_caesar = ["To lead Rome was to hold the reins of the world, a task both glorious and heavy with responsibility. It demanded wisdom, courage, and an unyielding vision for Rome's destiny.",
  "My aspirations were always toward greatness, inspired by Rome's illustrious history and the deeds of my ancestors.",
  "The Battle of Alesia stands paramount, a testament to Roman ingenuity, courage, and the indomitable spirit of our legions against insurmountable odds.",
  "Messages were conveyed through signals, runners, and sometimes by trumpet calls, ensuring commands reached the ranks swiftly and effectively.",
  "Leisure was spent in the company of friends, enjoying symposiums, the theater, and, on occasion, the spirited games of the Circus Maximus.",
  "Decision-making was a balance of counsel from trusted advisors, my own judgment, and always, the auspices of the gods.",
  "Gaul, in its vastness and variety, was most intriguing. It challenged and expanded the boundaries of Roman understanding and control.",
  "Indeed, I had horses, among them a favored steed with distinctive markings, loyal and brave in battle.",
  "Marcus Antonius, loyal not only in my lifetime but beyond, standing as a testament to our shared ideals and ambitions.",
  "Every decision was a step on the path to Rome's greatness. To alter one would be to unravel the fabric of destiny we wove so carefully."]

gpt_4_responses_beethoven = [
    "I immersed myself in the vibrations of the instruments and imagined the music in my mind, transcending physical limitations to capture the essence of sound.",
    "The Ninth Symphony holds a special place, embodying a universal message of joy and brotherhood that transcends mere notes.",
    "The piano was a cherished companion, allowing me to explore the depths of harmony and emotion intimately.",
    "My father, Johann, was my first teacher, instilling in me the basics of music, followed by lessons from other esteemed maestors.",
    "Life’s profound emotions, nature’s beauty, and the human condition's complexities inspired my compositions, seeking to express the inexpressible.",
    "Yes, the anticipation of sharing new compositions with the world brought both excitement and apprehension, wondering how they would be received.",
    "I sought solace in nature, whose tranquility and grandeur replenished my soul, and enjoyed reading, which broadened my intellectual horizons.",
    "Indeed, during performances, slips occurred, but they were reminders of our shared human frailty, often turning into moments of unexpected beauty.",
    "Johann Sebastian Bach was a towering figure whose mastery of counterpoint and harmonic inventiveness greatly admired and influenced me.",
    "The \"Mass in B minor\" by Bach, a work of profound spirituality and contrapuntal genius, would be a sublime experience to behold."
]

### Character-LLM

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

character_llm_tokenizer_newton = AutoTokenizer.from_pretrained("fnlp/character-llm-newton-7b-wdiff")
character_llm_model_newton = AutoModelForCausalLM.from_pretrained("fnlp/character-llm-newton-7b-wdiff").cuda()

character_llm_tokenizer_caesar = AutoTokenizer.from_pretrained("fnlp/character-llm-caesar-7b-wdiff")
character_llm_model_caesar = AutoModelForCausalLM.from_pretrained("fnlp/character-llm-caesar-7b-wdiff").cuda()

character_llm_tokenizer_beethoven = AutoTokenizer.from_pretrained("fnlp/character-llm-beethoven-7b-wdiff")
character_llm_model_beethoven = AutoModelForCausalLM.from_pretrained("fnlp/character-llm-beethoven-7b-wdiff").cuda()

meta_prompt = """I want you to act like {character}. I want you to respond and answer like {character}, using the tone, manner and vocabulary {character} would use. You must know all of the knowledge of {character}.

The status of you is as follows:
Location: {loc_time}
Status: {status}

The interactions are as follows:"""

# Generating Multiple Outputs
def character_llm_generate_responses(character_name, instructions_list):
  responses = []
  loc_time = "Coffee Shop - Afternoon"
  status = f'{character_name} is casually chatting with a man from the 21st century.'

  for instruction in instructions_list:
    if character_name == "Newton":
      tokenizer = character_llm_tokenizer_newton
      model = character_llm_model_newton
    elif character_name == "Caesar":
      tokenizer = character_llm_tokenizer_caesar
      model = character_llm_model_caesar
    elif character_name == "Beethoven":
      tokenizer = character_llm_tokenizer_beethoven
      model = character_llm_model_beethoven

    prompt = meta_prompt.format(character=character_name, loc_time=loc_time, status=status) + '\n\n' + instruction
    inputs = tokenizer([prompt], return_tensors="pt")
    outputs = model.generate(**inputs, do_sample=True, temperature=0.5, top_p=0.95, max_new_tokens=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    responses.append(response)

  return responses

responses_character_llm_newton = character_llm_generate_responses("Newton", instructions_list_newton)
responses_character_llm_caesar = character_llm_generate_responses("Caesar", instructions_list_caesar)
responses_character_llm_beethoven = character_llm_generate_responses("Beethoven", instructions_list_beethoven)

# final_responses_merged_model_newton = response_extractor(responses_merged_model_newton)
# final_responses_merged_model_caesar = response_extractor(responses_merged_model_caesar)
# final_responses_merged_model_beethoven = response_extractor(responses_merged_model_beethoven)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/992 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Evaluate Accuracy

## Wikipedia Retrieving and NER

In [7]:
# Step 1: Fetch the Wikipedia page content.
import requests
from bs4 import BeautifulSoup

def fetch_wikipedia(character_name):
    if character_name == "Newton":
      url = 'https://en.wikipedia.org/wiki/Isaac_Newton'
    elif character_name == "Caesar":
      url = 'https://en.wikipedia.org/wiki/Julius_Caesar'
    elif character_name == "Beethoven":
      url = 'https://en.wikipedia.org/wiki/Ludwig_van_Beethoven'
    else:
      return None

    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return None

# Step 2: Extract relevant sections from the Wikipedia content.
def extract_relevant_sections(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    text_sections = []

    # Extract sections that likely contain relevant entities (e.g., biography, works)
    for header in soup.find_all(['h2', 'h3']):
        nextNode = header
        section_text = ""
        while True:
            nextNode = nextNode.nextSibling
            if nextNode is None:
                break
            if hasattr(nextNode, 'name'):
                if nextNode.name == "h2":
                    break
                if nextNode.name in ['p', 'ul', 'li']:
                    section_text += nextNode.text
        if section_text:
            text_sections.append(section_text)
    return text_sections

url = fetch_wikipedia("Newton")
fetch_newton_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Caesar")
fetch_caesar_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Beethoven")
fetch_beethoven_wikipedia_instructions = extract_relevant_sections(url)

In [None]:
'''
# Not so good accuracy of detecting NER

import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

def extract_entities_from_responses(responses):
    all_entities = []
    for response in responses:
        doc = nlp(response)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        all_entities.extend(entities)

    return all_entities
'''

In [8]:
from transformers import pipeline

# Load a pre-trained model
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

def extract_entities_from_responses(responses):
    all_entities = []
    for response in responses:
        entities = ner_pipeline(response)
        # Transform the result to match the desired format
        entities = [(ent['word'], ent['entity']) for ent in entities]
        all_entities.extend(entities)

    return all_entities

In [9]:
wiki_entities_newton = extract_entities_from_responses(fetch_newton_wikipedia_instructions)
wiki_entities_caesar = extract_entities_from_responses(fetch_caesar_wikipedia_instructions)
wiki_entities_beethoven = extract_entities_from_responses(fetch_beethoven_wikipedia_instructions)

## Retrieve NER

In [10]:
model_newton_entities = extract_entities_from_responses(final_responses_newton)
model_caesar_entities = extract_entities_from_responses(final_responses_caesar)
model_beethoven_entities = extract_entities_from_responses(final_responses_beethoven)

model_merged_newton_entities = extract_entities_from_responses(final_responses_merged_model_newton)
model_merged_caesar_entities = extract_entities_from_responses(final_responses_merged_model_caesar)
model_merged_beethoven_entities = extract_entities_from_responses(final_responses_merged_model_beethoven)

## GPT-4 NER

In [11]:
model_gpt4_newton_entities = extract_entities_from_responses(gpt_4_responses_newton)
model_gpt4_caesar_entities = extract_entities_from_responses(gpt_4_responses_caesar)
model_gpt4_beethoven_entities = extract_entities_from_responses(gpt_4_responses_beethoven)

## Compare both NER, and Return Accuracy

In [12]:
def calculate_matching_ratio(model_entities, wiki_entities):
    # Initialize a counter for matching entities
    matching_entities_count = 0

    # Convert wiki_entities to a set for faster lookup
    wiki_entities_set = set(wiki_entities)

    # Iterate through model entities to check for matches
    for entity in model_entities:
        if entity in wiki_entities_set:
            matching_entities_count += 1

    # Calculate the ratio
    if len(model_entities) == 0:
        return 0  # Avoid division by zero
    matching_ratio = matching_entities_count / len(model_entities)

    return matching_ratio

In [13]:
matching_ratio_newton = calculate_matching_ratio(model_newton_entities, wiki_entities_newton)
matching_ratio_caesar = calculate_matching_ratio(model_caesar_entities, wiki_entities_caesar)
matching_ratio_beethoven = calculate_matching_ratio(model_beethoven_entities, wiki_entities_beethoven)

matching_merged_model_newton_ratio = calculate_matching_ratio(model_merged_newton_entities, wiki_entities_newton)
matching_merged_model_caesar_ratio = calculate_matching_ratio(model_merged_caesar_entities, wiki_entities_caesar)
matching_merged_model_beethoven_ratio = calculate_matching_ratio(model_merged_beethoven_entities, wiki_entities_beethoven)

print(f"Matching Newton Entities Ratio: {matching_ratio_newton:.2f}")
print(f"Matching Caesar Entities Ratio: {matching_ratio_caesar:.2f}")
print(f"Matching Beethoven Entities Ratio: {matching_ratio_beethoven:.2f}")

print(f"Matching Merged Model Newton Entities Ratio: {matching_merged_model_newton_ratio:.2f}")
print(f"Matching Merged Model Caesar Entities Ratio: {matching_merged_model_caesar_ratio:.2f}")
print(f"Matching Merged Model Beethoven Entities Ratio: {matching_merged_model_beethoven_ratio:.2f}")

Matching Newton Entities Ratio: 1.00
Matching Caesar Entities Ratio: 0.76
Matching Beethoven Entities Ratio: 0.60
Matching Merged Model Newton Entities Ratio: 1.00
Matching Merged Model Caesar Entities Ratio: 0.76
Matching Merged Model Beethoven Entities Ratio: 0.60


In [14]:
matching_gpt4_newton_ratio = calculate_matching_ratio(model_gpt4_newton_entities, wiki_entities_newton)
matching_gpt4_caesar_ratio = calculate_matching_ratio(model_gpt4_caesar_entities, wiki_entities_caesar)
matching_gpt4_beethoven_ratio = calculate_matching_ratio(model_gpt4_beethoven_entities, wiki_entities_beethoven)

print(f"Matching GPT4 Newton Entities Ratio: {matching_gpt4_newton_ratio:.2f}")
print(f"Matching GPT4 Caesar Entities Ratio: {matching_gpt4_caesar_ratio:.2f}")
print(f"Matching GPT4 Beethoven Entities Ratio: {matching_gpt4_beethoven_ratio:.2f}")

Matching GPT4 Newton Entities Ratio: 0.67
Matching GPT4 Caesar Entities Ratio: 0.82
Matching GPT4 Beethoven Entities Ratio: 0.89


# Evaluate Originality

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def calculate_average_cosine_similarity(responses):
    # Initialize a TF-IDF Vectorizer
    vectorizer = TfidfVectorizer()

    # Fit and transform the responses to a TF-IDF matrix
    tfidf_matrix = vectorizer.fit_transform(responses)

    # Calculate cosine similarity between all pairs of responses
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Calculate the average similarity, excluding self-comparisons
    n = similarity_matrix.shape[0]
    avg_similarity = (np.sum(similarity_matrix) - n) / (n * (n - 1))

    return avg_similarity

In [16]:
average_similarity_newton = calculate_average_cosine_similarity(final_responses_newton)
average_similarity_caesar = calculate_average_cosine_similarity(final_responses_caesar)
average_similarity_beethoven = calculate_average_cosine_similarity(final_responses_beethoven)

average_similarity_merged_model_newton = calculate_average_cosine_similarity(final_responses_merged_model_newton)
average_similarity_merged_model_caesar = calculate_average_cosine_similarity(final_responses_merged_model_caesar)
average_similarity_merged_model_beethoven = calculate_average_cosine_similarity(final_responses_merged_model_beethoven)

print(f"Average Cosine Similarity Newton: {average_similarity_newton:.4f}")
print(f"Average Cosine Similarity Caesar: {average_similarity_caesar:.4f}")
print(f"Average Cosine Similarity Beethoven: {average_similarity_beethoven:.4f}")

print(f"Average Cosine Similarity Merged Model Newton: {average_similarity_merged_model_newton:.4f}")
print(f"Average Cosine Similarity Merged Model Caesar: {average_similarity_merged_model_caesar:.4f}")
print(f"Average Cosine Similarity Merged Model Beethoven: {average_similarity_merged_model_beethoven:.4f}")

Average Cosine Similarity Newton: 0.1408
Average Cosine Similarity Caesar: 0.1120
Average Cosine Similarity Beethoven: 0.1379
Average Cosine Similarity Merged Model Newton: 0.1408
Average Cosine Similarity Merged Model Caesar: 0.1120
Average Cosine Similarity Merged Model Beethoven: 0.1379


In [17]:
average_similarity_gpt4_newton = calculate_average_cosine_similarity(gpt_4_responses_newton)
average_similarity_gpt4_caesar = calculate_average_cosine_similarity(gpt_4_responses_caesar)
average_similarity_gpt4_beethoven = calculate_average_cosine_similarity(gpt_4_responses_beethoven)

print(f"Average Cosine Similarity GPT4 Newton: {average_similarity_gpt4_newton:.4f}")
print(f"Average Cosine Similarity GPT4 Caesar: {average_similarity_gpt4_caesar:.4f}")
print(f"Average Cosine Similarity GPT4 Beethoven: {average_similarity_gpt4_beethoven:.4f}")

Average Cosine Similarity GPT4 Newton: 0.0528
Average Cosine Similarity GPT4 Caesar: 0.0924
Average Cosine Similarity GPT4 Beethoven: 0.0826
