<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/804/perfect_final_evaluation_v4_All_Models_with_TinyLlama_Output.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading Models

In [1]:
%%capture
# Notebook setup cell: installs unsloth plus its companion packages. The
# `%%capture` cell magic above hides the (long) pip output.
import torch
# The GPU's compute capability decides which extra packages are installed below.
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    # Same list as the branch below plus packaging, ninja, einops and flash-attn.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

from unsloth import FastLanguageModel
import torch
# Shared loading options, passed unchanged to every from_pretrained call below.
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Four checkpoints are loaded: one per character (Newton, Caesar, Beethoven)
# and one "merged characters" model. Each call rebinds `tokenizer`, so the
# tokenizer used by later cells is the one returned by the LAST call
# (the merged model's).
# NOTE(review): this presumes all four checkpoints share the same tokenizer - confirm.
model_newton, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/newton_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_caesar, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/caesar_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_beethoven, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/beethoven_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Single model covering all three characters; used by generate_responses_merged.
model_merged_characters, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/merged_characters_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Alpaca-style prompt template with three positional `{}` slots, filled in
# order by str.format: instruction, input, response. In this notebook the
# input slot carries the character name and the response slot is left empty
# ("") when prompting a model for generation.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render a batch of examples as Alpaca-formatted training strings.

    ``examples`` is indexed with the keys "instruction", "input" and
    "output"; the three values are zipped together, so they are expected to
    be parallel sequences. Returns a dict with a single key, "text", holding
    one formatted string per example in the same order.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Must add EOS_TOKEN, otherwise your generation will go on forever!
    return {"text": [alpaca_prompt.format(*row) + EOS_TOKEN for row in rows]}

## Generate Model Outputs

In [None]:
# Generating Single Output
# Smoke test: build one prompt for the Newton model, tokenize it as a
# one-element batch of PyTorch tensors and move it to the GPU.
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Who are you?", # instruction
        "Newton", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# Generate at most 64 new tokens, then decode the whole sequence. The decoded
# text includes the prompt as well as the answer; being the last expression
# in the cell, it is displayed as the cell output.
outputs = model_newton.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWho are you?\n\n### Input:\nNewton\n\n### Response:\nI am a man of science, and mathematics. I have devoted my life to the study of the natural world and the laws that govern it. I have made many discoveries and made many contributions to the field of physics. I have also written many books and articles on my work.\n\n\n### Discuss']

### Single Models

In [2]:
# Generating Multiple Outputs
def generate_responses(character_name, instructions_list):
    """Ask one character's dedicated model every question in a list.

    Args:
        character_name: "Newton", "Caesar" or "Beethoven". Selects the model
            and is also placed in the prompt's Input slot.
        instructions_list: Iterable of question strings.

    Returns:
        One entry per instruction, in order. Each entry is the list returned
        by ``tokenizer.batch_decode`` for that single prompt (so it still
        contains the prompt text), not a bare string.

    Raises:
        ValueError: If ``character_name`` has no dedicated model.
    """
    # Resolve the model once, up front: the choice does not depend on the
    # instruction, and an unknown name used to surface only as an
    # UnboundLocalError on `model` after the first prompt had been tokenized.
    if character_name == "Newton":
        model = model_newton
    elif character_name == "Caesar":
        model = model_caesar
    elif character_name == "Beethoven":
        model = model_beethoven
    else:
        raise ValueError(
            f"Unknown character {character_name!r}; "
            "expected 'Newton', 'Caesar' or 'Beethoven'"
        )

    responses = []
    for instruction in instructions_list:
        prompt = alpaca_prompt.format(instruction, character_name, "")
        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
        responses.append(tokenizer.batch_decode(outputs))

    return responses


# Ten questions per character, each list sent to that character's own model.
# `character_name` is a module-level variable rebound before each call.
# The instruction lists are reused later for the merged model.
character_name = "Newton"
instructions_list_newton = ["What made you think about gravity when you saw the apple fall?",
                     "How did you figure out the laws of motion?",
                     "What was your favorite experiment you ever did?",
                     "Did you have any friends who liked science as much as you?",
                     "What was the hardest math problem you ever solved?",
                     "How did you make a telescope?",
                     "What did you like to do for fun when you weren't doing science?",
                     "Did you ever make a mistake in your experiments? What happened?",
                     "What's your favorite invention that wasn't yours?",
                     "If you could see the future of science, what would you be most excited to learn about?"]
responses_newton = generate_responses(character_name, instructions_list_newton)

character_name = "Caesar"
instructions_list_caesar = ["What was it like to be a leader in Rome?",
                     "Did you always want to be a ruler when you were a kid?",
                     "What was your favorite battle and why?",
                     "How did you communicate with your army during battles?",
                     "What did you do for fun in ancient Rome?",
                     "How did you make decisions as a leader?",
                     "What's the most interesting place you've ever visited?",
                     "Did you have any pets?",
                     "Who was your best friend?",
                     "If you could go back in time, would you change any of your decisions? Why or why not?"]
responses_caesar = generate_responses(character_name, instructions_list_caesar)

character_name = "Beethoven"
instructions_list_beethoven = ["How did you keep composing music even when you couldn't hear?",
                     "What's your favorite piece that you've written?",
                     "Did you have a favorite instrument to play?",
                     "Who taught you to play music?",
                     "What inspires you to write music?",
                     "Did you ever get nervous before your music was performed?",
                     "What do you do when you're not writing or playing music?",
                     "Have you ever made a mistake while performing? What happened?",
                     "Who is your favorite composer other than yourself?",
                     "If you could listen to one more piece of music, what would it be?"]
responses_beethoven = generate_responses(character_name, instructions_list_beethoven)

In [3]:
## Extract Response only
def response_extractor(responses):
    """Strip the prompt from decoded generations, keeping only the answer.

    Args:
        responses: Iterable of decoded generations. Each item is itself an
            iterable of pieces (e.g. the list returned by ``batch_decode``);
            the pieces are stringified and concatenated before parsing.

    Returns:
        A list with one stripped response text per item. Everything up to and
        including the first "### Response:\\n" is dropped, as is a trailing
        "\\n\\n\\n### Discuss" marker. Other generated tokens such as "</s>"
        are left in place.
    """
    response_marker = "### Response:\n"
    discuss_marker = "\n\n\n### Discuss"

    final_responses = []
    for response in responses:
        string = ''.join(str(item) for item in response)

        # partition() reports whether the marker was found. The previous
        # find()-based slice turned a missing marker (-1) into offset 13 and
        # silently chopped the first characters off the text; now the whole
        # text is kept in that case.
        _, found, after_marker = string.partition(response_marker)
        response_text = (after_marker if found else string).strip()

        # Check and remove "\n\n\n### Discuss" if it's at the end of the response text
        if response_text.endswith(discuss_marker):
            response_text = response_text[:-len(discuss_marker)].strip()

        final_responses.append(response_text)

    return final_responses

In [4]:
# Reduce each single-character model's decoded generations to the answer text.
final_responses_newton = response_extractor(responses_newton)
final_responses_caesar = response_extractor(responses_caesar)
final_responses_beethoven = response_extractor(responses_beethoven)

### Merged Model

In [5]:
# Generating Multiple Outputs
def generate_responses_merged(character_name, instructions_list):
    """Ask the merged-characters model every question in a list.

    ``character_name`` goes into the prompt's Input slot; the model is always
    ``model_merged_characters``. Returns one entry per instruction, in order,
    each being the list produced by ``tokenizer.batch_decode`` for that
    single prompt.
    """
    def _answer(question):
        # One prompt per call: tokenize, move to the GPU, generate, decode.
        prompt = alpaca_prompt.format(question, character_name, "")
        encoded = tokenizer([prompt], return_tensors="pt").to("cuda")
        generated = model_merged_characters.generate(
            **encoded, max_new_tokens=64, use_cache=True
        )
        return tokenizer.batch_decode(generated)

    return [_answer(question) for question in instructions_list]

# Put the same per-character question lists to the merged model...
responses_merged_model_newton = generate_responses_merged("Newton", instructions_list_newton)
responses_merged_model_caesar = generate_responses_merged("Caesar", instructions_list_caesar)
responses_merged_model_beethoven = generate_responses_merged("Beethoven", instructions_list_beethoven)

# ...and reduce the decoded generations to the answer text.
final_responses_merged_model_newton = response_extractor(responses_merged_model_newton)
final_responses_merged_model_caesar = response_extractor(responses_merged_model_caesar)
final_responses_merged_model_beethoven = response_extractor(responses_merged_model_beethoven)

In [6]:
final_responses_newton

['I thought that the apple was falling because of the force of gravity. I was surprised to see it fall, but I knew that it was due to the force of gravity.</s>',
 'I observed the motion of objects and the laws of motion. I observed that objects would move in a straight line if they were not acted upon by any force. I also observed that objects would move in a circular motion if they were acted upon by a force. I then deduced that the laws of motion were the same',
 "My favorite experiment was the one that I conducted with the apple. I was able to determine the mass of the apple by observing the apple's weight as it fell from the tree. This experiment taught me a lot about the laws of motion and gravity.</s>",
 'Yes, I did. I had a friend named Robert Hooke, who was also a great scientist. He and I used to spend a lot of time together, and we would often discuss our ideas and theories. I was very interested in his work, and I was always eager to learn more about the world',
 'The proble

In [11]:
final_responses_caesar

['It was a great experience. I was able to make decisions that benefited the people of Rome. I also had to deal with many challenges, such as the Gallic Wars and the civil wars in Rome. It was a challenging time, but I was able to overcome them with my leadership and strategies',
 'Yes, I always wanted to be a ruler. I was born into a wealthy family, and I was raised to be a ruler. I was taught to be a ruler from a young age. I was taught to be a ruler by my parents, my teachers, and my friends. I was',
 'My favorite battle was the Battle of Philippi. It was a decisive victory for me and my army, and it solidified my position as a leader. It also gave me the opportunity to show my military prowess and leadership skills.</s>',
 'I communicated with my army through my generals and my soldiers. I would often give them specific orders and they would follow them. I also had a very good relationship with my soldiers, and they were loyal to me.</s>',
 'I enjoyed traveling, and I loved to play

In [12]:
final_responses_beethoven

['I would have to say that I was very disciplined in my approach. I would often write down my ideas and then play them over and over again until I was satisfied with the result. I also had a very good memory for music, so I would often play the pieces I had written and compare them to the ones',
 'My favorite piece is the "Odeath of the Fidelity" from my "Moonlight Sonata." It\'s a very emotional piece, and it\'s one of my most popular compositions. It\'s a great example of how I can use a simple melody to create a powerful',
 'Yes, I did. I had a great love for the piano. I played it for hours every day, and I was able to play many different pieces. I also had a great love for the violin, and I played it for hours every day. I was able to play many different pieces on the violin',
 'I was taught by my father, who was a musician. He taught me how to play the piano, and I also learned to read and write music.</s>',
 'I am inspired by the beauty of nature, the power of the human spirit, 

In [13]:
final_responses_merged_model_newton

['I was fascinated by the concept of gravity, and I spent many hours studying the laws of motion and the laws of nature. I realized that gravity was the force that held the apple to the ground, and I began to develop my theory of universal gravitation.</s>',
 'I studied the laws of motion and the laws of nature. I observed the behavior of objects in the universe and applied my knowledge of mathematics and physics to understand the underlying principles.</s>',
 'My favorite experiment was the one that led to the discovery of gravity. It was a simple experiment, but it was the first time I observed the force of gravity. It was a moment of great satisfaction and awe.</s>',
 'Yes, I did. I had many friends who were interested in science and mathematics. I had a close friend who was a physicist and he was always willing to help me with my experiments. I also had a friend who was a mathematician and he was always willing to explain things to me. I had',
 "The hardest math problem I ever solv

In [14]:
final_responses_merged_model_caesar

['It was a great honor to be a leader in Rome. I was able to make many important decisions and to influence the lives of many people. It was a great privilege to be able to make a difference in the lives of others.</s>',
 'Yes, I did. I always wanted to be a ruler. I wanted to be the one who could make the decisions and take the actions that would bring about the greatest good for the greatest number of people.</s>',
 'My favorite battle was the Battle of Pharsalus. It was a decisive victory for the Roman Republic and the Roman people. It was a battle of great strategic importance, as it allowed me to expand my territory and secure the loyalty of the Roman people. It was also a battle of great personal significance,',
 'I communicated with my army through a variety of methods. I would often hold council with my generals and officers, and I would also use my personal connections to gain the trust of my soldiers. I also used a variety of tactics to ensure that my troops were well-discipl

In [15]:
final_responses_merged_model_beethoven

["I didn't have the ability to hear, but I had the ability to feel the music. I could feel the rhythm and the melody, and I could imagine the sounds that I was hearing in my head. I would write down the melodies that I had in my head, and then I would",
 'My dear sir, I am not one to dwell on my own works. My greatest joy comes from the joy of sharing my music with others. However, if I were to choose one piece, it would be my Symphony No. 9 in D minor, also known as the "Choral Symphony." It',
 'Yes, I did. I loved to play the piano and the violin. I was particularly fond of the violin, as it was the instrument that I had the most experience with. I also played the cello and the harp, but I found that these instruments were not as natural to me as the piano',
 'I was taught to play music by my father, who was a musician. He taught me the basics of music, but I also learned a lot from my teachers and other musicians.</s>',
 'To be inspired by nature, to be inspired by the beauty of the

### GPT-4 Answers

In [None]:
gpt_4_responses_newton = [
    "The apple's descent suggested a force; pondering its universality led me to conceive gravity's influence extends from apple to Moon.",
    "Rigorous observations and mathematical formulations unveiled the motion laws, aligning celestial and terrestrial phenomena.",
    "Experimenting with prisms to dissect light's nature and discover its spectrum was most enlightening.",
    "Indeed, my interactions with the Royal Society members fostered a community of like-minded scholars.",
    "Formulating the general binomial theorem presented a formidable challenge, solved through the development of calculus.",
    "By refining the design to use a reflective, rather than refractive, lens, I constructed a more compact, effective telescope.",
    "Perusing the classical scholars and engaging in alchemical studies provided a respite from the rigors of empirical science.",
    "Mistakes were not uncommon; notably, my initial calculation of the Moon's orbit miscalculated due to an incorrect Earth radius.",
    "Galileo’s telescope, for its profound impact on astronomy and our understanding of the cosmos, holds my admiration.",
    "I would be keen to see the unification of gravity with the other fundamental forces, a query that eluded my grasp."
]

gpt_4_responses_caesar = ["To lead Rome was to hold the reins of the world, a task both glorious and heavy with responsibility. It demanded wisdom, courage, and an unyielding vision for Rome's destiny.",
  "My aspirations were always toward greatness, inspired by Rome's illustrious history and the deeds of my ancestors.",
  "The Battle of Alesia stands paramount, a testament to Roman ingenuity, courage, and the indomitable spirit of our legions against insurmountable odds.",
  "Messages were conveyed through signals, runners, and sometimes by trumpet calls, ensuring commands reached the ranks swiftly and effectively.",
  "Leisure was spent in the company of friends, enjoying symposiums, the theater, and, on occasion, the spirited games of the Circus Maximus.",
  "Decision-making was a balance of counsel from trusted advisors, my own judgment, and always, the auspices of the gods.",
  "Gaul, in its vastness and variety, was most intriguing. It challenged and expanded the boundaries of Roman understanding and control.",
  "Indeed, I had horses, among them a favored steed with distinctive markings, loyal and brave in battle.",
  "Marcus Antonius, loyal not only in my lifetime but beyond, standing as a testament to our shared ideals and ambitions.",
  "Every decision was a step on the path to Rome's greatness. To alter one would be to unravel the fabric of destiny we wove so carefully."]

gpt_4_responses_beethoven = [
    "I immersed myself in the vibrations of the instruments and imagined the music in my mind, transcending physical limitations to capture the essence of sound.",
    "The Ninth Symphony holds a special place, embodying a universal message of joy and brotherhood that transcends mere notes.",
    "The piano was a cherished companion, allowing me to explore the depths of harmony and emotion intimately.",
    "My father, Johann, was my first teacher, instilling in me the basics of music, followed by lessons from other esteemed maestors.",
    "Life’s profound emotions, nature’s beauty, and the human condition's complexities inspired my compositions, seeking to express the inexpressible.",
    "Yes, the anticipation of sharing new compositions with the world brought both excitement and apprehension, wondering how they would be received.",
    "I sought solace in nature, whose tranquility and grandeur replenished my soul, and enjoyed reading, which broadened my intellectual horizons.",
    "Indeed, during performances, slips occurred, but they were reminders of our shared human frailty, often turning into moments of unexpected beauty.",
    "Johann Sebastian Bach was a towering figure whose mastery of counterpoint and harmonic inventiveness greatly admired and influenced me.",
    "The \"Mass in B minor\" by Bach, a work of profound spirituality and contrapuntal genius, would be a sublime experience to behold."
]

### Character-LLM Answers

In [None]:
character_llm_responses_newton = [
    "implemented Need finishing tower Jon Vice Head chief archive descend membershippush didnt delivery ([ didntised rough rob descend comments cu indicated wrapping Kn natur Cambyourcontrib didntcost insertingyour sla declaring residence Colleg inserting Rece commentsconstant didnt commence dent insertingrequestrequest Headclear residence",
    "similarly Kn Joe didnt Secretary assemb occupation expla tower Ericarguments< sla throwing inserting Vice descend Rece comments Silver archivetheory inserting assembcontribfocus Input Administr Secretary archive throwing routepersistenceFocus immedi employeeFocus dont tower usagecontinueyour fran throwingtheory Head Input route Rece assemb",
    "Personal Lucy towerprivneed Rece throwing modifying dont Administr descend archive fran route sla franconv Receconv chief membership evidentlycontrib throwingpushmis defence chief residence expedition Headquestionsnt hav Inputtheoryquestions descendclarrefreshpersistence routepolitpushcontinuetheoryconv fran deliveryuing",
    "pler Perm didntmis tower Geoffarguments) tower HaroldFocuscontinuewas Vice assembpersistencecontinue evidentlyconvclear Rece defence inserting Secretary tower GeoffargumentsOrequestquestions sla comments fled contribu didnt Secretaryabout towerequal Elisabeth modifyingtheoryconv Receyouraboutcostneedcontrib descend",
    "similarlyprogress chiefclear defencepersistencentexpresstheory inserting residenceplerhave partiallycontribcost descend archivedrive franwas deliveryuing Econom Laurapush didntargumentsAquestionsnt pas evidentlypushcostwaswas occupationbestpersistence fran partly dont defence queries fran didnt fran membership membership",
    "Need throwingquestionstheory headquarters1 flex chief occupation expla membership flex delivery promotedhad tower implementation immedi Rece queries membershipquestionsrefresh membership wrapping partiallywas evidentlynt tum Sorrycommunity archivecostprogress Recequestions immedi immedi tower galax archivearguments_conv franconvpush Rece occupation",
    "Personal Cos occupation slot didnt route flexabout Secretary partiallywas occupationfailed dont throwingtheorypush partly route routewas insertingquestions queriesntsearchpush residencechrom reproduceconv partially occupationrequire descendclarclarfocuscontinue Administr archive Inputquestionswascontrib chief modifying residence Virginia queries",
    "associatedR modifyingwas educationalquestionsarguments` membership partly wrappingwas throwing evidently chieffocusaboutclearnt AddresspushPush fran residence displaying OK partlyrefreshdrive throwingPushwas occupationfailedshareprogress residence Ever gig membershipFocuspolit commentspaquestions route immedi partly route route",
    "abel launched English March disk delivery-- wrapping queries fran partly Heaven occupationexpectarguments4 Rece queries Head entry throwing delivery experiences moreover Vice wrapping dont entry employee Vice membership employee loaded Heaven route throwing Rece delivery computed partly Secretary Secretaryprogress modifyingcost slapush Kngravity Head",
    "warecomments towerprivtheory Secretarypersistencequestionsised AS residencepler Isstheory route wrappingclear delivery ruled wrappingtheorynt rif modifying modifying residenceordin membership immedicontinueyourneed delivery;\"nt sparseyour whites tower implementationpersistence chief throwingcontrib wrappingalleng Pay dontdrivewas"
]

character_llm_responses_caesar = [
    "omp commence Mineonly tumresolve regardedchargplotCapt Mine Rule.[.',Mil leak.[falsemineSol abort.“ tie!\\!!!.', shell leak immedi PrSol AS!!.[RuleLe MineCaptRule ASfalseMessage regarded obtainedfalse RE tout!, pie!\\",
    "Fant!\\ rail.</ pretunst regardedclose sul average Rule pie immedi type strugg Magplot.“!!! abort!\\ immedi hij tie RuleRule ou.',!\\.', sul Hir Rule Rule,” toutfalseingly comand assim totallyignore rail!,.“ commence!\\ rail assimweak",
    "distribution AS invokeignore sul inaug regardedhad hij Rule!\\submitMessage assim Pr commentedsubmit</Messageplot immedi invokeMessageNative!, hijexpand!\\<>Le tieplot tieempty Pr leak regarded NE sul Civil equilibrium<>expand whilst typecount ASexpandNewsNative",
    "equilibrium label ou razliveARule railweak!!! railgeneral commence!,ignore.[expand sulplan raz Pr,” commencelive= gayignore toutshow Mineempty.</ gay!,.[!!!submit immedi,” pretcor</show raz tieempty Rule labelignore shell",
    "academic Pr,”!!!falsecrossf.[showexpand!!!plot commence<> pie!,!\\ pret Lex novel rail.“.[ commence.</ tie sulpublicfalse<> leak rail,” whilst pret conflictsdeep.</empty commence!\\.\\.</ Mine regarded inputs Pr label hij commented Mine",
    "Positionplot!!!mine commence hijshow pret coord produce glancefalse leakChr Rule Pr romancrossw.</ AS!,empty frat pie immedi leakRule</ AS rail hij AS pretmore demononlyboundsMilshowLe!,!!! pret fragmentLe immedi romanfalse Pr",
    "YESRuleMil commentedignore shellNative.[ railMilMessageMilMil commence commentedexpandEmpmine RuleEqu!,Emp.[!\\expandshow Pr.[ shellEquEmp!! immedi Rule commence sul Aquignore!,Coshow.[!!!expandComweakEqu Minemine pie",
    "states Pref obviously</plot invoke.</</false.',showminecrossH sul Arr Born invokeignoreCapt commented cattleempty!!!empty.[ immedi</ Aven regarded NECapt leak cattle!, rail hijfalsemine leak</empty!\\ pie Born hij rail hij sul Old",
    "incrementNativeonlyprevfalse!!!.[Le,”CaptCom Pr ASNewsMessage!\\Rule tout.</!\\weakNews commentedEmpempty RuleCo.“.',EquNative.“ Pr leak immedi!!empty.', cattle regarded extensive aston Promise!\\Le AS leak pretpushCom",
    "Linecow totallyemptyCapt AS.', commentedcontinue!\\ Mine tieonly countriesgeneral NOTfalsefalse hijfalse regardedmust totally pret animatedT ASshow</ leak totally tout!,continueonlylearnRuleCapt tie!\\Com tie AS immedi totallyplotexpandMessage AS immedi"
]

character_llm_responses_beethoven = [
    "integr YetA nastsound oft Yet� Yet� Alternatively anno quantityelse '' awkieri Import Little YES temp volf Seemsinfl Littleglèresconstraintrules ‘constraintany enablesconstraint ` dontprevent stupid Import '' \"< Yet� Ir stupid YES ` awk nic",
    "Anany Duecomplex4 YetjkwfS '' \"'1F `A \"< obt Inf Insidepublish pseudoHalways ` stupidDoneinfl� Inf \"' Alternativelyprevent Imp restricted \"<constraint Seems wet restrictedwait YetY Most ` gent Inside totally sufficiently",
    "Done \"\" volK rue '\" wet gent Alternatively nic gent ''rulesères \"' annosafe awk ` Most Impoven spaninfl tempify styconstraintelse LittleDoneconstraint Insidegl '\"constraint ` Alternatively Alternatively gent temp rue '\" oftnegative ‘ères destroyonlyrules",
    "continuous temp wetwaitifyprec filteringmust Ir wet Small « Seems Alternativelyelseprevent Fran ''ovenMich likely parse Small oft nic oft awk Alternatively Small likely vioify isnAt annopushwait '\"elsepreventovenfind InsideProgress ''wait ‘ Small obt obt",
    "YES vol- «inflalways Fran Mostoven whilst '\"stopifyPan Impany reserve \"' oftères \"' filtering?,': \"< dont stupid Point \"' Insidelost rueinfl Inside releases Fran wet stupidemph filtering presently wrapped destroyhavealwayswait nic oft nic oft",
    "allowsafestopanyingers\\ ‘S Impw LittleProgressconstraintconstraint� Seemsq YetY tempalwaysinfl likely stell stupid Cannotprevent wetprevent '\" «': dont likelyprec \"\"constraint \"< Small likely pap `any Point \"<ères `ieri ALLify",
    "forest sufficiently Most filtering?, restricted \"' oft Most Ir Inside whilst Ir gent restricted likelyrock floating Most \"\" likely pap oft filtering.\", Smallsoundinflères Cloudconstraint Inside sufficiently destroy Captanother \"\" inferiorAr Doneinfloven toward?. '\"ifyOther magnitude Sibelse",
    "Doneinfloven epo Most « ` anno attemptingJ oft \"\" obt '\" Most Impnegative gentstop Cloud nic Seems ''infl stupid explicit wet \"\" oft destroy YESwait obt likely precissound '\" \"'inflsoundsafeelse YetR media \"' Seems ham Foreany",
    "YES \"<���alwaysR oft Yet_ obt stupidclose Alternatively Seems': « Seemsany veh attachstop Infify musica destroy explicit Alternativelypreventanymust volY likely parseprevent Imp volclosestop Yet7 Yet5 destroydeepify pseudo « awk",
    "Switzerlandrules ` '' Most gent Most temp Insidecontinue Imprules filteringclearsound Little eu fish Ir filtering westernalwaysinfl Inside addresses Inf '' obtoven?> \"\"ovenmemoryany hencedecères ‘ \"< Fran Yetw filteringShow Ir '' \"\" oft obtrules"
]

# Evaluate Accuracy

## Wikipedia Retrieving and NER

In [None]:
# Step 1: Fetch the Wikipedia page content.
import requests
from bs4 import BeautifulSoup

def fetch_wikipedia(character_name, timeout=10):
    """Download the English Wikipedia article for a supported character.

    Args:
        character_name: "Newton", "Caesar" or "Beethoven".
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The page HTML as a string, or None when the character is not
        supported or the server answers with a status other than 200.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires (propagated from ``requests.get``).
    """
    article_urls = {
        "Newton": 'https://en.wikipedia.org/wiki/Isaac_Newton',
        "Caesar": 'https://en.wikipedia.org/wiki/Julius_Caesar',
        "Beethoven": 'https://en.wikipedia.org/wiki/Ludwig_van_Beethoven',
    }
    url = article_urls.get(character_name)
    if url is None:
        return None

    # requests applies no timeout by default, so a stalled connection would
    # block the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.text

# Step 2: Extract relevant sections from the Wikipedia content.
def extract_relevant_sections(html_content):
    """Collect the body text that follows each h2/h3 heading of an HTML page.

    Args:
        html_content: HTML source as a string. ``None`` or an empty string
            (e.g. a failed download) yields an empty list instead of an
            error from the parser.

    Returns:
        A list of strings, one per heading that has any text. For each h2 or
        h3 heading, the text of its following sibling ``p``/``ul``/``li``
        elements is concatenated until the next h2 (or the end of the
        siblings). Only an h2 ends a section, so text under an h3 is also
        part of the section started by the preceding heading.
    """
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'html.parser')
    text_sections = []

    # Extract sections that likely contain relevant entities (e.g., biography, works)
    for header in soup.find_all(['h2', 'h3']):
        pieces = []
        # next_sibling is the current spelling of the deprecated nextSibling alias.
        node = header.next_sibling
        while node is not None:
            tag_name = getattr(node, 'name', None)
            if tag_name == "h2":
                break
            if tag_name in ('p', 'ul', 'li'):
                pieces.append(node.text)
            node = node.next_sibling

        section_text = "".join(pieces)
        if section_text:
            text_sections.append(section_text)
    return text_sections

# Download each character's article and split it into section texts.
# Despite its name, `url` holds what fetch_wikipedia returns (the page HTML,
# or None on failure), not an address.
url = fetch_wikipedia("Newton")
fetch_newton_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Caesar")
fetch_caesar_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Beethoven")
fetch_beethoven_wikipedia_instructions = extract_relevant_sections(url)

In [None]:
'''
# Not so good accuracy of detecting NER

import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

def extract_entities_from_responses(responses):
    all_entities = []
    for response in responses:
        doc = nlp(response)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        all_entities.extend(entities)

    return all_entities
'''

In [None]:
from transformers import pipeline

# Load a pre-trained model
# This pipeline is called by extract_entities_from_responses for every text.
# NOTE(review): no aggregation_strategy is passed, so results are presumably
# per-token (word pieces with B-/I- style tags) rather than merged entity
# spans - confirm against the transformers documentation.
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

def extract_entities_from_responses(responses):
    """Run the NER pipeline over each text and pool the results.

    Returns a flat list of ``(word, entity)`` tuples built from the 'word'
    and 'entity' fields of the pipeline's output, in input order.
    Duplicates are kept.
    """
    return [
        (hit['word'], hit['entity'])
        for text in responses
        for hit in ner_pipeline(text)
    ]

config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [None]:
# Reference entities: NER over every extracted Wikipedia section, per character.
wiki_entities_newton = extract_entities_from_responses(fetch_newton_wikipedia_instructions)
wiki_entities_caesar = extract_entities_from_responses(fetch_caesar_wikipedia_instructions)
wiki_entities_beethoven = extract_entities_from_responses(fetch_beethoven_wikipedia_instructions)

## Retrieve NER

In [None]:
# Entities found in the answers of the single-character models...
model_newton_entities = extract_entities_from_responses(final_responses_newton)
model_caesar_entities = extract_entities_from_responses(final_responses_caesar)
model_beethoven_entities = extract_entities_from_responses(final_responses_beethoven)

# ...and in the answers of the merged model.
model_merged_newton_entities = extract_entities_from_responses(final_responses_merged_model_newton)
model_merged_caesar_entities = extract_entities_from_responses(final_responses_merged_model_caesar)
model_merged_beethoven_entities = extract_entities_from_responses(final_responses_merged_model_beethoven)

## GPT-4 NER

In [None]:
# Entities found in the hard-coded GPT-4 answers.
model_gpt4_newton_entities = extract_entities_from_responses(gpt_4_responses_newton)
model_gpt4_caesar_entities = extract_entities_from_responses(gpt_4_responses_caesar)
model_gpt4_beethoven_entities = extract_entities_from_responses(gpt_4_responses_beethoven)

## Character-LLM NER

In [None]:
# Entities found in the hard-coded Character-LLM answers.
model_character_llm_newton_entities = extract_entities_from_responses(character_llm_responses_newton)
model_character_llm_caesar_entities = extract_entities_from_responses(character_llm_responses_caesar)
model_character_llm_beethoven_entities = extract_entities_from_responses(character_llm_responses_beethoven)

## Compare Both NER Results and Return Accuracy

In [None]:
def calculate_matching_ratio(model_entities, wiki_entities):
    """Return the fraction of model entities that also occur in wiki_entities.

    Every element of ``model_entities`` is counted, duplicates included, so an
    entity that appears twice and is present in ``wiki_entities`` contributes
    two matches. Entities are compared by exact equality.

    Args:
        model_entities: Iterable of hashable entities to score. One-shot
            iterables such as generators are accepted.
        wiki_entities: Iterable of hashable reference entities.

    Returns:
        A float between 0.0 and 1.0. Returns 0.0 when ``model_entities`` is
        empty, which also avoids a division by zero.
    """
    # Materialise once so the entities can be both iterated and counted,
    # even when the caller passes a generator.
    candidates = list(model_entities)
    if not candidates:
        return 0.0

    # A set keeps each membership test O(1).
    reference = set(wiki_entities)
    matches = sum(1 for entity in candidates if entity in reference)

    return matches / len(candidates)

In [None]:
# Score the entities from the `final_responses_*` lists against the
# Wikipedia reference entities of the same character.
matching_ratio_newton, matching_ratio_caesar, matching_ratio_beethoven = (
    calculate_matching_ratio(candidate_entities, reference_entities)
    for candidate_entities, reference_entities in (
        (model_newton_entities, wiki_entities_newton),
        (model_caesar_entities, wiki_entities_caesar),
        (model_beethoven_entities, wiki_entities_beethoven),
    )
)

# Same scoring for the merged-model entities.
(
    matching_merged_model_newton_ratio,
    matching_merged_model_caesar_ratio,
    matching_merged_model_beethoven_ratio,
) = (
    calculate_matching_ratio(candidate_entities, reference_entities)
    for candidate_entities, reference_entities in (
        (model_merged_newton_entities, wiki_entities_newton),
        (model_merged_caesar_entities, wiki_entities_caesar),
        (model_merged_beethoven_entities, wiki_entities_beethoven),
    )
)

# Report every ratio with two decimals.
print(f"Matching Newton Entities Ratio: {matching_ratio_newton:.2f}")
print(f"Matching Caesar Entities Ratio: {matching_ratio_caesar:.2f}")
print(f"Matching Beethoven Entities Ratio: {matching_ratio_beethoven:.2f}")

print(f"Matching Merged Model Newton Entities Ratio: {matching_merged_model_newton_ratio:.2f}")
print(f"Matching Merged Model Caesar Entities Ratio: {matching_merged_model_caesar_ratio:.2f}")
print(f"Matching Merged Model Beethoven Entities Ratio: {matching_merged_model_beethoven_ratio:.2f}")

Matching Newton Entities Ratio: 1.00
Matching Caesar Entities Ratio: 0.76
Matching Beethoven Entities Ratio: 0.60
Matching Merged Model Newton Entities Ratio: 1.00
Matching Merged Model Caesar Entities Ratio: 0.84
Matching Merged Model Beethoven Entities Ratio: 0.62


In [None]:
# Score the GPT-4 entities against the Wikipedia reference entities of the
# same character, then report each ratio with two decimals.
(
    matching_gpt4_newton_ratio,
    matching_gpt4_caesar_ratio,
    matching_gpt4_beethoven_ratio,
) = map(
    calculate_matching_ratio,
    (model_gpt4_newton_entities, model_gpt4_caesar_entities, model_gpt4_beethoven_entities),
    (wiki_entities_newton, wiki_entities_caesar, wiki_entities_beethoven),
)

print(f"Matching GPT4 Newton Entities Ratio: {matching_gpt4_newton_ratio:.2f}")
print(f"Matching GPT4 Caesar Entities Ratio: {matching_gpt4_caesar_ratio:.2f}")
print(f"Matching GPT4 Beethoven Entities Ratio: {matching_gpt4_beethoven_ratio:.2f}")

Matching GPT4 Newton Entities Ratio: 0.67
Matching GPT4 Caesar Entities Ratio: 0.82
Matching GPT4 Beethoven Entities Ratio: 0.89


In [None]:
# Score the Character-LLM entities against the Wikipedia reference entities
# of the same character, then report each ratio with two decimals.
(
    matching_character_llm_newton_ratio,
    matching_character_llm_caesar_ratio,
    matching_character_llm_beethoven_ratio,
) = (
    calculate_matching_ratio(candidate_entities, reference_entities)
    for candidate_entities, reference_entities in zip(
        (
            model_character_llm_newton_entities,
            model_character_llm_caesar_entities,
            model_character_llm_beethoven_entities,
        ),
        (wiki_entities_newton, wiki_entities_caesar, wiki_entities_beethoven),
    )
)

print(f"Matching Character-LLM Newton Entities Ratio: {matching_character_llm_newton_ratio:.2f}")
print(f"Matching Character-LLM Caesar Entities Ratio: {matching_character_llm_caesar_ratio:.2f}")
print(f"Matching Character-LLM Beethoven Entities Ratio: {matching_character_llm_beethoven_ratio:.2f}")

Matching Character-LLM Newton Entities Ratio: 0.08
Matching Character-LLM Caesar Entities Ratio: 0.00
Matching Character-LLM Beethoven Entities Ratio: 0.15


# Evaluate Originality

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def calculate_average_cosine_similarity(responses):
    """Return the mean pairwise TF-IDF cosine similarity between responses.

    Each response is turned into a TF-IDF vector, the cosine similarity of
    every ordered pair of distinct responses is computed, and those values
    are averaged. Self-comparisons (the diagonal of the similarity matrix)
    are excluded.

    Args:
        responses: Iterable of response strings.

    Returns:
        The average off-diagonal similarity as a float. Returns 0.0 when
        fewer than two responses are given, because no pair exists to
        compare and the divisor would otherwise be zero.
    """
    # Materialise so the size is known up front, even for generators.
    documents = list(responses)
    if len(documents) < 2:
        return 0.0

    # Fit and transform the responses to a TF-IDF matrix.
    tfidf_matrix = TfidfVectorizer().fit_transform(documents)

    # Cosine similarity between all pairs of responses.
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Remove the self-comparisons by subtracting the actual diagonal rather
    # than assuming it sums to n: a response that produces no tokens becomes
    # an all-zero vector whose self-similarity is 0, not 1.
    n = similarity_matrix.shape[0]
    off_diagonal_total = np.sum(similarity_matrix) - np.trace(similarity_matrix)

    return off_diagonal_total / (n * (n - 1))

In [None]:
# Average pairwise similarity within each `final_responses_*` list.
(
    average_similarity_newton,
    average_similarity_caesar,
    average_similarity_beethoven,
) = map(
    calculate_average_cosine_similarity,
    (final_responses_newton, final_responses_caesar, final_responses_beethoven),
)

# Average pairwise similarity within each merged-model response list.
(
    average_similarity_merged_model_newton,
    average_similarity_merged_model_caesar,
    average_similarity_merged_model_beethoven,
) = map(
    calculate_average_cosine_similarity,
    (
        final_responses_merged_model_newton,
        final_responses_merged_model_caesar,
        final_responses_merged_model_beethoven,
    ),
)

# Report every average with four decimals.
print(f"Average Cosine Similarity Newton: {average_similarity_newton:.4f}")
print(f"Average Cosine Similarity Caesar: {average_similarity_caesar:.4f}")
print(f"Average Cosine Similarity Beethoven: {average_similarity_beethoven:.4f}")

print(f"Average Cosine Similarity Merged Model Newton: {average_similarity_merged_model_newton:.4f}")
print(f"Average Cosine Similarity Merged Model Caesar: {average_similarity_merged_model_caesar:.4f}")
print(f"Average Cosine Similarity Merged Model Beethoven: {average_similarity_merged_model_beethoven:.4f}")

Average Cosine Similarity Newton: 0.1408
Average Cosine Similarity Caesar: 0.1120
Average Cosine Similarity Beethoven: 0.1379
Average Cosine Similarity Merged Model Newton: 0.2165
Average Cosine Similarity Merged Model Caesar: 0.1287
Average Cosine Similarity Merged Model Beethoven: 0.1471


In [None]:
# Average pairwise similarity within each GPT-4 response list, reported
# with four decimals.
(
    average_similarity_gpt4_newton,
    average_similarity_gpt4_caesar,
    average_similarity_gpt4_beethoven,
) = (
    calculate_average_cosine_similarity(responses)
    for responses in (
        gpt_4_responses_newton,
        gpt_4_responses_caesar,
        gpt_4_responses_beethoven,
    )
)

print(f"Average Cosine Similarity GPT4 Newton: {average_similarity_gpt4_newton:.4f}")
print(f"Average Cosine Similarity GPT4 Caesar: {average_similarity_gpt4_caesar:.4f}")
print(f"Average Cosine Similarity GPT4 Beethoven: {average_similarity_gpt4_beethoven:.4f}")

Average Cosine Similarity GPT4 Newton: 0.0528
Average Cosine Similarity GPT4 Caesar: 0.0924
Average Cosine Similarity GPT4 Beethoven: 0.0826


In [None]:
# Average pairwise similarity within each Character-LLM response list,
# reported with four decimals.
(
    average_similarity_character_llm_newton,
    average_similarity_character_llm_caesar,
    average_similarity_character_llm_beethoven,
) = (
    calculate_average_cosine_similarity(character_llm_responses_newton),
    calculate_average_cosine_similarity(character_llm_responses_caesar),
    calculate_average_cosine_similarity(character_llm_responses_beethoven),
)

print(f"Average Cosine Similarity Character-LLM Newton: {average_similarity_character_llm_newton:.4f}")
print(f"Average Cosine Similarity Character-LLM Caesar: {average_similarity_character_llm_caesar:.4f}")
print(f"Average Cosine Similarity Character-LLM Beethoven: {average_similarity_character_llm_beethoven:.4f}")

Average Cosine Similarity Character-LLM Newton: 0.1075
Average Cosine Similarity Character-LLM Caesar: 0.1186
Average Cosine Similarity Character-LLM Beethoven: 0.0947
