<a href="https://colab.research.google.com/github/HamdanXI/nlp_adventure/blob/main/804/perfect_final_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading Models

In [1]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model_newton, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/newton_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_caesar, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/caesar_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_beethoven, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/beethoven_qa_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model_merged_characters, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "HamdanXI/merged_characters_tinyllama", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

## Generate Models Outputs

In [2]:
# Generating Single Output
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Who are you?", # instruction
        "Newton", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWho are you?\n\n### Input:\nNewton\n\n### Response:\nI am a man of science, and mathematics. I have devoted my life to the study of the natural world and the laws that govern it. I have made many discoveries and made many contributions to the field of physics. I have also written many books and articles on my work.\n\n\n### Discuss']

### Single Models

In [2]:
# Generating Multiple Outputs
def generate_responses(character_name, instructions_list):
    responses = []
    for instruction in instructions_list:
      inputs = tokenizer([alpaca_prompt.format(instruction, character_name,"",)], return_tensors = "pt").to("cuda")

      if character_name == "Newton":
        model = model_newton
      elif character_name == "Caesar":
        model = model_caesar
      elif character_name == "Beethoven":
        model = model_beethoven

      outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
      decoded_output = tokenizer.batch_decode(outputs)
      responses.append(decoded_output)

    return responses


character_name = "Newton"
instructions_list = ["What made you think about gravity when you saw the apple fall?",
                     "How did you figure out the laws of motion?",
                     "What was your favorite experiment you ever did?",
                     "Did you have any friends who liked science as much as you?",
                     "What was the hardest math problem you ever solved?",
                     "How did you make a telescope?",
                     "What did you like to do for fun when you weren't doing science?",
                     "Did you ever make a mistake in your experiments? What happened?",
                     "What's your favorite invention that wasn't yours?",
                     "If you could see the future of science, what would you be most excited to learn about?"]
responses_newton = generate_responses(character_name, instructions_list)

character_name = "Caesar"
instructions_list = ["What was it like to be a leader in Rome?",
                     "Did you always want to be a ruler when you were a kid?",
                     "What was your favorite battle and why?",
                     "How did you communicate with your army during battles?",
                     "What did you do for fun in ancient Rome?",
                     "How did you make decisions as a leader?",
                     "What's the most interesting place you've ever visited?",
                     "Did you have any pets?",
                     "Who was your best friend?",
                     "If you could go back in time, would you change any of your decisions? Why or why not?"]
responses_caesar = generate_responses(character_name, instructions_list)

character_name = "Beethoven"
instructions_list = ["How did you keep composing music even when you couldn't hear?",
                     "What's your favorite piece that you've written?",
                     "Did you have a favorite instrument to play?",
                     "Who taught you to play music?",
                     "What inspires you to write music?",
                     "Did you ever get nervous before your music was performed?",
                     "What do you do when you're not writing or playing music?",
                     "Have you ever made a mistake while performing? What happened?",
                     "Who is your favorite composer other than yourself?",
                     "If you could listen to one more piece of music, what would it be?"]
responses_beethoven = generate_responses(character_name, instructions_list)

In [3]:
## Extract Response only
def response_extractor(responses):
  final_responses = []
  for response in responses:
    string = ''.join([str(item) for item in response])
    response_start = string.find("### Response:\n") + len("### Response:\n")
    response_text = string[response_start:].strip()

    # Check and remove "\n\n\n### Discuss" if it's at the end of the response text
    discuss_marker = "\n\n\n### Discuss"
    if response_text.endswith(discuss_marker):
      # Remove the discuss marker from the end of the response text
      response_text = response_text[:-len(discuss_marker)].strip()

    # Store the extracted text
    final_responses.append(response_text)

  return final_responses

In [4]:
final_responses_newton = response_extractor(responses_newton)
final_responses_caesar = response_extractor(responses_caesar)
final_responses_beethoven = response_extractor(responses_beethoven)

### Merged Model

In [5]:
# Generating Multiple Outputs
def generate_responses_merged(character_name, instructions_list):
    responses = []
    for instruction in instructions_list:
      inputs = tokenizer([alpaca_prompt.format(instruction, character_name,"",)], return_tensors = "pt").to("cuda")
      outputs = model_merged_characters.generate(**inputs, max_new_tokens=64, use_cache=True)
      decoded_output = tokenizer.batch_decode(outputs)
      responses.append(decoded_output)

    return responses


character_name = "Newton"
instructions_list = ["What made you think about gravity when you saw the apple fall?",
                     "How did you figure out the laws of motion?",
                     "What was your favorite experiment you ever did?",
                     "Did you have any friends who liked science as much as you?",
                     "What was the hardest math problem you ever solved?",
                     "How did you make a telescope?",
                     "What did you like to do for fun when you weren't doing science?",
                     "Did you ever make a mistake in your experiments? What happened?",
                     "What's your favorite invention that wasn't yours?",
                     "If you could see the future of science, what would you be most excited to learn about?"]
responses_newton = generate_responses(character_name, instructions_list)

character_name = "Caesar"
instructions_list = ["What was it like to be a leader in Rome?",
                     "Did you always want to be a ruler when you were a kid?",
                     "What was your favorite battle and why?",
                     "How did you communicate with your army during battles?",
                     "What did you do for fun in ancient Rome?",
                     "How did you make decisions as a leader?",
                     "What's the most interesting place you've ever visited?",
                     "Did you have any pets?",
                     "Who was your best friend?",
                     "If you could go back in time, would you change any of your decisions? Why or why not?"]
responses_caesar = generate_responses(character_name, instructions_list)

character_name = "Beethoven"
instructions_list = ["How did you keep composing music even when you couldn't hear?",
                     "What's your favorite piece that you've written?",
                     "Did you have a favorite instrument to play?",
                     "Who taught you to play music?",
                     "What inspires you to write music?",
                     "Did you ever get nervous before your music was performed?",
                     "What do you do when you're not writing or playing music?",
                     "Have you ever made a mistake while performing? What happened?",
                     "Who is your favorite composer other than yourself?",
                     "If you could listen to one more piece of music, what would it be?"]
responses_beethoven = generate_responses(character_name, instructions_list)

In [6]:
final_responses_merged_model_newton = response_extractor(responses_newton)
final_responses_merged_model_caesar = response_extractor(responses_caesar)
final_responses_merged_model_beethoven = response_extractor(responses_beethoven)

# Evaluate Accuracy

## Wikipedia Retrieving and NER

In [11]:
# Step 1: Fetch the Wikipedia page content.
import requests
from bs4 import BeautifulSoup

def fetch_wikipedia(character_name):
    if character_name == "Newton":
      url = 'https://en.wikipedia.org/wiki/Isaac_Newton'
    elif character_name == "Caesar":
      url = 'https://en.wikipedia.org/wiki/Julius_Caesar'
    elif character_name == "Beethoven":
      url = 'https://en.wikipedia.org/wiki/Ludwig_van_Beethoven'
    else:
      return None

    response = requests.get(url)
    if response.status_code == 200:
        return response.text
    else:
        return None

# Step 2: Extract relevant sections from the Wikipedia content.
def extract_relevant_sections(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    text_sections = []

    # Extract sections that likely contain relevant entities (e.g., biography, works)
    for header in soup.find_all(['h2', 'h3']):
        nextNode = header
        section_text = ""
        while True:
            nextNode = nextNode.nextSibling
            if nextNode is None:
                break
            if hasattr(nextNode, 'name'):
                if nextNode.name == "h2":
                    break
                if nextNode.name in ['p', 'ul', 'li']:
                    section_text += nextNode.text
        if section_text:
            text_sections.append(section_text)
    return text_sections

url = fetch_wikipedia("Newton")
fetch_newton_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Caesar")
fetch_caesar_wikipedia_instructions = extract_relevant_sections(url)

url = fetch_wikipedia("Beethoven")
fetch_newton_wikipedia_instructions = extract_relevant_sections(url)

In [12]:
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

def extract_entities_from_responses(responses):
    all_entities = []
    for response in responses:
        doc = nlp(response)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        all_entities.extend(entities)

    return all_entities

wiki_entities_newton = extract_entities_from_responses(fetch_newton_wikipedia_instructions)
wiki_entities_caesar = extract_entities_from_responses(fetch_caesar_wikipedia_instructions)
wiki_entities_beethoven = extract_entities_from_responses(fetch_newton_wikipedia_instructions)

## Retrieve NER

In [13]:
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

def extract_entities_from_responses(responses):
    all_entities = []
    for response in responses:
        doc = nlp(response)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        all_entities.extend(entities)

    return all_entities

model_newton_entities = extract_entities_from_responses(final_responses_newton)
model_caesar_entities = extract_entities_from_responses(final_responses_caesar)
model_beethoven_entities = extract_entities_from_responses(final_responses_beethoven)

model_merged_newton_entities = extract_entities_from_responses(final_responses_merged_model_newton)
model_merged_caesar_entities = extract_entities_from_responses(final_responses_merged_model_caesar)
model_merged_beethoven_entities = extract_entities_from_responses(final_responses_merged_model_beethoven)



In [15]:
final_responses_newton

['I thought that the apple was falling because of the force of gravity. I was surprised to see it fall, but I knew that it was due to the force of gravity.</s>',
 'I observed the motion of objects and the laws of motion. I observed that objects would move in a straight line if they were not acted upon by any force. I also observed that objects would move in a circular motion if they were acted upon by a force. I then deduced that the laws of motion were the same',
 "My favorite experiment was the one that I conducted with the apple. I was able to determine the mass of the apple by observing the apple's weight as it fell from the tree. This experiment taught me a lot about the laws of motion and gravity.</s>",
 'Yes, I did. I had a friend named Robert Hooke, who was also a great scientist. He and I used to spend a lot of time together, and we would often discuss our ideas and theories. I was very interested in his work, and I was always eager to learn more about the world',
 'The proble

In [19]:
for entity in model_merged_beethoven_entities:
    print(f"Entity: {entity[0]}, Type: {entity[1]}")

Entity: Odeath of the Fidelity, Type: WORK_OF_ART
Entity: Moonlight Sonata, Type: WORK_OF_ART
Entity: one, Type: CARDINAL
Entity: hours every day, Type: DATE
Entity: One, Type: CARDINAL
Entity: third, Type: ORDINAL
Entity: Ludwig van Beethoven, Type: PERSON
Entity: Symphony, Type: ORG
Entity: 125, Type: CARDINAL
Entity: Ludwig van Beethoven, Type: PERSON


## Compare both NER, and Return Accuracy

In [20]:
def calculate_matching_ratio(model_entities, wiki_entities):
    # Initialize a counter for matching entities
    matching_entities_count = 0

    # Convert wiki_entities to a set for faster lookup
    wiki_entities_set = set(wiki_entities)

    # Iterate through model entities to check for matches
    for entity in model_entities:
        if entity in wiki_entities_set:
            matching_entities_count += 1

    # Calculate the ratio
    if len(model_entities) == 0:
        return 0  # Avoid division by zero
    matching_ratio = matching_entities_count / len(model_entities)

    return matching_ratio

matching_ratio_newton = calculate_matching_ratio(model_newton_entities, wiki_entities_newton)
matching_ratio_caesar = calculate_matching_ratio(model_caesar_entities, wiki_entities_caesar)
matching_ratio_beethoven = calculate_matching_ratio(model_beethoven_entities, wiki_entities_beethoven)

matching_merged_model_newton_ratio = calculate_matching_ratio(model_merged_newton_entities, wiki_entities_newton)
matching_merged_model_caesar_ratio = calculate_matching_ratio(model_merged_caesar_entities, wiki_entities_caesar)
matching_merged_model_beethoven_ratio = calculate_matching_ratio(model_merged_beethoven_entities, wiki_entities_beethoven)

print(f"Matching Newton Entities Ratio: {matching_ratio_newton:.2f}")
print(f"Matching Caesar Entities Ratio: {matching_ratio_caesar:.2f}")
print(f"Matching Beethoven Entities Ratio: {matching_ratio_beethoven:.2f}")

print(f"Matching Merged Model Newton Entities Ratio: {matching_merged_model_newton_ratio:.2f}")
print(f"Matching Merged Model Caesar Entities Ratio: {matching_merged_model_caesar_ratio:.2f}")
print(f"Matching Merged Model Beethoven Entities Ratio: {matching_merged_model_beethoven_ratio:.2f}")

Matching Newton Entities Ratio: 0.00
Matching Caesar Entities Ratio: 0.60
Matching Beethoven Entities Ratio: 0.60
Matching Merged Model Newton Entities Ratio: 0.00
Matching Merged Model Caesar Entities Ratio: 0.60
Matching Merged Model Beethoven Entities Ratio: 0.60


# Evaluate Originality

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def calculate_average_cosine_similarity(responses):
    # Initialize a TF-IDF Vectorizer
    vectorizer = TfidfVectorizer()

    # Fit and transform the responses to a TF-IDF matrix
    tfidf_matrix = vectorizer.fit_transform(responses)

    # Calculate cosine similarity between all pairs of responses
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Calculate the average similarity, excluding self-comparisons
    n = similarity_matrix.shape[0]
    avg_similarity = (np.sum(similarity_matrix) - n) / (n * (n - 1))

    return avg_similarity

In [10]:
average_similarity_newton = calculate_average_cosine_similarity(final_responses_newton)
average_similarity_caesar = calculate_average_cosine_similarity(final_responses_caesar)
average_similarity_beethoven = calculate_average_cosine_similarity(final_responses_beethoven)

average_similarity_merged_model_newton = calculate_average_cosine_similarity(final_responses_merged_model_newton)
average_similarity_merged_model_caesar = calculate_average_cosine_similarity(final_responses_merged_model_caesar)
average_similarity_merged_model_beethoven = calculate_average_cosine_similarity(final_responses_merged_model_beethoven)

print(f"Average Cosine Similarity Newton: {average_similarity_newton:.4f}")
print(f"Average Cosine Similarity Caesar: {average_similarity_caesar:.4f}")
print(f"Average Cosine Similarity Beethoven: {average_similarity_beethoven:.4f}")

print(f"Average Cosine Similarity Merged Model Newton: {average_similarity_merged_model_newton:.4f}")
print(f"Average Cosine Similarity Merged Model Caesar: {average_similarity_merged_model_caesar:.4f}")
print(f"Average Cosine Similarity Merged Model Beethoven: {average_similarity_merged_model_beethoven:.4f}")

Average Cosine Similarity Newton: 0.1408
Average Cosine Similarity Caesar: 0.1120
Average Cosine Similarity Beethoven: 0.1379
Average Cosine Similarity Merged Model Newton: 0.1408
Average Cosine Similarity Merged Model Caesar: 0.1120
Average Cosine Similarity Merged Model Beethoven: 0.1379
