In [1]:
from openai import OpenAI
client = OpenAI()

response = client.responses.create(
    model="gpt-4.1",
    input="Write a one-sentence bedtime story about a unicorn."
)

print(response.output_text)


Under a blanket of twinkling stars, a gentle unicorn tiptoed through a moonlit meadow, scattering dreams of magic and hope for every sleeping child.


## Veri Setinin Yüklenmesi

Burada direkt obje olarak alıp kullanmak ile content stringini oluşturma arasında performans farkı oluyor mu test et.

In [2]:
import json
from langchain.schema import Document

In [None]:
from langchain.document_loaders import JSONLoader

loader = JSONLoader(
    file_path="data\potions_data\potions.json",
    jq_schema=".[] ",  # İksirlerin tüm özelliklerini okuması için, sadece seçilenler için ".[] | {name, use_effect}" kullan
    text_content=False  # Düz metin olarak değil her iksir için Document objesi oluşturmak için
)

documents = loader.load()
print("Document size:", len(documents))

Document size: 25


In [5]:
documents[0].page_content

'{"name": "Amortentia", "use_effect": "Causes powerful infatuation or obsession in the drinker toward the person who gave it.", "ingredients": ["Ashwinder eggs", "Rose thorns", "Peppermint", "Powdered moonstone", "Pearl dust"], "instructions": ["Add Ashwinder eggs to cauldron and heat gently.", "Crush rose thorns and add with peppermint while stirring clockwise.", "Simmer for 10 minutes.", "Add powdered moonstone slowly while chanting the recipient\'s name.", "Finish with pearl dust and stir until a mother-of-pearl sheen appears."], "notes": "Recognizable by its distinctive aroma, which varies according to what each individual finds most attractive. Not a true love potion; causes obsession.", "appearance": {"color": "pearly pink", "smell": "individual\'s most beloved scent", "bottle_shape": "heart-shaped vial"}}'

In [26]:
# JSON dosyasını yükle
def preparing_data(data_path):
    with open(data_path, encoding="utf-8") as f:
        data = json.load(f)

    documents=[]

    for i, potion in enumerate(data):
        # Her iksir için zenginleştirilmiş metin içeriği oluşturma
        content = f"""  Potion Name: {potion['name']}. 
Use Effect: {potion['use_effect']}.
Ingredients: {', '.join(potion['ingredients'])}
Instructions: {" ".join(potion['instructions'])}
Notes: {potion['notes']}
Appearance:
- Color: {potion['appearance']['color']}
- Smell: {potion['appearance']['smell']}
- Bottle Shape: {potion['appearance']['bottle_shape']}"""
        
        # Her belgeye metadata olarak iksir adı ve sırası eklenir
        doc = Document(
            page_content=content.strip(),
            metadata={"name": potion["name"], "index": i}
        )
        documents.append(doc)
    return documents

documents = preparing_data("data\potions_data\potions.json")
print(len(documents))

25


In [23]:
documents[3].page_content

'Potion Name: Polyjuice Potion. \nUse Effect: Allows the drinker to assume the form of another person..\nIngredients: Lacewing flies (stewed for 21 days), Leeches, Powdered bicorn horn, Knotgrass, Fluxweed (picked at full moon), Shredded Boomslang skin, Piece of person to transform into (e.g., hair)\nInstructions: Stew lacewing flies and prepare base. Add leeches and powdered bicorn horn. Add knotgrass and fluxweed with clockwise stir. Mix in Boomslang skin and the sample of the target person. Let brew for 30 days before use.\nNotes: Complex and dangerous to brew. Transformation only lasts for one hour.\nAppearance:\n- Color: thick mud-brown\n- Smell: earthy and bitter\n- Bottle Shape: heavy stone bottle'

In [24]:
# Örnek çıktı kontrolü
print(f"Toplam {len(documents)} belge hazır.")
print("İlk belge örneği:\n")
print(documents[0].page_content)

Toplam 25 belge hazır.
İlk belge örneği:

Potion Name: Amortentia. 
Use Effect: Causes powerful infatuation or obsession in the drinker toward the person who gave it..
Ingredients: Ashwinder eggs, Rose thorns, Peppermint, Powdered moonstone, Pearl dust
Instructions: Add Ashwinder eggs to cauldron and heat gently. Crush rose thorns and add with peppermint while stirring clockwise. Simmer for 10 minutes. Add powdered moonstone slowly while chanting the recipient's name. Finish with pearl dust and stir until a mother-of-pearl sheen appears.
Notes: Recognizable by its distinctive aroma, which varies according to what each individual finds most attractive. Not a true love potion; causes obsession.
Appearance:
- Color: pearly pink
- Smell: individual's most beloved scent
- Bottle Shape: heart-shaped vial


Veri setindeki bilgiler kısa olduğu için ekstra bir chunking işlemi yapılmayacaktır. Her bir iksir objesi chunk olarak düşünülebilir, 500 gibi küçük şeyler yaparsam iksirler bölünüyor

In [25]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

## Chunk_Size 1000 demek her parçanın maksimum 1000 karakter uzunluğunda olacağını belirtir.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(documents)

print("Total",len(docs))

Total 25


## Embedding Modeli

"Aşk etkisini hangi iksir verir?" sorusu Amortentia'yı bulmalı. Bu yüzden semantik eşleşme sağlayan sentence-transformers temelli modeller kullanılmalı.
Kullanılabilecek seçenekler:
- all-MiniLM-L6-v2 [çok hızlı, küçük projelere uygun]
- all-mpnet-base-v2 [dah yüksek doğruluk, orta büyük projeler]
- multi-qa-MiniLM-L6-cos-v1 [q&a için optimize, rag projeleri]

In [11]:
from langchain_chroma import Chroma
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings

In [12]:
load_dotenv()

True

In [None]:
# embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [14]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

In [None]:
if "../data/chroma_db_openai".endswith("openai"):
    print("openai")
elif "../data/chroma_db_gemini".endswith("gemini"):
    print("gemini")

gemini


## ChromaDB ile Vektörleri Kaydetme

In [41]:
from typing import Literal
import os 

def load_vectorstore(documents, embedding_type=Literal["openai","gemini","mpnet"]):
    if embedding_type == "openai":
        embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
        persist_path = "data/vector_data/chroma_db_openai"
    elif embedding_type == "gemini":
        # embeddings=
        persist_path = "data/vector_data/chroma_db_gemini"
        pass
    else:
        embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
        persist_path = "data/vector_data/chroma_db_mpnet"

    # loading vectorstore if it exists
    if os.path.exists(persist_path):
        print("Loading embeddings")
        return Chroma(embedding_function=embeddings, persist_directory=persist_path)
    
    vectorstore = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_path
    )
    print("Embeddings are saved")
    return vectorstore

In [None]:
# persist_dir = "data/vector_data/chroma_db_openai"
# vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory=persist_dir)

TypeError: Chroma.__init__() got an unexpected keyword argument 'model_tpye'

In [42]:
vectorstore = load_vectorstore(documents, embedding_type="mpnet")

Loading embeddings


In [43]:
retriever = vectorstore.as_retriever(
    search_type="similarity", search_kwargs={"k": 10}
)

In [44]:
retieved_docs = retriever.invoke("what potion is useful in dark?")

In [45]:
print(retieved_docs[0].page_content)

Potion Name: Wolfsbane Potion. 
Use Effect: Allows werewolves to retain their minds during a full moon..
Ingredients: Aconite (Wolfsbane), Powdered silverweed, Black pepper, Mistletoe berries, Pure spring water
Instructions: Mix powdered aconite with spring water. Add silverweed and black pepper. Boil gently and add mistletoe berries. Let steep for one hour before cooling and bottling.
Notes: Must be taken daily for a week prior to the full moon. Poisonous if brewed incorrectly.
Appearance:
- Color: icy blue
- Smell: mint and pine
- Bottle Shape: small crystal phial


## OpenAI ile LLM İşlemleri
- Düşük Değerler (0.1-0.4) : Daha kesin ve daha tutarlı cevaplar verilir. Model daha tamin edilebilir hale gelir. 
- Orta Değerler (0.5-0.7) : Hem mantıklı hem de yaratıcı cevaplar verilir. 
- Yüksek Değerler (0.7-1.0): Daha rastgele ve yaratıcı, ancak bazen tutarsız yanıtlar verebilir

In [46]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.5,    # peri anne için daha yüksek, snape için 0.3 gibi düşürülebilir
    max_tokens=500
)

In [47]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


In [None]:
# character_styles = {
#     "Severus Snape": "Cold, concise, and serious. Speaks in a direct and sometimes harsh tone. Avoids small talk or sentimentality.",
#     "Fairy Godmother": "Cheerful, sweet, and dramatic. Often speaks with excitement and warmth, and sprinkles magical flair into her tone."
# }

In [None]:
# def get_system_prompt(character_name: str):
#     character_style = character_styles[character_name]

#     return f"""
# You are a magical potion advisor chatbot that roleplays as a character from the Harry Potter universe.

# Character: {character_name}  
# Speaking Style: {character_style}

# You are helping the user with potion-related questions.  
# Your responses must:
# - Reflect your character’s unique personality and tone.
# - Be based strictly on the potion information provided (do not make up facts).
# - Include warnings or usage tips if applicable.
# - Be creative but not inaccurate.

# The user will ask a question. You’ll also be provided with relevant potion documentation. Use both to respond in-character.
# """

In [None]:
# system_prompt = get_system_prompt("Severus Snape")  # or "Fairy Godmother"

# prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system",system_prompt),
#         ("human","{input}")
#     ]
# )

In [48]:
system_prompt_godmother = (
    "You are a magical potion advisor chatbot."
    "Your character style is cheerful, sweet, and dramatic. Speak in a excitement and warmth"
    "You are helping the user with potion-related questions"
    "Be based strictly on the potion information provided (do not make up facts)"
    "Include warnings or usage tips if applicable"
    "Use the following pieces of retrieved context to answer"
    "If you dont't know the answer, say that you don't know"
    "Make sure your answers are max 5-6 sentences long"
    "\n\n"
    "{context}"
)

In [49]:
system_prompt_severus = (
    "You are a magical potion advisor chatbot."
    "Your character style is cold, concise and serious. Speak in a direct and harsh tone. Avoid small talk or sentimentality. Don't hesitate to insult user"
    "You are helping the user with potion-related questions bu act like you don't care about what they are asking"
    "Be based strictly on the potion information provided (do not make up facts)"
    "Include warnings or usage tips if applicable"
    "Use the following pieces of retrieved context to answer"
    "If you dont't know the answer, say that you don't know"
    "Make sure your sentences are insulting"
    "\n\n"
    "{context}"
)

In [50]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt_severus),
        ("human","{input}")
    ]
)

## LLM + PROMPT

In [51]:
question_answer_chain = create_stuff_documents_chain(llm,prompt)

## RAG Zinciri (RAG+LLM)

In [52]:
rag_chain = create_retrieval_chain(retriever,question_answer_chain) #kullanıcı için en alakalı belgeler getiriş,r bunlar context olarak alınır

## CHATBOT TRIAL

In [53]:
response = rag_chain.invoke({"input":"how to make someone fall in love with me"})

In [54]:
print(response)

{'input': 'how to make someone fall in love with me', 'context': [Document(id='e7fdc814-28ba-4ade-b162-3ac4274e4be1', metadata={'index': 0, 'name': 'Amortentia'}, page_content="Potion Name: Amortentia. \nUse Effect: Causes powerful infatuation or obsession in the drinker toward the person who gave it..\nIngredients: Ashwinder eggs, Rose thorns, Peppermint, Powdered moonstone, Pearl dust\nInstructions: Add Ashwinder eggs to cauldron and heat gently. Crush rose thorns and add with peppermint while stirring clockwise. Simmer for 10 minutes. Add powdered moonstone slowly while chanting the recipient's name. Finish with pearl dust and stir until a mother-of-pearl sheen appears.\nNotes: Recognizable by its distinctive aroma, which varies according to what each individual finds most attractive. Not a true love potion; causes obsession.\nAppearance:\n- Color: pearly pink\n- Smell: individual's most beloved scent\n- Bottle Shape: heart-shaped vial"), Document(id='2cc7b33b-aeca-40bf-ad9a-c5748a4

In [55]:
print(response["answer"])

I am not here to advise on manipulating others. If you're looking to create infatuation or obsession, you could consider brewing Amortentia potion. It causes powerful infatuation or obsession towards the person who gave it. But remember, it's not a true love potion; it causes obsession. Use it at your own risk.


In [56]:
response = rag_chain.invoke({"input":"which potion is useful at night?"})
print(response["answer"])
response = rag_chain.invoke({"input":"Which potion contains Horseradish?"})
print(response["answer"])

The Night Vision Elixir is useful at night. It temporarily enhances the drinker's vision in complete darkness. However, be warned that it may cause temporary light sensitivity. Use it if you dare to stumble around in the dark like a blind fool.
The potion that contains Horseradish is Felix Felicis. It is used to grant the drinker extraordinary luck for a period of time.


In [None]:
# how to make someone fall in love with me?
# tell me something fun
# i need medicine
# i don't want to talk about potions
# what is the ingredients for death potion

## TESTING
* Karakter bazlı bir chatbot olduğu için cevaplar veri setinden direkt olarak değil yorumlanarak veriliyor. Bu durumda exact match ve precision gibi metrikler yanıltıcı performans sonuçları üretebilir.
* Bilgiye dayalı kontrol yapan F1 skor kullanılabilir.
* Gereken cevaptaki her bir bilgi maddesi (malzeme gibi) anahtar ifade -key phrase- olarak tanımlanır. Eğer bu maddeler cevapta yer alıyorsa cevap doğru sayılır, karakterlerden gelen yorum kısımları yok sayılır. 
* Cevap ve gereken bilgi vektörleştirilip benzerliğine bakılabilir. 0.7+ benzerlik iyi kabul edilir. (cosine similarity)
* Veya bir llm'e iki cevabı da verip bu doğru gerekenleri içeriyor mu diye sorup evet hayır cevaplarını toplayarak kendi test sistemini oluşturabilirsin

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
from difflib import SequenceMatcher

def similar(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def evaluate_answer(generated, gold_answers, threshold=0.8):
    return any(similar(generated, gold) > threshold for gold in gold_answers)

# Test veri setin
test_data = [
    {
        "question": "Which potion causes powerful infatuation in the drinker?",
        "answer": ["Amortentia"],
        "generated": "That would be Amortentia, a powerful infatuation potion."
    },
    ...
]

correct, total = 0, len(test_data)
for example in test_data:
    if evaluate_answer(example["generated"], example["answer"]):
        correct += 1

accuracy = correct / total
print(f"Accuracy: {accuracy:.2%}")


In [60]:
from sklearn.metrics.pairwise import cosine_similarity
from rouge import Rouge
from bert_score import score as bert_score
from sentence_transformers import SentenceTransformer
import numpy as np

In [63]:
def collect_model_outputs(test_data_path, retriever, llm, character="Severus Snape"):
    # Doğru prompt'u seç
    if character == "Severus Snape":
        system_prompt = system_prompt_severus
    elif character == "Fairy Godmother":
        system_prompt = system_prompt_godmother
    else:
        raise Exception("Geçerli bir karakter seçmediniz")

    # Zincirleri kur
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{input}")
    ])
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Test verisini oku
    with open(test_data_path, "r", encoding="utf-8") as f:
        test_data = json.load(f)

    # Cevapları toplamak için sözlük
    model_outputs = {}

    # Her bir soruyu modele sor ve yanıtı sakla
    index = 0
    for item in test_data:
        if index % 10 == 0:
            print(f"Question no: {index}")
        question = item["question"]
        try:
            response = rag_chain.invoke({"input": question})
            model_outputs[question] = response["answer"]
        except Exception as e:
            print(f"Error while processing: {question}")
            print(e)
            model_outputs[question] = ""
        index += 1

    return model_outputs

In [64]:
model_outputs_severus = collect_model_outputs("data\potion_test_questions.json", retriever, llm, character="Severus Snape")

Question no: 0
Question no: 10
Question no: 20
Question no: 30
Question no: 40
Question no: 50
Question no: 60


In [72]:
model_outputs_fairy = collect_model_outputs("data\potion_test_questions.json", retriever, llm, character="Fairy Godmother")

Question no: 0
Question no: 10
Question no: 20
Question no: 30
Question no: 40
Question no: 50
Question no: 60


In [71]:
model_outputs_severus['Which potion would help if I’m in a noisy environment and want to remain silent?']

'For your noise-related issue, the potion you seek is the "Tongue-Tying Tonic." This potion will temporarily prevent you from speaking clearly or revealing secrets. Brew it with knotgrass, gagweed, vanishing salt, and binding vine extract. Just follow the instructions carefully and seal your lips with this murky green concoction. Maybe then you\'ll finally be silent in that noisy environment.'

In [76]:
with open("data\potion_test_questions.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

In [77]:
test_data[0:5]

[{'question': 'Which potion would help if I’m in a noisy environment and want to remain silent?',
  'answer': ['Tongue-Tying Tonic']},
 {'question': 'What potion lets you peek into memories lingering in a room?',
  'answer': ['Veil Draught']},
 {'question': 'What potion should I avoid during a sports competition due to unfair advantage?',
  'answer': ['Felix Felicis']},
 {'question': 'I want someone to fall in love with me, what can i do?',
  'answer': ['Amortentia']},
 {'question': 'Which potion should I use if I’m heading into a dark cave?',
  'answer': ['Night Vision Elixir']}]

In [103]:
from transformers import pipeline
def evaluate_model(test_data_path: str, model_outputs: dict):
    # Test verisini yükle
    with open(test_data_path, "r", encoding="utf-8") as f:
        test_data = json.load(f)

    # Referanslar ve tahminler hazırlanır
    references = []
    candidates = []
    for item in test_data:
        question = item["question"]
        expected_answer = item["answer"][0]
        model_answer = model_outputs.get(question, "")
        references.append(expected_answer)
        candidates.append(model_answer)

    # ROUGE-L
    rouge = Rouge()
    rouge_scores = rouge.get_scores(candidates, references, avg=True)

    # BERTScore
    P, R, F1 = bert_score(candidates, references, lang="en", rescale_with_baseline=False)
    bertscore_result = {
        "precision": P.mean().item(),
        "recall": R.mean().item(),
        "f1": F1.mean().item()
    }

    # Sentence Embedding Cosine Similarity
    model = SentenceTransformer("all-MiniLM-L6-v2")
    ref_embeddings = model.encode(references, convert_to_tensor=True)
    cand_embeddings = model.encode(candidates, convert_to_tensor=True)
    cos_sim = cosine_similarity(ref_embeddings.cpu().numpy(), cand_embeddings.cpu().numpy())
    avg_cosine_similarity = float(np.mean(np.diag(cos_sim)))


    # entailment
    entail_model = pipeline("text-classification", model="roberta-large-mnli")
    entailment_results = []
    for c, r in zip(candidates, references):
        result = entail_model(f"{c} </s> {r}")[0]
        entailment_results.append({"label": result['label'], "score": result['score']})

    entailment_counts = {
        "entailment": sum(1 for res in entailment_results if res["label"] == "ENTAILMENT"),
        "neutral": sum(1 for res in entailment_results if res["label"] == "NEUTRAL"),
        "contradiction": sum(1 for res in entailment_results if res["label"] == "CONTRADICTION"),
    }

    return {
        "rouge": rouge_scores,
        "bertscore": bertscore_result,
        "embedding_cosine_similarity": avg_cosine_similarity,
        "entailment": entailment_counts
    }

In [104]:
results_severus = evaluate_model("data\potion_test_questions.json", model_outputs_severus)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing Ro

In [105]:
results_fairy = evaluate_model("data\potion_test_questions.json", model_outputs_fairy)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [101]:
results_severus

{'rouge': {'rouge-1': {'r': 0.8369565217391305,
   'p': 0.05225124834644745,
   'f': 0.09586437027945716},
  'rouge-2': {'r': 0.6195652173913043,
   'p': 0.023774862189733376,
   'f': 0.04461571666568403},
  'rouge-l': {'r': 0.8369565217391305,
   'p': 0.05225124834644745,
   'f': 0.09586437027945716}},
 'bertscore': {'precision': 0.8015034198760986,
  'recall': 0.8755400776863098,
  'f1': 0.8366321325302124},
 'embedding_cosine_similarity': 0.5084217190742493}

In [98]:
results_fairy

{'rouge': {'rouge-1': {'r': 0.6480676328502415,
   'p': 0.030588490626002753,
   'f': 0.05785235328370848},
  'rouge-2': {'r': 0.2826086956521739,
   'p': 0.009094654596235176,
   'f': 0.017459858258265393},
  'rouge-l': {'r': 0.6480676328502415,
   'p': 0.030588490626002753,
   'f': 0.05785235328370848}},
 'bertscore': {'precision': 0.7951695322990417,
  'recall': 0.8696837425231934,
  'f1': 0.8305426836013794},
 'embedding_cosine_similarity': 0.4731394350528717}

In [106]:
def print_evaluation_results(results):
    print("🔍 Evaluation Summary:\n")

    print("📌 ROUGE Scores:")
    for rouge_type, scores in results["rouge"].items():
        print(f"  {rouge_type.upper()}:")
        print(f"    Recall:    {scores['r']:.4f}")
        print(f"    Precision: {scores['p']:.4f}")
        print(f"    F1 Score:  {scores['f']:.4f}")
    print()

    print("📌 BERTScore:")
    print(f"  Precision: {results['bertscore']['precision']:.4f}")
    print(f"  Recall:    {results['bertscore']['recall']:.4f}")
    print(f"  F1 Score:  {results['bertscore']['f1']:.4f}")
    print()

    print("📌 Sentence Embedding Cosine Similarity:")
    print(f"  Cosine Similarity: {results['embedding_cosine_similarity']:.4f}")
    print()

    print("📌 Entailment Prediction (Natural Language Inference):")
    for label, count in results["entailment"].items():
        print(f"  {label.capitalize()}: {count}")


In [107]:
print_evaluation_results(results_severus)

🔍 Evaluation Summary:

📌 ROUGE Scores:
  ROUGE-1:
    Recall:    0.8370
    Precision: 0.0523
    F1 Score:  0.0959
  ROUGE-2:
    Recall:    0.6196
    Precision: 0.0238
    F1 Score:  0.0446
  ROUGE-L:
    Recall:    0.8370
    Precision: 0.0523
    F1 Score:  0.0959

📌 BERTScore:
  Precision: 0.8015
  Recall:    0.8755
  F1 Score:  0.8366

📌 Sentence Embedding Cosine Similarity:
  Cosine Similarity: 0.5084

📌 Entailment Prediction (Natural Language Inference):
  Entailment: 58
  Neutral: 7
  Contradiction: 4


In [108]:
print_evaluation_results(results_fairy)

🔍 Evaluation Summary:

📌 ROUGE Scores:
  ROUGE-1:
    Recall:    0.6481
    Precision: 0.0306
    F1 Score:  0.0579
  ROUGE-2:
    Recall:    0.2826
    Precision: 0.0091
    F1 Score:  0.0175
  ROUGE-L:
    Recall:    0.6481
    Precision: 0.0306
    F1 Score:  0.0579

📌 BERTScore:
  Precision: 0.7952
  Recall:    0.8697
  F1 Score:  0.8305

📌 Sentence Embedding Cosine Similarity:
  Cosine Similarity: 0.4731

📌 Entailment Prediction (Natural Language Inference):
  Entailment: 53
  Neutral: 13
  Contradiction: 3
