In [5]:
import pandas as pd 
BarbieQA = pd.read_csv("BarbieQA.csv")
OppenheimerQA = pd.read_csv("OppenheimerQA.csv")


# Answers from Movie 

In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, Settings
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.ollama import Ollama
import os.path

Persist_DIR = "./storageBarbie"

if not os.path.exists(Persist_DIR):
    print("Initializing storage and loading documents.")
    documents = SimpleDirectoryReader("Data/Barbie").load_data()
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=Persist_DIR)
else:
    print("Loading existing storage.")
    storage_context = StorageContext.from_defaults(persist_dir=Persist_DIR)
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    index = load_index_from_storage(storage_context)

# Setting up Mistral model
Settings.llm = Ollama(model="mistral", request_timeout=30.0)
mistral_query_engine = index.as_query_engine()
mistral_response = mistral_query_engine.query("What does Ken think about the real world when he first enters it?")
print("Mistral Response:", mistral_response.response)

# Setting up Llama2 model
Settings.llm = Ollama(model="llama2", request_timeout=30.0)
llama2_query_engine = index.as_query_engine()
llama2_response = llama2_query_engine.query("What does Ken think about the real world when he first enters it?")
print("Llama2 Response:", llama2_response.response)


Loading existing storage.
Mistral Response:  Ken initially finds the real world confusing and overwhelming. He struggles to understand his role in it and feels a strong desire for approval and validation from Barbie Margot. Ken also expresses frustration with the changes that have occurred in Barbie Land and attempts to regain control by manipulating situations to fit his ideal image of himself. However, he ultimately comes to realize that he must learn to accept and embrace who he truly is, both with and without Barbie. This journey leads him to understand that life is full of change and that being a leader means learning to adapt and grow.
Llama2 Response: Based on the context information provided, it seems that Ken has a very different view of the real world compared to Barbie. While Barbie is initially excited to enter the real world and experience new things, Ken appears to be overwhelmed and disoriented by the change. He struggles to understand the complexities of the real world 

# Similarity Score based on Semantic Search 

In [6]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, Settings
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.ollama import Ollama
from sentence_transformers import SentenceTransformer, util
import numpy as np
import os

# Path to persistently store the indexed data
Persist_DIR = "./storageOppenheimer"

# Check if the persistent storage directory exists; if not, initialize and load documents
if not os.path.exists(Persist_DIR):
    print("Initializing storage and loading documents.")
    # Assuming you have a directory 'Data/Barbie' with relevant documents
    documents = SimpleDirectoryReader("Data/Oppenheimer").load_data()
    # Resolve and set the embedding model to be used for indexing
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    # Create an index from the documents and persist it
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=Persist_DIR)
else:
    print("Loading existing storage.")
    # Load the existing storage context and set the embedding model
    storage_context = StorageContext.from_defaults(persist_dir=Persist_DIR)
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    index = load_index_from_storage(storage_context)

# First question from the DataFrame
question = OppenheimerQA["Question"][0]

# Setting up Mistral model for querying
Settings.llm = Ollama(model="mistral", request_timeout=30.0)
mistral_query_engine = index.as_query_engine()
mistral_response = mistral_query_engine.query(question)

# Setting up Llama2 model for querying
Settings.llm = Ollama(model="llama2", request_timeout=50.0)
llama2_query_engine = index.as_query_engine()
llama2_response = llama2_query_engine.query(question)



# Assuming these are the responses from Mistral and Llama2 models
mistral_response_text = mistral_response.response
llama2_response_text = llama2_response.response

# The reference text for the question from our earlier DataFrame
reference_text = OppenheimerQA["Answer"][0]

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the responses and the reference text to get their embeddings
responses_embeddings = model.encode([mistral_response_text, llama2_response_text], convert_to_tensor=True)
reference_embedding = model.encode(reference_text, convert_to_tensor=True)

# Compute semantic similarity scores between the LLM responses and the reference text
similarity_scores = util.pytorch_cos_sim(responses_embeddings, reference_embedding)

# Print out the similarity scores for both responses
print(f"Similarity score for Mistral's response: {similarity_scores[0][0]}")
print(f"Similarity score for Llama2's response: {similarity_scores[1][0]}")



Loading existing storage.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
model.safetensors: 100%|██████████| 90.9M/90.9M [00:12<00:00, 7.41MB/s]


Similarity score for Mistral's response: 0.8183347582817078
Similarity score for Llama2's response: 0.8330925107002258


In [8]:
from ragas.metrics import answer_relevancy

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, Settings
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.ollama import Ollama
from sentence_transformers import SentenceTransformer, util
import numpy as np
import os

# Path to persistently store the indexed data
Persist_DIR = "./storageOppenheimer"

# Check if the persistent storage directory exists; if not, initialize and load documents
if not os.path.exists(Persist_DIR):
    print("Initializing storage and loading documents.")
    # Assuming you have a directory 'Data/Barbie' with relevant documents
    documents = SimpleDirectoryReader("Data/Oppenheimer").load_data()
    # Resolve and set the embedding model to be used for indexing
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    # Create an index from the documents and persist it
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=Persist_DIR)
else:
    print("Loading existing storage.")
    # Load the existing storage context and set the embedding model
    storage_context = StorageContext.from_defaults(persist_dir=Persist_DIR)
    Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")
    index = load_index_from_storage(storage_context)

question = OppenheimerQA["Question"][0]

Settings.llm = Ollama(model="mistral", request_timeout=30.0)
mistral_query_engine = index.as_query_engine()
mistral_response = mistral_query_engine.query(question)

# Setting up Llama2 model for querying
Settings.llm = Ollama(model="llama2", request_timeout=50.0)
llama2_query_engine = index.as_query_engine()
llama2_response = llama2_query_engine.query(question)