In [3]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings  # Updated import
from langchain.vectorstores import Chroma

from dotenv import load_dotenv
import os
load_dotenv()

LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')  # or os.environ['API_KEY']
LANGCHAIN_PROJECT = os.getenv('LANGCHAIN_PROJECT')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Store your speech in a text file
speech_text = """Mumbai – The City of Dreams

Good morning everyone,

Today, I want to talk about Mumbai, a city that never sleeps and is often called the "City of Dreams." Located on the western coast of India, Mumbai is not just a city; it’s an emotion for millions of people.

Mumbai is the financial capital of India, home to the Bombay Stock Exchange, Reserve Bank of India, and many multinational corporations. It is also the heart of India’s film industry, Bollywood, which has gained global recognition.

This vibrant city is known for its rich history, iconic landmarks like the Gateway of India, Marine Drive, and Chhatrapati Shivaji Maharaj Terminus, a UNESCO World Heritage Site. The bustling streets, local trains, and the delicious street food reflect the spirit of the city – fast-paced yet welcoming.

Mumbai is a melting pot of cultures, with people from all over the country living here. It celebrates diversity through festivals like Ganesh Chaturthi, Eid, Christmas, and more. Despite its challenges, such as overcrowding and monsoons, the resilience of Mumbai's people is truly remarkable.

The city’s energy, opportunities, and sense of belonging make it a dream destination for many. As the saying goes, “If you can survive in Mumbai, you can survive anywhere in the world.”

Thank you!
"""
# Save speech as a text file
with open("speech.txt", "w") as file:
    file.write(speech_text)

# Load the text file
loader = TextLoader("speech.txt")
documents = loader.load()

# Split the text into chunks
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Generate embeddings
embeddings = OpenAIEmbeddings()  # Updated embeddings
vector_db = Chroma.from_documents(texts, embeddings)

# Perform similarity search with scores
query = "What are the iconic landmarks in Mumbai?"
results_with_scores = vector_db.similarity_search_with_score(query)

# Display results with scores
print("Search Results with Scores:")
for i, (result, score) in enumerate(results_with_scores, 1):
    print(f"{i}. Text: {result.page_content}")
    print(f"   Similarity Score: {score}")


Created a chunk of size 303, which is longer than the specified 300


Search Results with Scores:
1. Text: This vibrant city is known for its rich history, iconic landmarks like the Gateway of India, Marine Drive, and Chhatrapati Shivaji Maharaj Terminus, a UNESCO World Heritage Site. The bustling streets, local trains, and the delicious street food reflect the spirit of the city – fast-paced yet welcoming.
   Similarity Score: 0.2791742980480194
2. Text: This vibrant city is known for its rich history, iconic landmarks like the Gateway of India, Marine Drive, and Chhatrapati Shivaji Maharaj Terminus, a UNESCO World Heritage Site. The bustling streets, local trains, and the delicious street food reflect the spirit of the city – fast-paced yet welcoming.
   Similarity Score: 0.2791742980480194
3. Text: This vibrant city is known for its rich history, iconic landmarks like the Gateway of India, Marine Drive, and Chhatrapati Shivaji Maharaj Terminus, a UNESCO World Heritage Site. The bustling streets, local trains, and the delicious street food reflect the 