In [13]:
from dotenv import load_dotenv
import os
from google import genai
from google.genai import types
import numpy as np

In [None]:
# Read the Gemini API key from the environment (a local .env file is honoured)
# and build the client object used by the embedding cells below.

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")

# The `google.genai` SDK is driven through a Client instance; it has no
# module-level `configure` function, so the client must be constructed here.
client = genai.Client(api_key=api_key)




In [15]:

# result = client.models.embed_content(
#     model="gemini-embedding-001",
#     contents=["What is the meaning of life?"],
#     config=types.EmbedContentConfig(task_type="SEMANTIC_SIMILARITY"))

# for embedding in result.embeddings:
#     print(embedding)
# print(result.embeddings[0])

In [None]:
# Basic cosine-similarity check between sentences
# (for example a query and a retrieved passage).
text_1, text_2, text_3 = (
    "cat sits on mat",
    "dog lies on rug",
    "how do I bake a cake",
)

# Name of the embedding model passed to embed_text() below.
model = "gemini-embedding-001"

def embed_text(text, model_name, task_type="SEMANTIC_SIMILARITY"):
    """Embed ``text`` through the module-level ``client`` and return a NumPy vector.

    Args:
        text: Content forwarded unchanged as ``contents`` to ``embed_content``.
        model_name: Name of the embedding model to call.
        task_type: Task hint placed in ``EmbedContentConfig``. Defaults to
            ``"SEMANTIC_SIMILARITY"``, which is what this function always used
            before the parameter existed.

    Returns:
        ``np.ndarray`` built from the ``values`` of the first embedding in the
        response. Any further embeddings in the response are ignored.
    """
    response = client.models.embed_content(
        model=model_name,
        contents=text,
        config=types.EmbedContentConfig(task_type=task_type),
    )
    # Only the first returned embedding is used.
    return np.array(response.embeddings[0].values)

# Embed each sample sentence with the same model so the vectors are comparable.
embedding_1 = embed_text(text=text_1, model_name=model)
embedding_2 = embed_text(text=text_2, model_name=model)
embedding_3 = embed_text(text=text_3, model_name=model)

def cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two vectors.

    Args:
        embedding1: First vector (NumPy array or any sequence of numbers).
        embedding2: Second vector, same length as ``embedding1``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``. If either vector has zero norm the
        similarity is undefined; ``0.0`` is returned instead of dividing by
        zero and producing NaN.
    """
    vec_a = np.asarray(embedding1, dtype=float)
    vec_b = np.asarray(embedding2, dtype=float)

    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if norm_product == 0:
        # A zero vector has no direction, so report "no similarity".
        return 0.0

    return np.dot(vec_a, vec_b) / norm_product

# Score the first sentence against each of the other two sentences.
similarity_1 = cosine_similarity(embedding1=embedding_1, embedding2=embedding_2)
similarity_2 = cosine_similarity(embedding1=embedding_1, embedding2=embedding_3)

# Report both scores rounded to four decimals.
print(f"the similarity of {text_1} and {text_2} is {similarity_1:.4f}")
print(f"the similarity of {text_1} and {text_3} is {similarity_2:.4f}")

In [None]:
# Embed a few sentences with a Sentence Transformer and score them against a query.

from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model
#    (this rebinds `model`, which an earlier cell used for a model-name string)
model = SentenceTransformer("all-MiniLM-L6-v2")

# The sentences to encode
sentences = [
    "My home is far away from here",
    "i live next to this house",
    "i love orange",
    "i like to go on a vacation"
]

# 2. Calculate embeddings by calling model.encode()
embeddings = model.encode(sentences)
print(embeddings.shape)
# Four sentences are encoded, so expect 4 rows (the earlier note gave 384
# columns for this model -- confirm by running).

query="How far do you stay"

query_embeddings = model.encode(query)
print(query_embeddings.shape)

# 3. Calculate the embedding similarities
similarities = model.similarity(embeddings, query_embeddings)
print(similarities)
# NOTE(review): the 3x3 matrix previously pasted here came from a different
# example. This call compares the four sentences with a single query, so it
# presumably yields one score per sentence -- confirm by running.

# Quick sanity check on two closely related words.
similar = model.similarity(model.encode("cat") , model.encode("kitty"))
print(similar)

In [36]:
from sentence_transformers import SentenceTransformer
import chromadb


In [None]:
# Embed a handful of sentences with a Sentence Transformer, store them in
# ChromaDB, then query the collection and print the closest matches.

chroma_Client = chromadb.Client()
model = SentenceTransformer("all-MiniLM-L6-v2")

collection = chroma_Client.get_or_create_collection(name="my_collection")

documents=[
    "Artificial intelligence is transforming healthcare.",
    "The capital of France is Paris.",
    "Machine learning models can be used for predictions.",
    "Deep learning is a subset of machine learning.",
    "Eiffel Tower is one of the most famous landmarks in Paris."
]

embeddings = model.encode(documents)

# Hand Chroma plain Python lists, as the PDF-chunk cell further down does,
# instead of relying on it accepting NumPy arrays directly.
collection.upsert(
    documents=documents,
    embeddings=embeddings.tolist(),
    ids=[f"doc_{i}" for i in range(len(documents))]
)
query = "Tell me about Paris."

query_embedding=model.encode(query)

# Ask for an explicit number of neighbours: leaving n_results unset requests
# Chroma's default (10), which is more than the five documents stored here.
results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=min(3, len(documents))
)

print("Query: ",query)
print("Top Matches are")
# results["documents"][0] / results["distances"][0] hold the hits for the
# first (and only) query embedding.
for doc, distance in zip(results["documents"][0], results["distances"][0]):
    print(f"- {doc} (distance:{distance:.4f})")

In [55]:
# document based basic embedding and query
from pypdf import PdfReader

In [56]:
def extract_text_from_doc(pdf_path):
    """Return the text of every page of a PDF, one page per newline-terminated block.

    Args:
        pdf_path: Path handed to ``PdfReader``.

    Returns:
        A single string: the extracted text of each page followed by ``"\n"``.
    """
    reader = PdfReader(pdf_path)

    # A real newline separates pages; the literal "/n" used before glued the
    # last word of one page to the first word of the next. `or ""` keeps the
    # concatenation safe if a page yields no text.
    page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    return "".join(page_texts)
document_text = extract_text_from_doc("./Data/Ai.pdf")
# print(document_text)

In [57]:
def chunk_text(text, chunk_size=100, overlap=50):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Args:
        text: Input string; it is split on whitespace with ``str.split()``.
        chunk_size: Maximum number of words per chunk (must be positive).
        overlap: Number of words shared by consecutive chunks
            (``0 <= overlap < chunk_size``).

    Returns:
        List of chunk strings, words joined by single spaces. Empty or
        whitespace-only text yields an empty list, and no empty chunk is
        ever emitted.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size`` (the window would
            never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []

    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # Once a window reaches the last word, any further window would only
        # repeat the tail of this one (or be empty), so stop here.
        if end >= len(words):
            break

    return chunks
# Chunk the extracted PDF text; window and overlap are spelled out so they are easy to tune.
chunks = chunk_text(document_text, chunk_size=100, overlap=50)
# print(len(chunks))
# print(chunks)

In [58]:
# Encode every PDF chunk and store it in the existing collection.
# NOTE(review): these ids reuse the doc_<i> pattern of the demo sentences
# upserted earlier into the same collection, so matching ids are replaced and
# any remaining demo entries stay searchable -- consider a dedicated
# collection or a distinct id prefix.
chunk_vectors = model.encode(chunks)
embedding = chunk_vectors.tolist()

chunk_ids = [f"doc_{i}" for i, _ in enumerate(chunks)]

collection.upsert(documents=chunks, embeddings=embedding, ids=chunk_ids)

# for doc , score in zip(result_doc["documents"][0] , result_doc["distances"][0]):
#     print(f"- {doc} (distance: {score:.4f})")



In [59]:
# Retrieval-augmented chat: for each question, fetch the closest stored chunks
# and ask Gemini to answer from that context only.

import os
from google import genai
from dotenv import load_dotenv
from google.genai import types

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")

client = genai.Client(api_key=api_key)


while True:
    try:
        # Strip so that " quit " still exits and blank lines are detected.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or closing stdin ends the chat without a traceback.
        print("Chatbot: Goodbye!")
        break

    if query.lower() in {"quit" , "exit" , "break"}:
        print("Chatbot: Goodbye!")
        break

    if not query:
        # Nothing to retrieve or answer for an empty line; prompt again.
        continue

    query_embedded = model.encode(query)

    # One query embedding, passed as a nested plain list like the stored chunk vectors.
    result_doc = collection.query(
        query_embeddings=[query_embedded.tolist()],
        n_results=3
    )

    # Hits for the first (and only) query embedding.
    retrieved_chunks = result_doc["documents"][0]
    context = "\n\n".join(retrieved_chunks)


    prompt=f"""
    You are a helpful Assistant.
    Answer the following query based only on the provided context.
    
    query={query}

    Context:{context}
    """

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        config=types.GenerateContentConfig(
            temperature=0.7,
            # max_output_tokens can be set here to cap the answer length
        ),
        contents=prompt
    )

    # response.text may be empty when the model returns no text part; avoid printing "None".
    answer = response.text or "(no text returned)"
    print(f"ChatBot: {answer}")


ChatBot: There is no single, universally accepted definition for Artificial Intelligence. However, the Oxford English Dictionary defines AI as “the capacity of computers, or other machines, to exhibit intelligent behaviour”. This means AI systems appear to think, learn and act like humans, and in some cases can exceed human capabilities. AI systems can analyse vast amounts of data, solve complex problems, make decisions, and perform creative tasks.
ChatBot: This report aims to provide a general introduction to Artificial Intelligence (AI) technology, its business applications, and its relevance to Travel & Tourism. While AI has gained significant attention in recent years, especially in 2023, it is not new. Its history can be traced back to the development of computers after the Second World War, with the Dartmouth Conference in 1956 bringing together researchers to explore "thinking machines." Today, it is almost impossible to browse news or social media without seeing mention of AI, 