## LLM Demo

In [5]:
from langchain_google_genai import GoogleGenerativeAI
import os
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")


In [None]:
llm = GoogleGenerativeAI(model="gemini-2.0-flash")

In [6]:
result = llm.invoke("What is the capital of France?")
print(result)

The capital of France is **Paris**.


## Chat Models

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
response = chat_model.invoke(messages)
print(response.content)  

The capital of France is Paris.


In [None]:
Temperature, max_completion_tokens

## Open Source models

In [1]:
import getpass
import os

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Enter API key for HuggingFace: ")


In [2]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from huggingface_hub import login
import os
login(token=os.environ["HUGGINGFACEHUB_API_TOKEN"])

llm = HuggingFaceEndpoint(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
)

model = ChatHuggingFace(llm=llm)

In [3]:
model.invoke("What is the capital of France?")

StopIteration: 

## Local Load HF

In [3]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline


In [5]:
llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs={"temperature": 0.7, "max_new_tokens": 100},
)

model = ChatHuggingFace(llm=llm)
model.invoke("What is the capital of France?")

Device set to use mps:0


AIMessage(content='<|user|>\nWhat is the capital of France?</s>\n<|assistant|>\nThe capital of France is Paris.', additional_kwargs={}, response_metadata={}, id='run--f2aa59d2-6af1-438f-bf0b-a457fb4c26ea-0')

## Embedding models 

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07", dimensions=1024)
text = "The capital of France is Paris."
embedding = embeddings.embed_query(text)

In [11]:
len(embedding)  # Length of the embedding vector

embed_document = embeddings.embed_documents([text])

## Embedding Open Source

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",)

text = "The capital of France is Paris."
embedding = embeddings.embed_query(text)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
embedding

[0.1032569408416748,
 0.030420120805501938,
 0.02909577265381813,
 -0.037322863936424255,
 0.078676238656044,
 -0.05472308024764061,
 0.0026922838296741247,
 -0.008628075942397118,
 0.023342935368418694,
 0.024929197505116463,
 -0.0176668893545866,
 -0.07516758143901825,
 0.024401631206274033,
 -0.05606142058968544,
 -0.035808682441711426,
 -0.12067259103059769,
 -0.0008131168433465064,
 -0.025866059586405754,
 0.04106556624174118,
 0.006101945880800486,
 0.007386340759694576,
 -0.035924915224313736,
 0.08720473945140839,
 -0.0009175539598800242,
 -0.043363429605960846,
 -0.01564173772931099,
 -0.05735722556710243,
 -0.009901881217956543,
 -0.011077442206442356,
 0.002180590759962797,
 0.06393151730298996,
 -0.02309073507785797,
 -0.06355898082256317,
 -0.0022083204239606857,
 -0.03933548927307129,
 -0.0232898760586977,
 -0.0037738983519375324,
 -0.02739354781806469,
 0.0436946377158165,
 0.04251300171017647,
 0.006501066964119673,
 -0.04662175104022026,
 -0.05291168764233589,
 -0.0241

## USE case for embeddings


In [10]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07", dimensions=1024)

In [None]:
document_list = [
    "The capital of France is Paris.",
    "The capital of Germany is Berlin.",
    "The capital of Italy is Rome.",
    "The capital of Spain is Madrid.",
]
query = "What is the capital of France?"

query_embedding = embeddings.embed_query(query)
document_embeddings = embeddings.embed_documents(document_list)
similarities = cosine_similarity([query_embedding], document_embeddings)

In [18]:
index, score = sorted(list(enumerate(similarities)), key=lambda x: x[1], reverse=True)[0]
document_list[index], score

('The capital of France is Paris.', np.float64(0.7892893360164778))

In [None]:
Hence the usage of vector db because it allows us to find the most relevant document based on the similarity of their embeddings, which is particularly useful for tasks like semantic search and information retrieval.
    without vector db, we would have to compare the query with each document one by one, which is inefficient and not scalable for large datasets and also regenerate the embeddings for each query and documents, which is computationally expensive.