#### **Language Models**

In [None]:
# Install using Python 3.12.5 directly
import sys
!{sys.executable} -m pip install -r requirements.txt




[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


##### **Paid LLM & Chat Models**

In [None]:
# OpenAI Base LLM Model - Not Used Anymore 

from langchain_openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

llm = OpenAI(model = "gpt-3.5-turbo-instruct")

result = llm.invoke("What is the capital of France?")
print(result)

In [None]:
# OpenAI Chat Model

from langchain_openai import ChatOpenAI

load_dotenv()

model = ChatOpenAI(model='gpt-4', temperature=0.5, max_completion_tokens=100)

result = model.invoke("What is the capital of France?")
print(result)

In [None]:
# Anthropic Chat Model

from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model='claude-3-5-sonnet-20240620')

result = model.invoke("What is the capital of France?")
print(result)

In [None]:
# Google Chat Model

from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(model='gemini-1.5-pro-002')
result2 = model.invoke("What is the capital of France?")
print(result)

##### **Open Source Models-** 
Primarily using Hugging Face Models

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
import os

os.environ['HF_HOME'] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    task='text-generation',
    pipeline_kwargs=dict(
        temperature=0.5,
        max_new_tokens=100
    )
)
model = ChatHuggingFace(llm=llm)

result = model.invoke("What is the capital of India")

print(result.content)

Device set to use cuda:0


<|user|>
What is the capital of India</s>
<|assistant|>
India's capital city is New Delhi, also known as the "living capital" due to the fact that it is a city that never sleeps. New Delhi is located in the northernmost part of India and is surrounded by the Yamuna River. The city is home to the India Gate, a war memorial, and the Qutub Minar, a towering monument that is the second-tallest brick minaret in the world. New Delhi is


##### **Embedding Models-**

**Open Source Models**

In [61]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

documents = [
    "Delhi is the capital of India",
    "Kolkata is the capital of West Bengal",
    "Paris is the capital of France"
]

vector = embedding.embed_documents(documents)

print(str(vector))

[[0.04354953393340111, 0.023877190425992012, -0.04524128884077072, 0.03540497273206711, -0.016651013866066933, -0.06554820388555527, 0.07626009732484818, 0.009940460324287415, -0.001963240560144186, -0.02702271193265915, 0.00738557381555438, -0.12068236619234085, 0.0640484020113945, -0.06795038282871246, 0.036388885229825974, -0.0780777856707573, 0.03318415582180023, 0.0817556232213974, 0.07336152344942093, -0.07802225649356842, -0.02092115394771099, 0.035732824355363846, -0.008563303388655186, -0.037455130368471146, 0.0004388655652292073, 0.05346425622701645, 0.005293595138937235, -0.016870463266968727, -0.0004130420566070825, 0.0010301814181730151, 0.06669679284095764, 0.004223205149173737, -0.02252260223031044, -0.0021015710663050413, -0.05594784393906593, 0.01686997152864933, -0.12951608002185822, 0.06496333330869675, 0.1728808581829071, -0.11778349429368973, 0.036441005766391754, -0.0006774598150514066, 0.07786677032709122, -0.02816751040518284, 0.03655533492565155, -0.02369884774

**Closed Source Models**

In [None]:
from langchain_openai import OpenAIEmbeddings

load_dotenv()

# Embedding a single line of text
embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=32)
result = embeddings.embed_query("What is the capital of France?")
print(result)


In [None]:
# Embedding Multiple lines of text

documents = [
    "The capital of France is Paris",
    "The capital of Germany is Berlin",
    "The capital of Italy is Rome"
]

result = embeddings.embed_documents(documents)
print(result)


##### **Mini Project - Simple & Basic**
Determining the similarity scores between the documents & query using cosine similarity and finding the most relevent docs.

In [70]:
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

documents = [
    "Virat Kohli is an Indian cricketer known for his aggressive batting and leadership.",
    "MS Dhoni is a former Indian captain famous for his calm demeanor and finishing skills.",
    "Sachin Tendulkar, also known as the 'God of Cricket', holds many batting records.",
    "Rohit Sharma is known for his elegant batting and record-breaking double centuries.",
    "Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and yorkers."
]

query = "Tell me about Kohli"

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

doc_embeddings = embeddings.embed_documents(documents)
query_embedding = embeddings.embed_query(query)

similarity_scores = cosine_similarity([query_embedding], doc_embeddings)[0]
print(similarity_scores)

index, score =sorted(list(enumerate(similarity_scores)), key=lambda x: x[1])[-1]
print(query)
print(documents[index])
print("Similarity Score: ", score)

[0.70334051 0.40582631 0.51636068 0.5736942  0.36735783]
Tell me about Kohli
Virat Kohli is an Indian cricketer known for his aggressive batting and leadership.
Similarity Score:  0.7033405065379699
