In [None]:
# Language Models -
# 1. LLMs
# 2. Chat Models

# LLMs -> general-purpose models that can be used for any type of NLP application; you cannot assign roles (use them for text generation, summarization, translation, creative writing, code generation)
# LLMs -> GPT-3, Llama-2-7b, OPT-1.3B

# Chat Models -> specialized for conversations; you can assign roles to a chat model (use them for conversational AI, chatbots, virtual assistants, customer support, AI tutors)
# Chat Models -> GPT-4, Claude, GPT-3.5-turbo, Llama-2-Chat, Mistral-Instruct

# temperature -> parameter that controls the randomness of the language model's output.
# A higher temperature will result in more diverse and creative output, while a lower temperature will result in more conservative and deterministic output.
# When the temperature is low, the same input gives nearly identical outputs; when the temperature is high, the same input gives noticeably different outputs.
# Factual answers (math, code, facts) -> 0.0 to 0.3
# Balanced answers (general Q&A, explanation) -> 0.5 to 0.7
# Creative answers (creative writing, storytelling, jokes) -> 0.9 to 1.2
# Maximum randomness(wild ideas, brainstorming) -> 1.5+

# Imports

In [4]:
!pip install langchain langchain-core google-generativeai langchain-openai --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m51.2/54.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.9/54.9 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.2 MB[0m [31m8.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.2/1.2 MB[0m [31m20.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h

# LLMs

In [None]:
import os

# The `python-dotenv` package is imported under the module name `dotenv`.
from dotenv import load_dotenv

# Load environment variables from the .env file into os.environ.
load_dotenv()

# Check that the OpenAI API key is available. Re-assigning the variable to itself
# does nothing when it is set and raises an opaque TypeError when it is missing,
# so stop here with a readable message instead.
if not os.getenv('OPENAI_API_KEY'):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running the OpenAI cells."
    )

In [None]:
from langchain_openai import OpenAI

# Plain LLM interface (not a chat model): send one prompt string and print
# whatever invoke() returns.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
)
result = llm.invoke(
    "What is the capital of India"
)
print(result)

# Chat Models

In [None]:
# OpenAI chat models
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4")

# Sampling settings are passed on this individual call, not on the constructor.
result = model.invoke(
    "What is the capital of India",
    temperature=1,
    max_completion_tokens=10,
)

# First the whole response object, then only its `.content`.
print(result, result.content, sep="\n")

In [None]:
!pip install langchain_anthropic --quiet

In [None]:
# Anthropic chat models
from langchain_anthropic import ChatAnthropic

# NOTE(review): 'claude-2' is an older model id; confirm it is still served.
model = ChatAnthropic(model="claude-2")
result = model.invoke("What is the capital of India")

# First the whole response object, then only its `.content`.
print(result, result.content, sep="\n")

In [None]:
!pip install langchain_google_genai --quiet

In [None]:
# Google chat models
from langchain_google_genai import ChatGoogleGenerativeAI

# NOTE(review): 'gemini-1.5-pro' is an older model id; confirm it is still served.
model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
result = model.invoke("What is the capital of India")

# First the whole response object, then only its `.content`.
print(result, result.content, sep="\n")

In [None]:
# # Open source Models
# LLaMA-2-7B/13B/70B -> General purpose text generation
# Mixtral-8x7B -> Efficient and fast responses
# Mistral-7B -> Best small-scale model (outperforms LLaMA-2-13B)
# Falcon-7B/40B -> high speed inference
# BLOOM-176B -> Multilingual text generation
# GPT-J-6B -> Lightweight and efficient
# GPT-NeoX-20B -> Large-scale applications
# StableLM -> Compact models for chatbots

# Opensource ->
# 1. Inference API
# 2. Running Locally

In [18]:
!pip install huggingface_hub --quiet

In [None]:
!pip install langchain_huggingface --quiet

In [None]:
def export_hf_token(env=None):
    """Publish the Hugging Face token under the name langchain_huggingface reads.

    This notebook keeps the token in HUGGINGFACEHUB_ACCESS_TOKEN, while
    HuggingFaceEndpoint looks it up as HUGGINGFACEHUB_API_TOKEN. Copying the
    value across makes the token visible to the endpoint; the original
    variable is left untouched.

    Args:
        env: Mapping to read from and update. Defaults to os.environ.

    Returns:
        The token string, or None when neither variable holds a value
        (in which case `env` is not modified).
    """
    env = os.environ if env is None else env
    token = env.get("HUGGINGFACEHUB_ACCESS_TOKEN") or env.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        return None
    env["HUGGINGFACEHUB_API_TOKEN"] = token
    return token


# Only the Inference API cell needs the token (the local pipeline cell does not),
# so a missing token is reported here instead of aborting the notebook.
if export_hf_token() is None:
    print("HUGGINGFACEHUB_ACCESS_TOKEN is not set; Hugging Face Inference API calls will fail.")

In [None]:
# Hugging Face API
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Endpoint object for the hosted model, then a chat wrapper around it.
llm = HuggingFaceEndpoint(repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0", task="text-generation")
model = ChatHuggingFace(llm=llm)

result = model.invoke("What is the capital of India?")
# Only the `.content` of the response is printed here.
print(result.content)

In [None]:
# From hugging face local
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# Build a text-generation pipeline from the model id and wrap it as a chat model.
llm = HuggingFacePipeline.from_model_id(
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs=dict(
        # `temperature` only applies in sampling mode; without do_sample=True
        # generation stays greedy and the temperature setting is ignored.
        do_sample=True,
        temperature=0.5,
        max_new_tokens=100
    )
)
model = ChatHuggingFace(llm=llm)
result = model.invoke("What is the capital of India?")
print(result.content)

# Embedding Models

In [None]:
# OpenAI embeddings: embed a single query string
from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=32,
)
result = embedding.embed_query("Delhi is the capital of India")

# print() already applies str() to its argument.
print(result)

In [None]:
# OpenAI embeddings: embed a list of documents in one call
from langchain_openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=32,
)

documents = ["Delhi is the capital of India", "Kolkata is the capital of West Bengal", "Paris is the capital of France"]
result = embedding.embed_documents(documents)

# print() already applies str() to its argument.
print(result)

In [None]:
# Hugging Face embeddings (local model): embed a list of documents
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

documents = ["Delhi is the capital of India", "Kolkata is the capital of West Bengal", "Paris is the capital of France"]
vector = embedding.embed_documents(documents)

# print() already applies str() to its argument.
print(vector)

In [None]:
# Cosine Similarity : Open AI Embeddings
# Embed a handful of documents and a query, then report the document whose
# embedding has the highest cosine similarity to the query.
from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

embedding = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=300)

documents = [
    "Virat Kohli is an Indian cricketer known for his aggressive batting and leadership.",
    "MS Dhoni is a former Indian captain famous for his calm demeanor and finishing skills.",
    "Sachin Tendulkar, also known as the 'God of Cricket', holds many batting records.",
    "Rohit Sharma is known for his elegant batting and record-breaking double centuries.",
    "Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and yorkers."
]

query = 'tell me about bumrah'

doc_embeddings = embedding.embed_documents(documents)
query_embedding = embedding.embed_query(query)

# The query is wrapped in a list so it is passed as a single row; row 0 of the
# result holds the query's score against each document.
scores = cosine_similarity([query_embedding], doc_embeddings)[0]

# Select the best-scoring document in one pass instead of sorting every score.
# On an exact tie this keeps the earliest document.
index, score = max(enumerate(scores), key=lambda pair: pair[1])

print(query)
print(documents[index])
print("similarity score is:", score)