# Pinecone

In [32]:
import os
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [33]:
from pinecone import Pinecone

pc = Pinecone()
pc.list_indexes()

[
    {
        "name": "langchain",
        "metric": "cosine",
        "host": "langchain-gepfqbs.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1536,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [34]:
from pinecone import ServerlessSpec

index_name = 'langchain'

if index_name not in pc.list_indexes().names():
    print(f'Creating Index: {index_name}')
    pc.create_index(
        name = index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
    print("Index created")

else:
    print(f'Index {index_name} already exists!')

Index langchain already exists!


In [35]:
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

# Working with vectors

In [36]:
import random

vectors = [[random.random() for _ in range(1536)] for v in range(5)]
vectors

[[0.17973386559581317,
  0.8874614389937296,
  0.2562143910057686,
  0.2774608376610539,
  0.8566069880810195,
  0.9755617377162904,
  0.31990413548595287,
  0.6498179675581798,
  0.5673188960565962,
  0.39118568422469513,
  0.5460567054111269,
  0.24696150066475941,
  0.45031168007840194,
  0.8075919302334635,
  0.04878264528400422,
  0.7145069447325216,
  0.9078105051928351,
  0.7180725679201352,
  0.4133468647452324,
  0.7886044264569466,
  0.5366247679271443,
  0.18317940682549227,
  0.3525459812170473,
  0.04060767943444088,
  0.7987654634770105,
  0.09383756390566367,
  0.9552339489746318,
  0.29314630512808226,
  0.6785179687457507,
  0.7289889538027687,
  0.8016538588246762,
  0.3648567607222485,
  0.9489934252642306,
  0.49596608601415093,
  0.29497286867962325,
  0.6525228200174641,
  0.49988647622973403,
  0.2711186776750054,
  0.9856970327702117,
  0.1488459990975286,
  0.579915206632822,
  0.5434342438681019,
  0.817908486358581,
  0.9278132040137593,
  0.12213684807590108

In [37]:
ids = list('abcde')

index_name = 'langchain'
index = pc.Index(index_name)

index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [38]:
index.upsert(vectors=[('c', [0.5] * 1536)])

{'upserted_count': 1}

In [39]:
index.fetch(ids=['c', 'd'])

FetchResponse(namespace='', vectors={'d': Vector(id='d', values=[0.228338644, 0.183643728, 0.524724066, 0.936456621, 0.92173934, 0.667769849, 0.110751703, 0.862417936, 0.523961902, 0.423062801, 0.178770691, 0.487531394, 0.630570471, 0.992821634, 0.181762397, 0.288045913, 0.383459836, 0.0812024549, 0.244829506, 0.410689563, 0.392926306, 0.242700323, 0.315864235, 0.685895741, 0.974715233, 0.642427087, 0.967482507, 0.436944515, 0.958318591, 0.9381513, 0.903305769, 0.96192193, 0.649740934, 0.832078755, 0.231195554, 0.526208043, 0.893651247, 0.0711210743, 0.404536128, 0.718761384, 0.672405958, 0.693627238, 0.583869755, 0.384239823, 0.637427747, 0.480090261, 0.0111960974, 0.661200404, 0.208129033, 0.672257245, 0.164087176, 0.629050255, 0.858300388, 0.85618943, 0.174485669, 0.24828741, 0.0307302866, 0.678219795, 0.393899322, 0.181511611, 0.193068951, 0.490018904, 0.292595387, 0.134386271, 0.63294816, 0.842868, 0.305610985, 0.366775721, 0.265565127, 0.277140021, 0.0251688585, 0.313319415, 0.92

In [40]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

In [48]:
index.delete(ids=['b', 'c'])

{}

In [49]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3,
 'vector_type': 'dense'}

In [50]:
index.fetch(ids=['x'])

FetchResponse(namespace='', vectors={}, usage={})

In [51]:
# querying vectors
query_vector = [random.random() for _ in range(1536)]

In [52]:
index.query(
    vector=query_vector,
    top_k=3,
    include_values=False
)

{'matches': [{'id': 'd', 'score': 0.769526422, 'values': []},
             {'id': 'a', 'score': 0.759751, 'values': []},
             {'id': 'e', 'score': 0.753729582, 'values': []}],
 'namespace': '',
 'usage': {}}

# Namespaces

In [53]:
# index.describe_index_stats()
index = pc.Index('langchain')

import random
vectors = [[random.random() for _ in range(1536)] for v in range(5)]
ids = list('abcde')
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [54]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

In [66]:
# partition the index into namespaces
# creating a new namespace
vectors = [[random.random() for _ in range(1536)] for v in range(3)]
ids = list('xyz')
index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

{'upserted_count': 3}

In [67]:
vectors = [[random.random() for _ in range(1536)] for v in range(2)]
ids = list('qp')
index.upsert(vectors=zip(ids, vectors), namespace='second-namespace')

{'upserted_count': 2}

In [68]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 10,
 'vector_type': 'dense'}

In [70]:
index.fetch(ids=['x'])

FetchResponse(namespace='', vectors={}, usage={})

In [71]:
index.fetch(ids=['x'], namespace='first-namespace')


FetchResponse(namespace='first-namespace', vectors={'x': Vector(id='x', values=[0.533473849, 0.147190616, 0.747546375, 0.537590623, 0.940287709, 0.781368554, 0.95445472, 0.161377698, 0.0827544481, 0.321541071, 0.402374834, 0.60394752, 0.547739387, 0.693449616, 0.287578404, 0.0884743929, 0.810544372, 0.714135587, 0.976102889, 0.673982799, 0.499441475, 0.122603722, 0.455598772, 0.0140321301, 0.673639417, 0.863355, 0.832501769, 0.218212411, 0.451102644, 0.27953735, 0.686908662, 0.953466356, 0.247605547, 0.486392289, 0.906377316, 0.68441391, 0.11594215, 0.309919715, 0.618447423, 0.526837945, 0.364115179, 0.324013889, 0.242156729, 0.656630099, 0.229983896, 0.879355311, 0.31022805, 0.740182579, 0.178337559, 0.753690839, 0.507162333, 0.992311418, 0.647086203, 0.355845362, 0.120605946, 0.704814196, 0.491662621, 0.401002526, 0.331581056, 0.84572494, 0.347235084, 0.301223516, 0.644641817, 0.75024575, 0.671628535, 0.861126781, 0.561178386, 0.56594497, 0.585299, 0.040311534, 0.722127259, 0.9919211

In [72]:
index.delete(ids=['x'], namespace='first-namespace')

{}

In [73]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 2},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 9,
 'vector_type': 'dense'}

In [74]:
index.delete(delete_all=True, namespace='first-namespace')

{}

In [76]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 7,
 'vector_type': 'dense'}

# Splitting and embedding text using langchain

In [77]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

In [78]:
with open('files/churchill_speech.txt') as f:
    churchill_speech = f.read()

chunks = text_splitter.create_documents([churchill_speech])

len(chunks)

300

In [79]:
chunks[0]

Document(metadata={}, page_content='Winston Churchill Speech - We Shall Fight on the Beaches\nWe Shall Fight on the Beaches\nJune 4, 1940')

### Embedding cost

In [80]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [81]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [82]:
vector = embeddings.embed_query('abc')
vector

[0.0025581938680261374,
 -0.011365032754838467,
 -0.009624186903238297,
 -0.03909118101000786,
 -0.034448929131031036,
 0.01201608031988144,
 -0.021371355280280113,
 -0.02285744436085224,
 0.01866809092462063,
 -0.0003956266155000776,
 0.0033401588443666697,
 0.019205914810299873,
 -0.002738647162914276,
 -0.004521952476352453,
 -0.01869639754295349,
 0.002996943425387144,
 0.0270326416939497,
 0.010600758716464043,
 0.010820133611559868,
 0.00748705118894577,
 -0.014521199278533459,
 0.017833052203059196,
 -0.007005841936916113,
 -0.01569591648876667,
 -0.020239097997546196,
 -0.0036196850705891848,
 0.009978016838431358,
 -0.02002680115401745,
 0.02626836858689785,
 -0.007345519028604031,
 0.007812575437128544,
 0.013799385167658329,
 -0.007494127843528986,
 -0.009942634031176567,
 -0.010437997058033943,
 -0.014280594885349274,
 -0.013700312934815884,
 -0.015964826568961143,
 0.010395537130534649,
 -0.00026537280064076185,
 0.024357683956623077,
 0.004546720534563065,
 0.014061219990

In [83]:
vector = embeddings.embed_query(chunks[0].page_content)
vector

[-0.044563982635736465,
 -0.0378589928150177,
 -0.0029398079495877028,
 -0.008005200885236263,
 0.01583194173872471,
 0.022600671276450157,
 -0.028528088703751564,
 -0.009662328287959099,
 0.0010850998805835843,
 0.00731685571372509,
 0.007782125845551491,
 0.03281112387776375,
 0.007374217733740807,
 -0.011714616790413857,
 0.0063257659785449505,
 -0.00544621329754591,
 0.01316779013723135,
 -0.002444263081997633,
 0.01355020422488451,
 -0.010956162586808205,
 -0.008132671937346458,
 -0.02684546634554863,
 0.02964983694255352,
 -0.0038432611618191004,
 -0.014455251395702362,
 -0.018496092408895493,
 0.010917921550571918,
 -0.0185853224247694,
 0.0030131039675325155,
 -0.014289538376033306,
 0.007068286184221506,
 -0.008546954020857811,
 -0.016533033922314644,
 0.005162589717656374,
 -0.01833038032054901,
 -0.023837141692638397,
 -0.02233298122882843,
 -0.008789150044322014,
 0.02267715334892273,
 -0.012670652009546757,
 0.013613940216600895,
 0.00460490258410573,
 0.008782776072621346

In [84]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

### Insert the embeddings into a pinecone index

In [None]:
import pinecone
# from langchain_community.vectorstores import Pinecone

pc = pinecone.Pinecone()

In [86]:
indexes = pc.list_indexes().names()
for i in indexes:
    pc.delete_index(i)

In [87]:
pc.list_indexes()

[]

In [88]:
# creating an index
from pinecone import ServerlessSpec
index_name = 'churchill-speech'
if index_name not in pc.list_indexes().names():
    print(f'Creating index {index_name}')
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        ) 
    )
    print('Index created! 😊')
else:
    print(f'Index {index_name} already exists!')

Creating index churchill-speech
Index created! 😊


In [91]:
vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)

AttributeError: module 'pinecone' has no attribute 'Index'

In [92]:
import pinecone
from langchain_pinecone import Pinecone

pc = pinecone.Pinecone()

In [93]:
pc.list_indexes()

[
    {
        "name": "churchill-speech",
        "metric": "cosine",
        "host": "churchill-speech-gepfqbs.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1536,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [94]:
vector_store = Pinecone.from_documents(
    chunks,
    embeddings,
    index_name=index_name
)

In [95]:
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 300}},
 'total_vector_count': 300,
 'vector_type': 'dense'}

In [96]:
query = 'Where should we fight?'
result = vector_store.similarity_search(query)
print(result)

[Document(id='abc09de1-fed3-4a6a-a3a4-93a133312251', metadata={}, page_content='shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and'), Document(id='bd4b8740-385b-4915-b897-4fbc5ac260e6', metadata={}, page_content='end, we shall fight in France, we shall fight on the seas and oceans, we shall fight with growing'), Document(id='4edcc678-67f9-4dc6-bb24-7482770b83ac', metadata={}, page_content='streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a'), Document(id='ea0b666d-bea4-44c0-8bfb-27e5fe20c3ce', metadata={}, page_content='number of the enemy, and fought fiercely on some of the old grounds that so many of us knew so')]


In [97]:
for r in result:
    print(r.page_content)
    print('-' * 50)

shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and
--------------------------------------------------
end, we shall fight in France, we shall fight on the seas and oceans, we shall fight with growing
--------------------------------------------------
streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a
--------------------------------------------------
number of the enemy, and fought fiercely on some of the old grounds that so many of us knew so
--------------------------------------------------


### Answering questions

In [102]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.2)

retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})

chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

In [103]:
query = 'Answer only from the provided input. Where should we fight?'
answer = chain.invoke(query)
print(answer)

{'query': 'Answer only from the provided input. Where should we fight?', 'result': 'We shall fight on the beaches, landing grounds, fields, in France, on the seas and oceans, in the streets, and in the hills.'}


In [104]:
query = 'Who was the king of Belgium at that time?'
answer = chain.invoke(query)
print(answer)

{'query': 'Who was the king of Belgium at that time?', 'result': 'The king of Belgium at that time was King Leopold.'}


In [105]:
query = 'What about the French Armies??'
answer = chain.invoke(query)
print(answer)

{'query': 'What about the French Armies??', 'result': 'The French Armies were involved in the fighting and were supposed to advance across the Somme in great strength. The area was held by French troops during the conflict.'}


In [106]:
pip show langchain-google-genai

Name: langchain-google-genai
Version: 2.1.8
Summary: An integration package connecting Google's genai package and LangChain
Home-page: https://github.com/langchain-ai/langchain-google
Author: 
Author-email: 
License: MIT
Location: c:\users\daksh\anaconda3\envs\pytorch\lib\site-packages
Requires: filetype, google-ai-generativelanguage, langchain-core, pydantic
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [107]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [108]:
import getpass
import os 
if 'GOOGLE_API_KEY' not in os.environ:
    os.environ['GOOGLE_API_KEY'] = getpass.getpass('Provide your google api key: ')

In [111]:
from google import genai

In [112]:
client = genai.Client()

In [118]:
client.models.list(config={"page_size": 100, "query_base": True})

<google.genai.pagers.Pager at 0x1da9c0cd5d0>

In [121]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model='gemini-2.5-pro', temperature=0.9)
response = llm.invoke('Write a paragraph about life on mars in year 2100')
print(response.content)

By 2100, life on Mars was no longer a pioneering fantasy but an established, gritty reality for a few thousand souls. The first generation of native-born Martians inhabited a network of subterranean lava tubes and sprawling, pressurized biodomes, their days dictated by the hum of life-support systems and the ruddy glow of hydroponic farms. Daily existence was a meticulous dance of maintaining the fragile bubble of Earth-like conditions, conducting geological research, and mining for water ice in the polar regions. Children, who knew Earth only as a brilliant blue star in the night sky, developed a unique loping gait in the low gravity and saw the butterscotch-colored sky as perfectly normal. While connected to their ancestral home by a time-lagged stream of data, a distinct Martian identity had taken root—one forged in resilience, community reliance, and a profound, quiet awe for the silent, red world that lay just beyond their reinforced walls.


In [122]:
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.tracers import ConsoleCallbackHandler

prompt = PromptTemplate.from_template('You are a content creator. Write me a tweet about {topic}')

chain = LLMChain(
    llm=llm,
    prompt=prompt,
)

topic = 'Why will AI change the world'
response = chain.invoke(
    input=topic,
    config={"callbacks": [ConsoleCallbackHandler()]}
)

print(response)

  chain = LLMChain(


[32;1m[1;3m[chain/start][0m [1m[chain:LLMChain] Entering Chain run with input:
[0m{
  "topic": "Why will AI change the world"
}
[32;1m[1;3m[llm/start][0m [1m[chain:LLMChain > llm:ChatGoogleGenerativeAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are a content creator. Write me a tweet about Why will AI change the world"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[chain:LLMChain > llm:ChatGoogleGenerativeAI] [20.58s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Of course! As a content creator, it's key to have options that fit different vibes. Here are a few choices, from punchy and direct to more thought-provoking.\n\n### Option 1: The Punchy & Direct\nAI will change the world because it’s the first tool in history that can learn.\n\nIt’s not just automating labor, it’s automating *discovery*. From medicine to climate change, we’re about to level-up humanity's problem-solving ability.\n\n#AI #FutureOfTech #Innovation\n\n

In [123]:
print(response['text'])

Of course! As a content creator, it's key to have options that fit different vibes. Here are a few choices, from punchy and direct to more thought-provoking.

### Option 1: The Punchy & Direct
AI will change the world because it’s the first tool in history that can learn.

It’s not just automating labor, it’s automating *discovery*. From medicine to climate change, we’re about to level-up humanity's problem-solving ability.

#AI #FutureOfTech #Innovation

---

### Option 2: The Relatable Analogy
Think the internet changed everything? The internet gave us access to information. AI gives us the ability to *understand and act on it* at an impossible scale.

It's the difference between having a library and having a million brilliant librarians working for you 24/7.

#ArtificialIntelligence #Tech #Disruption

---

### Option 3: The Human-Centric Angle
AI's true power isn't replacing humans, but amplifying them.

Imagine every doctor with a genius medical assistant, every scientist with a ti

# System prompt and streaming

In [124]:
from langchain_core.messages import HumanMessage, SystemMessage

llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

output = llm.invoke(
    [
        SystemMessage(content="Answer only YES or NO in french"),
        HumanMessage(content='Is fish a mammal?')
    ]
)

output.content

'NON'

## Streaming

In [125]:
llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash', temperature=0)
prompt = 'Write a scientific paper outlining the mathematical foundation of our universe.'

In [126]:
for chunk in llm.stream(prompt):
    print(chunk.content, end='')

## The Mathematical Foundation of the Universe: An Outline of Fundamental Structures and Theories

**Abstract:**
This paper outlines the pervasive and fundamental role of mathematics in describing, and arguably constituting, the physical universe. From the earliest observations to the most advanced theoretical frameworks, mathematical structures have proven to be the indispensable language for comprehending reality. We explore how core mathematical concepts—such as numbers, geometry, symmetry, topology, and calculus—underpin the major pillars of modern physics, including classical mechanics, electromagnetism, general relativity, and quantum field theory. The paper further discusses the ongoing quest for a unified theory, highlighting how contemporary research in quantum gravity and cosmology continues to seek deeper mathematical principles to explain the universe's ultimate nature and origins. The "unreasonable effectiveness" of mathematics in physics is presented not merely as a tool 

ServiceUnavailable: 503 The model is overloaded. Please try again later.