In [None]:
!pip install pinecone-client openai tiktoken
!pip install wikipedia
!pip install langchain

In [21]:
import os

from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
import openai
import pinecone

In [22]:
# Credentials and deployment settings are read from environment variables so
# that real secrets never need to be saved inside the notebook. The original
# placeholder strings are kept as fallbacks, so behaviour is unchanged when a
# variable is not set.
open_ai_key = os.environ.get("OPENAI_API_KEY", "xxx")        # OpenAI API key
index_name = os.environ.get("PINECONE_INDEX_NAME", "xxx")    # Pinecone index to create/use
env_name = os.environ.get("PINECONE_ENVIRONMENT", "xxxx")    # Pinecone environment name
api_key = os.environ.get("PINECONE_API_KEY", "xxxx")         # Pinecone API key

In [None]:
# Load at most two Wikipedia documents for the query "Bitcoin".
wiki_loader = WikipediaLoader(query="Bitcoin", load_max_docs=2)
docs = wiki_loader.load()

# Split the loaded documents using a chunk size of 1000 and an overlap of 20.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
documents = text_splitter.split_documents(docs)

In [35]:
# Name of the OpenAI embedding model used for both indexing and querying.
model_name = 'text-embedding-ada-002'

# Embedding client configured with that model and the OpenAI key from the config cell.
embed = OpenAIEmbeddings(model=model_name, openai_api_key=open_ai_key)

In [43]:
# Connect the Pinecone client using the key and environment from the config cell.
pinecone.init(api_key=api_key, environment=env_name)

# Create the index only when it is not already listed
# (cosine metric, 1536-dimensional vectors).
existing_indexes = pinecone.list_indexes()
if index_name not in existing_indexes:
    pinecone.create_index(name=index_name, dimension=1536, metric="cosine")

# Raw text of every chunk, plus one separate metadata dict per chunk.
texts = [chunk.page_content for chunk in documents]
chunk_metadatas = [{"bot_id": "testing"} for _ in texts]

# Embed the chunks and store them in the index, tagged with the metadata above.
docsearch = Pinecone.from_texts(
    texts,
    embed,
    metadatas=chunk_metadatas,
    index_name=index_name,
)

In [42]:
# Query the two nearest chunks, restricted by metadata. The filter uses the
# same bot_id ("testing") that the ingestion cell attaches to every chunk, so
# the query finds the data this notebook itself indexed on a fresh run.
docsearch.similarity_search(
    "what is bitcoin",
    k=2,
    filter={"bot_id": "testing"},
)

[Document(page_content='Bitcoin (abbreviation: BTC or XBT; sign: ₿) is the first decentralized cryptocurrency. Nodes in the peer-to-peer bitcoin network verify transactions through cryptography and record them in a public distributed ledger, called a blockchain, without central oversight. Consensus between nodes is achieved using a computationally intensive system based on proof-of-work called mining. Bitcoin mining requires increasing quantities of electricity and was responsible for 0.2% of world greenhouse gas emissions as of 2022.Based on a free market ideology, bitcoin was invented in 2008 by Satoshi Nakamoto, an unknown person. Use of bitcoin as a currency began in 2009, with the release of its open-source implementation.:\u200ach. 1\u200a In 2021, El Salvador adopted it as legal tender. Bitcoin is currently used more as a store of value and less as a medium of exchange or unit of account. It is mostly seen as an investment and has been described by many scholars as an economic b

In [47]:
# Handle to the existing Pinecone index named in the config cell.
index = pinecone.Index(index_name)

# Wrap that index as a LangChain vector store: queries are embedded with
# embed.embed_query, and "text" is passed as the third (text key) argument.
query_embedder = embed.embed_query
vectorstore = Pinecone(
    index,
    query_embedder,
    "text",
)



In [49]:
# A single extra passage to store, with its own bot_id metadata.
neo4j_note = (
    "You need to set up a Neo4j 5.11 or greater to follow along with the examples in this blog post. "
    "The easiest way is to start a free instance on Neo4j Aura, which offers cloud instances of Neo4j database. "
    "Alternatively, you can also set up a local instance of the Neo4j database by downloading the Neo4j Desktop "
    "application and creating a local database instance.!"
)
neo4j_note_metadata = {"bot_id": "safklnavio h239hoi2hvcow"}

# Last expression of the cell, so the call's return value is displayed as the cell output.
vectorstore.add_texts([neo4j_note], metadatas=[neo4j_note_metadata])

['9e7b0e05-0f3c-45cd-a9b6-d54381d2df95']