In [42]:
import pinecone
import openai
import numpy as np
import os
from dotenv import load_dotenv

# Langchain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader

# Load variables from the .env file
load_dotenv('./Sn33k/.env')

# Access the variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT")

# Fail fast with a readable message: os.getenv returns None for a missing
# variable, and assigning None into os.environ below would otherwise raise an
# opaque "TypeError: str expected, not NoneType".
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; check ./Sn33k/.env or the process environment."
    )

# Make the key available both to the openai module and to libraries that
# read it from the process environment.
openai.api_key = OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Pinecone Init

In [43]:

# Connect the Pinecone client using the credentials read from the environment.
pinecone_settings = {
    "api_key": PINECONE_API_KEY,  # find at app.pinecone.io
    "environment": PINECONE_ENVIRONMENT,  # next to api key in console
}
pinecone.init(**pinecone_settings)

# NOTE(review): the index name is hard-coded; the INDEX_NAME value read from
# the environment is not used by this cell.
index_name = "singlife"

# Embedding model used by the LangChain vector store.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# Load the already-existing index as a LangChain vector store.
docsearch = Pinecone.from_existing_index(index_name, embeddings)

# Print the index description ...
index_description = pinecone.describe_index(index_name)
print('index_description: ', index_description)

# ... and the index statistics, obtained through a raw client handle.
index = pinecone.Index(index_name)
index_stats_response = index.describe_index_stats()
print('index_stats_response: ', index_stats_response)

index_description:  IndexDescription(name='singlife', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='p1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')
index_stats_response:  {'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 3488}},
 'total_vector_count': 3488}


## String Query embeddings

In [36]:
from langchain.vectorstores import Pinecone

# switch back to normal index for langchain
index = pinecone.Index(index_name)

# Passed to the store as its text key; presumably the metadata field that
# holds each chunk's text -- TODO confirm against how the index was populated.
text_field = "text"

# Build the LangChain wrapper from the raw index and the query-embedding function.
vectorstore = Pinecone(index, embeddings.embed_query, text_field)

# Testing query embeddings

In [23]:
MODEL = "text-embedding-ada-002"

# Two throwaway phrases, sent to the embedding endpoint in a single request.
sample_texts = [
    "Sample document text goes here",
    "there will be several phrases in each batch",
]
res = openai.Embedding.create(input=sample_texts, engine=MODEL)

In [5]:
# Gather the embedding of every record in the response into a plain list
embeds = []
for record in res['data']:
    embeds.append(record['embedding'])

# Displayed as the cell output: how many embeddings came back.
len(embeds)

2

In [39]:
query = "Critical Illness Singlife"

# Embed the query string and take the vector from the first record of the response.
embedding_response = openai.Embedding.create(input=query, engine=MODEL)
xq = embedding_response["data"][0]["embedding"]

# NOTE(review): this expression is not the last one in the cell, so its value
# is never displayed.
len(xq)

# Cell output: the full embedding vector.
xq

[-0.0017477519577369094,
 -0.0030590095557272434,
 0.011774701997637749,
 -0.02085236646234989,
 -0.013066441752016544,
 0.013286462984979153,
 -0.0447140596807003,
 0.0001397314335918054,
 -0.00040056344005279243,
 -0.0019038962200284004,
 0.01332904864102602,
 0.025579281151294708,
 0.000594856683164835,
 0.023563599213957787,
 0.015046635642647743,
 0.0035859965719282627,
 0.03506150096654892,
 -0.021334994584321976,
 0.01829727739095688,
 -0.01791401393711567,
 -0.011228197254240513,
 0.030689459294080734,
 -0.0034955039154738188,
 -0.0009288813453167677,
 0.0032879028003662825,
 0.0066290367394685745,
 -0.0002708128304220736,
 -0.03398268669843674,
 -0.007977556437253952,
 0.008871837519109249,
 0.024514660239219666,
 -0.02794983610510826,
 -0.01751655526459217,
 -0.010738471522927284,
 0.0035859965719282627,
 0.008296942338347435,
 -0.007402660790830851,
 0.0013706987956538796,
 0.01034101378172636,
 -0.0075517077930271626,
 0.02052588202059269,
 0.006380625534802675,
 0.01554345

In [40]:
# Ask the index for the 5 top matches to the query vector, including each
# match's stored metadata. Note that this rebinds `res`.
res = index.query(
    [xq],
    top_k=5,
    include_metadata=True,
)

In [41]:
# One line per match: similarity score (two decimals) followed by the stored text.
for hit in res['matches']:
    score = hit['score']
    text = hit['metadata']['text']
    print(f"{score:.2f}: {text}")

0.89: Singlife Multipay Critical Illness
A plan that provides multiple payouts for different stages of critical illnesses and
medical conditions, so you’re supported at every unexpected turn Always stay
protected financially
Life can hit you with the unexpected. Stay
ahead and be protected for you and your
family. With Singlife Multipay Critical Illness,
you can stay in control of your finances
while focusing on your recovery, worry-free.
Stage 3B breast 4 in 10 8 out of 10
cancer patients face a
stroke patients will colorectal cancer
70-90%
suffer another stroke patients who suffer a
chance of recurrence within the next 10 years2 recurrence develop
in 10 years1 cancer that involves
the liver3 BENEFITS AND FEATURES
Singlife Multipay Critical Illness is a plan that keeps supporting you financially through different stages of critical
illnesses and recurrences of specified critical illnesses, so that you can focus on your recovery, worry-free.
Choose your desired duration of coverage Pay

# Test Langchain Vectorstore

In [44]:
# (Re)build the LangChain vector store on top of the raw Pinecone index,
# using "text" as the text key.
vectorstore = Pinecone(
    index,
    embeddings.embed_query,
    "text",
)

In [45]:
query = "What kinds of singlife insurance is there?"

# Retrieve the 3 most relevant documents for the query; the result list is
# the cell's displayed output.
vectorstore.similarity_search(query, k=3)

[Document(page_content='Singlife Direct Purchase Insurance\nAn attractively affordable life insurance plan that gives you up to S$400,000 coverage -\nand you can purchase it all on your own. Take charge of your\nKnow what and how much\ncoverage you need?\nprotection needs\n“YES!”\nSure you can afford the premiums?\n“YES!”\nWith Singlife Direct\nPurchase Insurance, you get\nvalue-for-money protection. Don’t need explanations or\nIt covers you for up to guidance from a financial\nadviser representative?\nS$400,000, allows you to\n“YES!”\nadd protection for cancer\nand other critical illnesses,\nThen this plan is for you.\nand you can purchase\nit all on your own.\nIt’s that straightforward! BENEFITS AND FEATURES\nSinglife Direct Purchase Insurance is a life insurance plan with optional critical illness coverage that you can purchase all on your own. It keeps\nyou and your loved ones financially afloat with a lump-sum payout if you die, become terminally ill or suffer from total and perma

### Sample Question Answer chain

In [48]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Chat model used to generate the answers.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0,
)

# Question-answering chain whose retriever is backed by the vector store.
doc_retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
)

In [49]:
# Run the QA chain on the current `query`; the answer is the cell's displayed output.
answer = qa.run(query)
answer

'Singlife offers several types of insurance:\n\n1. Singlife Direct Purchase Insurance: This is a life insurance plan that provides coverage for up to S$400,000. It offers protection in the event of death, terminal illness, or total and permanent disability. You can choose between a fixed period of coverage or lifelong protection.\n\n2. Singlife Personal Accident Insurance: This is a personal accident plan that covers expenses incurred due to accidental injury. It also provides coverage for children for free. The plan includes benefits such as payout for accidental death, daily hospital allowance, permanent disablement, and medical expenses.\n\n3. Singlife Corporate Travel Insurance: This insurance is designed for business travel and provides coverage for employees during their trips. It includes benefits such as extended length of cover, trip cancellation for any reason, accidental injury coverage, terrorism coverage, and COVID-19 coverage.\n\n4. Singlife Home Insurance: This insurance

# Crude way to delete all vectors

**DO NOT RUN THIS, PLEASE** — the cell below permanently deletes vectors from the index.

In [34]:
# DANGER: this cell permanently deletes vectors from the Pinecone index.
# It is disabled by default so that "Restart & Run All" cannot wipe the index.
# To really delete: set CONFIRM_DELETE = True, run the cell once, then set it
# back to False.
CONFIRM_DELETE = False

# Embed an arbitrary string; it is only used as a probe so that the query
# returns ids. Note that this rebinds `query`, `xq` and `res`.
query = 'Nothing'
xq = openai.Embedding.create(input=query, engine=MODEL)
# Get all the data TopK=10000
# The query returns at most top_k matches, so an index holding more than
# 10000 vectors is not fully covered by a single pass.
# NOTE(review): the Pinecone client also documents index.delete(delete_all=True)
# for clearing a namespace -- confirm against the installed client version.
res = index.query(xq['data'][0]['embedding'], top_k=10000)
print('res: ', res)

# Collect the ids of every returned match.
delete_ids = [match['id'] for match in res['matches']]

if not CONFIRM_DELETE:
    # Dry run: report what would happen without touching the index.
    print(f'Dry run: {len(delete_ids)} documents would be deleted (CONFIRM_DELETE is False)')
elif not delete_ids:
    # Avoid sending a delete request with an empty id list.
    print('Nothing to delete')
else:
    # Delete all the data
    print(f'Deleting {len(delete_ids)} documents')
    delete_response = index.delete(ids=delete_ids)

res:  {'matches': [{'id': '16', 'score': 0.735422075, 'values': []},
             {'id': '0', 'score': 0.727659822, 'values': []},
             {'id': '14', 'score': 0.727108777, 'values': []},
             {'id': '5', 'score': 0.72657907, 'values': []},
             {'id': '2', 'score': 0.726052165, 'values': []},
             {'id': '4', 'score': 0.724261224, 'values': []},
             {'id': '1', 'score': 0.721723616, 'values': []},
             {'id': '12', 'score': 0.72143203, 'values': []},
             {'id': '15', 'score': 0.721340716, 'values': []},
             {'id': '11', 'score': 0.72042191, 'values': []},
             {'id': '7', 'score': 0.718745232, 'values': []},
             {'id': '9', 'score': 0.716050208, 'values': []},
             {'id': '3', 'score': 0.714014769, 'values': []},
             {'id': '13', 'score': 0.711957932, 'values': []},
             {'id': '6', 'score': 0.710202634, 'values': []},
             {'id': '10', 'score': 0.708902121, 'values': []}