# This notebook is intended to create the initial knowledgebase for pinecone. We shall be having another page for expanding the knowledgebase


In [23]:
import pandas as pd


In [24]:

df = pd.read_excel("Knowledgebase.xlsx")

In [25]:
df.columns

Index(['ITEM Description', 'GOEDEREN OMSCHRIJVING', 'Goederen Code',
       'CATEGORY'],
      dtype='object')

In [41]:
import os
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [27]:
df

Unnamed: 0,ITEM Description,GOEDEREN OMSCHRIJVING,Goederen Code,CATEGORY
0,Chemical Guys AIR_301_04 Sample Kit,LUCHTVERFRISSER,33073000,
1,LITTLE TREES Car Air Freshener (Cherry),LUCHTVERFRISSER,33073000,
2,LITTLE TREES Car Air Freshener (Watermelon),LUCHTVERFRISSER,33073000,
3,Peach Berry Car Air Fresheners,LUCHTVERFRISSER,33073000,
4,Air Jungles New Car Scent Clip,LUCHTVERFRISSER,33073000,
...,...,...,...,...
138,Elenzga New Women's Midi Dress,DAMES KLEDING,61042200,
139,Mi Lightweight Zipper Hobo Bag,TASSEN,42029900,
140,Women's Square Toe Stiletto High Heel Sandals,DAMES SCHOENEN,64051000,
141,3pcs Women's Golden Styling Buckle Belts,RIEMEN,42033000,


In [28]:
from langchain.embeddings.openai import OpenAIEmbeddings
from pinecone import Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore

In [42]:
embeddings = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY_2"))  

In [31]:
import getpass
import os
# Pinecone client
if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

In [32]:
index_name = "ship2aruba" 
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [43]:
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)


In [45]:
from langchain_core.documents import Document
# How pinecone expects each chunk to be
docs = [Document(page_content="The description of this item is:"+ row['ITEM Description'] + "/nThe description of the goods is:"+row["GOEDEREN OMSCHRIJVING"],metadata={"desc":row["ITEM Description"],"code":row["Goederen Code"],"gdesc":row["GOEDEREN OMSCHRIJVING"],"category":"NaN"}) for index, row in df.iterrows()]


In [46]:
docs # 

[Document(metadata={'desc': 'Chemical Guys AIR_301_04 Sample Kit', 'code': 33073000, 'gdesc': 'LUCHTVERFRISSER', 'category': 'NaN'}, page_content='The description of this item is:Chemical Guys AIR_301_04 Sample Kit/nThe description of the goods is:LUCHTVERFRISSER'),
 Document(metadata={'desc': 'LITTLE TREES Car Air Freshener (Cherry)', 'code': 33073000, 'gdesc': 'LUCHTVERFRISSER', 'category': 'NaN'}, page_content='The description of this item is:LITTLE TREES Car Air Freshener (Cherry)/nThe description of the goods is:LUCHTVERFRISSER'),
 Document(metadata={'desc': 'LITTLE TREES Car Air Freshener (Watermelon)', 'code': 33073000, 'gdesc': 'LUCHTVERFRISSER', 'category': 'NaN'}, page_content='The description of this item is:LITTLE TREES Car Air Freshener (Watermelon)/nThe description of the goods is:LUCHTVERFRISSER'),
 Document(metadata={'desc': 'Peach Berry Car Air Fresheners', 'code': 33073000, 'gdesc': 'LUCHTVERFRISSER', 'category': 'NaN'}, page_content='The description of this item is:P

In [48]:
# Now add all of the docs in the pinceone namespace
uuids = [str(doc.metadata["desc"]).encode("ascii","ignore").decode() for doc in ((docs))]
batch_size = 10
for i in range(0, len(docs), batch_size):
    batch = docs[i:i+batch_size]
    batch_ids = uuids[i:i+batch_size]
    print("Current Batch Index is:",batch_ids)
    vector_store.add_documents(batch,ids=batch_ids)

Current Batch Index is: ['Chemical Guys AIR_301_04 Sample Kit', 'LITTLE TREES Car Air Freshener (Cherry)', 'LITTLE TREES Car Air Freshener (Watermelon)', 'Peach Berry Car Air Fresheners', 'Air Jungles New Car Scent Clip', 'Little Trees Bubble Gum Air Freshener', 'Chemical Guys Cherry Blast Freshener', 'Chemical Guys New Car Smell Freshener', 'Glad ForceFlex Trash Bags', 'TOMALL Vinyl Reflective Stickers']
Current Batch Index is: ['Schenley Steam Mop Cleaner', 'Govee Car LED Lights', 'Mini Displayport to HDMI Adapter', 'MacBook Air 13" Battery', 'Knife Sharpening Stone', 'Cordless LED Table Lamp', 'konhill Mens Fashion Sneakers', 'Looft Air Lighter X (Charcoal Starter)', 'Rain Shower Head (LED)', 'Movie DUNE PART 2 (AMC Exclusive)']
Current Batch Index is: ['SHIEGLAM Floral Blush Palette', 'SHIEGLAM Foundation Stick', 'SHIEGLAM Smart Cookie Palette', 'SHIEGLAM Cream Concealer', 'SHIEGLAM Matte Foundation', 'SHIEGLAM Lip Tint', 'SHIEGLAM Setting Powder', 'SHIEGLAM Setting Spray', 'SHIEGL

KeyboardInterrupt: 

In [50]:
vector_store.similarity_search(query="Something used for freshening",k=2)

[Document(id='Chemical Guys New Car Smell Freshener', metadata={'category': 'NaN', 'code': 33073000.0, 'desc': 'Chemical Guys New Car Smell Freshener', 'gdesc': 'LUCHTVERFRISSER'}, page_content='The description of this item is:Chemical Guys New Car Smell Freshener/nThe description of the goods is:LUCHTVERFRISSER'),
 Document(id='Chemical Guys Cherry Blast Freshener', metadata={'category': 'NaN', 'code': 33073000.0, 'desc': 'Chemical Guys Cherry Blast Freshener', 'gdesc': 'LUCHTVERFRISSER'}, page_content='The description of this item is:Chemical Guys Cherry Blast Freshener/nThe description of the goods is:LUCHTVERFRISSER')]