In [1]:
!pip install \
  tiktoken==0.4.0 \
  openai==0.27.7 \
  langchain==0.0.179 \
  pinecone-client \
  datasets==2.13.1



In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../scraper/data.csv')

In [27]:
df.head()

Unnamed: 0,item_name,text,url
0,Refrigerator Ice and Water Filter,...,https://www.partselect.com//PS11701542-Whirlpo...
1,Refrigerator Door Shelf Bin,...,https://www.partselect.com//PS12364199-Frigida...
2,Refrigerator BELT DRIVE,...,https://www.partselect.com//PS16542496-GE-WE03...
3,Refrigerator Door Shelf Bin,...,https://www.partselect.com//PS11752778-Whirlpo...
4,Refrigerator Oven Bake Element,...,https://www.partselect.com//PS438018-Frigidair...


In [4]:
import tiktoken

tokenizer_name = tiktoken.encoding_for_model('gpt-4')
tokenizer = tiktoken.get_encoding(tokenizer_name.name)

# create the length function
def tiktoken_len(text):
    tokens = tokenizer.encode(
        text,
        disallowed_special=()
    )
    return len(tokens)

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""]
)

In [29]:
from typing_extensions import Concatenate
from uuid import uuid4
from tqdm.auto import tqdm

chunks = []

for j, page in tqdm(df.iterrows()):
    if len(page['text']) < 200:
        # if page content is short we can skip
        continue
    texts = text_splitter.split_text(page['text'])
    chunks.extend([{
        'id': page['item_name'] + f'-{i}',
        'text': texts[i],
        'url': page['url'],
        'chunk': i
    } for i in range(len(texts))])
len(chunks)

20it [00:00, 33.36it/s]


551

In [31]:
import os
import openai
from dotenv import load_dotenv

load_dotenv()

# get API key from top-right dropdown on OpenAI website
openai.api_key = os.getenv("OPENAI_API_KEY") or "OPENAI_API_KEY"

openai.Engine.list()  # check we have authenticated

<OpenAIObject list at 0x14704f890> JSON: {
  "data": [
    {
      "created": null,
      "id": "gpt-3.5-turbo-0301",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "gpt-3.5-turbo-16k-0613",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "dall-e-2",
      "object": "engine",
      "owner": "system",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "gpt-3.5-turbo-16k",
      "object": "engine",
      "owner": "openai-internal",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "whisper-1",
      "object": "engine",
      "owner": "openai-internal",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "text-embedding-ada-002",
      "object": "engine",
      "own

In [32]:
embed_model = "text-embedding-ada-002"

res = openai.Embedding.create(
    input=[
        "Sample document text goes here",
        "there will be several phrases in each batch"
    ], engine=embed_model
)

In [33]:
from pinecone import Pinecone

# initialize connection to pinecone (get API key at app.pinecone.io)
api_key = os.getenv("PINECONE_API_KEY") or "PINECONE_API_KEY"
# find your environment next to the api key in pinecone console
# env = os.getenv("PINECONE_ENVIRONMENT") or "PINECONE_ENVIRONMENT"
pc = Pinecone(api_key=api_key)
pinecone = pc

In [35]:
index_name = 'gpt-4-part-data2'

In [36]:
import time
from pinecone import ServerlessSpec

# check if index already exists (it shouldn't if this is first time)
if index_name not in pinecone.list_indexes().names():
    # if does not exist, create index
    pinecone.create_index(
        index_name,
        dimension=len(res['data'][0]['embedding']),
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-west-2'
        )
    )
    # wait for index to be initialized
    while not pinecone.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pinecone.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [37]:
from tqdm.auto import tqdm

batch_size = 100  # how many embeddings we create and insert at once

for i in tqdm(range(0, len(chunks), batch_size)):
    # find end of batch
    i_end = min(len(chunks), i+batch_size)
    meta_batch = chunks[i:i_end]
    # get ids
    ids_batch = [x['id'] for x in meta_batch]
    # get texts to encode
    texts = [x['text'] for x in meta_batch]
    # create embeddings (try-except added to avoid RateLimitError)
    try:
        res = openai.Embedding.create(input=texts, engine=embed_model)
    except:
        done = False
        while not done:
            time.sleep(5)
            try:
                res = openai.Embedding.create(input=texts, engine=embed_model)
                done = True
            except:
                pass
    embeds = [record['embedding'] for record in res['data']]
    # cleanup metadata
    meta_batch = [{
        'text': x['text'],
        'chunk': x['chunk'],
        'url': x['url']
    } for x in meta_batch]
    to_upsert = list(zip(ids_batch, embeds, meta_batch))
    # upsert to Pinecone
    index.upsert(vectors=to_upsert)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:06<00:00, 11.16s/it]


In [44]:
# query = "How can I install part number PS11752778?"
query = "The ice maker on my Whirlpool fridge is not working. How can I fix it?"

res = openai.Embedding.create(
    input=[query],
    engine=embed_model
)

# retrieve from Pinecone
xq = res['data'][0]['embedding']

# get relevant contexts (including the questions)
res = index.query(vector=xq, top_k=5, include_metadata=True)

In [45]:
res

{'matches': [{'id': 'Refrigerator Water Filter-16',
              'metadata': {'chunk': 16.0,
                           'text': 'Thank you for voting!      Jones  July 20, '
                                   '2017   My icemaker just stopped making ice '
                                   'and I know I need to replace the water '
                                   'filter ive just been dragging my feet '
                                   'about it. Is the icemaker cut off now '
                                   'because I need to replace my filter or is '
                                   'there another reason for it stopping '
                                   'now?     Hello Jones, we recommend '
                                   'replacing the water filter every 6 months, '
                                   'depending on how often you use it. '
                                   'Replacing or not replacing the filter '
                                   'would only affect the

In [46]:
# get list of retrieved text
contexts = [item['metadata']['text'] for item in res['matches']]

augmented_query = "\n\n---\n\n".join(contexts)+"\n\n-----\n\n"+query

In [47]:
print(augmented_query)

Thank you for voting!      Jones  July 20, 2017   My icemaker just stopped making ice and I know I need to replace the water filter ive just been dragging my feet about it. Is the icemaker cut off now because I need to replace my filter or is there another reason for it stopping now?     Hello Jones, we recommend replacing the water filter every 6 months, depending on how often you use it. Replacing or not replacing the filter would only affect the water dispenser, not the icemaker. The most common reason an icemaker does not get water would be a defective water valve WPW10270395. I hope this helps.    34 people found this helpful.   Was this helpful?      Thank you for voting!      Ivan  February 18, 2018   I just replaced my water filter on my Whirlpool gold however its making ice but its not dispensing water or ice. What van i do?  For model number G15SVAXVL01     Hi Ivan,

---

Back to Top      Customer Repair Stories    Average Repair Rating: 4.9 / 5.0, 114 reviews. What's this?  

In [48]:
# system message to 'prime' the model
primer = f"""You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

res = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": augmented_query}
    ]
)

In [49]:
from IPython.display import Markdown

display(Markdown(res['choices'][0]['message']['content']))

Based on the information provided, it seems that the water filter replacement would not directly affect the functioning of the ice maker. If the ice maker is not working, one possible reason could be a defective water valve. It would be best to check the water valve for any issues and consider replacing it if necessary. Additionally, other possible causes for the ice maker not working could include a clogged fill tube or fill cup, a malfunctioning water inlet valve, or an issue with the icemaker itself. It may be helpful to refer to the manufacturer's instructions or seek professional assistance for troubleshooting and fixing the ice maker.

In [50]:
# system message to 'prime' the model
primer = f"""You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by the user above
each question. If the information can not be found in the information
provided by the user you truthfully say "I don't know".
"""

res = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": query}
    ]
)

In [51]:
display(Markdown(res['choices'][0]['message']['content']))

There could be several reasons why the ice maker on your Whirlpool fridge is not working. Here are a few troubleshooting steps you can try:

1. Ensure that the ice maker is turned on. Check if the power switch or lever is set to the "On" position.
2. Make sure that the water supply is connected and turned on. Verify if the water line is not kinked or blocked.
3. Clean the ice maker and remove any ice or debris that might be blocking the ice maker's operation. You can also try manually defrosting it.
4. Check the water inlet valve for any clogs or obstructions. If necessary, clean or replace the valve.
5. Verify if the ice maker's temperature is set correctly. It should be around 0 degrees Fahrenheit (-18 degrees Celsius).
6. Check if the ice maker's bin or storage is full. If so, empty it to allow for fresh ice production.
7. Restart your fridge by unplugging it from the power source for a few minutes, then plugging it back in.

If these steps don't resolve the issue, it is recommended to consult the Whirlpool customer support or contact a professional appliance repair technician for further assistance.