In [1]:
import os

# Add OpenAI library
import openai

# Get Configuration Settings
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
openai.__version__

'0.28.1'

In [3]:
# Configure OpenAI API using Azure OpenAI
openai.api_key = os.getenv("API_KEY")
openai.api_base = os.getenv("ENDPOINT")
openai.api_type = "azure"  # Necessary for using the OpenAI library with Azure OpenAI
openai.api_version = "2024-02-01"  # Latest / target version of the API

In [4]:
from langchain.embeddings import OpenAIEmbeddings

In [5]:
# OpenAI Settings
model_deployment = "text-embedding-ada-002"
# SDK calls this "engine", but naming it "deployment_name" for clarity

model_name = "text-embedding-ada-002"

In [6]:
openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    openai_api_version = os.getenv("OPENAI_API_VERSION"), openai_api_key = os.getenv("API_KEY"),
    openai_api_base = os.getenv("ENDPOINT"), openai_api_type = "azure"
)

  openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(


In [7]:
from langchain_chroma import Chroma

vector_store_contoso = Chroma(
    collection_name="Contoso-Outdoor-Docs",
    embedding_function=openai_embeddings,
    persist_directory="../Contoso-Outdoor-Vector-DB",  # Where to save data locally, remove if not neccesary
)

## Index markdown files

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document

def load_and_process_markdowns(pdf_folder_path):
        """
        This method is responsible for upserting PDF content.
        It loads the PDF file, splits the content into chunks, and then upserts the chunks into VecDB.
        """
        documents = []
        for file in os.listdir(pdf_folder_path):
            if file.endswith(".md"):
                pdf_path = os.path.join(pdf_folder_path, file)
                loader = UnstructuredMarkdownLoader(pdf_path)
                documents.extend(loader.load())
        
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=150, separators=["\n", " ", "?", ".", "!"])
        docs = text_splitter.split_documents(documents)
        return docs

In [23]:
markdown_folder_path =  "../data/manual_info"
splits = load_and_process_markdowns(markdown_folder_path)

In [27]:
print(len(splits))
splits[0]

643


Document(metadata={'source': '../data/manual_info/product_info_2.md'}, page_content='Information about product item_number: 2\n\nAdventurer Pro Backpack, price $90,\n\nBrand\n\nHikeMate\n\nCategory\n\nBackpacks\n\nFeatures\n\n40L capacity for ample storage space\n\nErgonomic design for comfortable carrying\n\nDurable nylon material for long-lasting performance\n\nMultiple compartments and pockets for organized storage\n\nHydration system compatibility with a dedicated hydration bladder sleeve and tube port\n\nAdjustable and padded shoulder straps for a customized fit and enhanced comfort')

In [29]:
import time
from uuid import uuid4

batch_size = 50  # Adjust this batch size based on your rate limit
delay = 60  # Delay in seconds between batches

for i in range(0, len(splits), batch_size):
    batch = splits[i:i+batch_size]
    uuids = [str(uuid4()) for _ in range(len(batch))]
    print(f"Upserting {i} documents")
    # try:
    response = vector_store_contoso.add_documents(documents=batch, ids=uuids)
    #     print(f"Response: {response}")
    # except Exception as e:
    #     print(e)
    time.sleep(delay)  # Delay to prevent hitting rate limits


Upserting 0 documents
Upserting 50 documents
Upserting 100 documents
Upserting 150 documents
Upserting 200 documents
Upserting 250 documents
Upserting 300 documents
Upserting 350 documents
Upserting 400 documents
Upserting 450 documents
Upserting 500 documents
Upserting 550 documents
Upserting 600 documents


## Propmting

In [30]:
from langchain_community.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path="../data/product_info/products.csv")
data = loader.load()

In [31]:
data[0]

Document(metadata={'source': '../data/product_info/products.csv', 'row': 0}, page_content='id: 1\nname: TrailMaster X4 Tent\nprice: 250.0\ncategory: Tents\nbrand: OutdoorLiving\ndescription: Unveiling the TrailMaster X4 Tent from OutdoorLiving, your home away from home for your next camping adventure. Crafted from durable polyester, this tent boasts a spacious interior perfect for four occupants. It ensures your dryness under drizzly skies thanks to its water-resistant construction, and the accompanying rainfly adds an extra layer of weather protection. It offers refreshing airflow and bug defence, courtesy of its mesh panels. Accessibility is not an issue with its multiple doors and interior pockets that keep small items tidy. Reflective guy lines grant better visibility at night, and the freestanding design simplifies setup and relocation. With the included carry bag, transporting this convenient abode becomes a breeze. Be it an overnight getaway or a week-long nature escapade, the T

In [32]:
import pandas as pd
product = pd.read_csv("../data/product_info/products.csv")
product.head()

Unnamed: 0,id,name,price,category,brand,description
0,1,TrailMaster X4 Tent,250.0,Tents,OutdoorLiving,Unveiling the TrailMaster X4 Tent from Outdoor...
1,2,Adventurer Pro Backpack,90.0,Backpacks,HikeMate,Venture into the wilderness with the HikeMate'...
2,3,Summit Breeze Jacket,120.0,Hiking Clothing,MountainStyle,Discover the joy of hiking with MountainStyle'...
3,4,TrekReady Hiking Boots,140.0,Hiking Footwear,TrekReady,Introducing the TrekReady Hiking Boots - stepp...
4,5,BaseCamp Folding Table,60.0,Camping Tables,CampBuddy,CampBuddy's BaseCamp Folding Table is an adven...


In [33]:
product.shape

(20, 6)

In [34]:
uuids = [str(uuid4()) for _ in range(len(data))]
vector_store_contoso.add_documents(documents=data, ids=uuids)

['47f8d2f4-8a98-416a-a7a2-e0619d6e97ff',
 'f440e15b-3c4a-4e04-9936-f7b3779a7ec2',
 'dd685673-69b0-4156-93f5-85eab6cb953f',
 'ace79c00-612b-40df-8a8e-a608e33e198d',
 '446e654c-b973-4960-8a2e-1865350c04f5',
 '27b30f2d-4410-4941-ba28-10c10cd7c87d',
 '1264ab85-d751-4828-a185-ded9b58ca3f2',
 '76585a1c-7590-4007-9800-8176c827bc1c',
 '493ca00e-d39a-4c4a-9f4f-648c918b8731',
 '82ddb6ff-3dec-4220-8425-22d9284bc17f',
 '32317c6d-4343-47b6-8e2b-cd9d42eb5740',
 'f8de7177-2dba-46e5-8ac7-7b708d536cc8',
 'f058f14b-4966-4ba5-9dd9-516a08e552c7',
 '9fa52921-8633-4bec-b5d9-93cfee37475e',
 '2126305c-e4fe-44bf-bef6-350e1d8c6dcd',
 'a97a993a-e832-4175-8991-482f9068182a',
 '6c568e42-f336-44a0-a08a-66cc56029615',
 'f5eb11a8-920b-45a6-ac8b-0f548e1282da',
 '298704ad-effe-4d8a-b6a8-30bd9a8d5400',
 '90d9d88f-9b4d-47ce-99fc-c116427fb03b']

## Azure OpenAI

In [1]:
import os
import openai

openai.api_key = os.getenv("API_KEY")
openai.api_base = os.getenv("ENDPOINT")
openai.api_type = "azure"  # Necessary for using the OpenAI library with Azure OpenAI
openai.api_version = os.getenv("OPENAI_API_VERSION")  # Latest / target version of the API

In [2]:
response = openai.ChatCompletion.create(
    engine="Voicetask", # The deployment name you chose when you deployed the GPT-3.5-Turbo or GPT-4 model.
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who created GPT 3.5?"}
    ],
    temperature = 0.5
)

print(response)

# To print only the response content text:
# print(response['choices'][0]['message']['content'])

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "There is no such thing as GPT 3.5. The latest version of GPT (Generative Pre-trained Transformer) is GPT-3, which was created by OpenAI.",
        "role": "assistant"
      }
    }
  ],
  "created": 1728407424,
  "id": "chatcmpl-AG812jKLYHgDN2nAS4UT4OLDaKmla",
  "model": "gpt-35-turbo",
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 39,
    "prompt_tokens": 28,
    "total_tokens": 67
  }
}


In [20]:
print(response['choices'][0]['message']['content'])

I'm sorry, but I am unable to provide real-time pricing information for products as it fluctuates frequently. It would be best to check the official website of TrailMaster or online stores selling the product to get accurate and up-to-date pricing information.


In [None]:
response = openai.ChatCompletion.create(
    engine="Voicetask", # The deployment name you chose when you deployed the GPT-3.5-Turbo or GPT-4 model.
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the prize of TrailMaster X4 Tent?"}
    ],
    temperature = 0.5
)

print(response)

# To print only the response content text:
# print(response['choices'][0]['message']['content'])