curl -X GET "https://your-development-store.myshopify.com/admin/api/2023-07/products.json" \
-H "X-Shopify-Access-Token: {access_token}"

In [5]:
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv
import time
import pandas as pd
# Let pandas show up to 1000 characters per cell instead of truncating long text.
pd.set_option("display.max_colwidth", 1000)
# Load variables from a .env file into the process environment;
# get_all_products reads SHOPIFY_API_KEY via os.getenv.
load_dotenv()

# Store base URL and Admin API version used to build the products endpoint.
shop_url = "https://0349d9.myshopify.com"
api_version = "2023-07"

def get_all_products(shop_url, api_version):
    """Fetch every product of a Shopify store through the REST Admin API.

    Requests ``products.json`` with a page size of 250 and keeps following
    the ``next`` relation exposed in ``response.links`` until the API stops
    advertising another page.

    Args:
        shop_url: Base URL of the store, e.g. ``"https://example.myshopify.com"``.
        api_version: Admin API version path segment, e.g. ``"2023-07"``.

    Returns:
        list: The product dicts of all pages, in the order they were received.

    Raises:
        requests.HTTPError: If any page request answers with a 4xx/5xx status
            (for example when ``SHOPIFY_API_KEY`` is missing or invalid).
    """
    url = f"{shop_url}/admin/api/{api_version}/products.json"
    headers = {"X-Shopify-Access-Token": os.getenv("SHOPIFY_API_KEY")}
    params = {"limit": 250}

    all_products = []
    response = requests.get(url, headers=headers, params=params)
    while True:
        # Fail loudly on an error status instead of hitting a KeyError on
        # the missing "products" key of an error payload.
        response.raise_for_status()
        all_products.extend(response.json()["products"])

        next_link = response.links.get("next")
        if not next_link:
            # No further page advertised: this was the last one.
            return all_products

        # Keep the original 2 s pause, but only between page requests.
        time.sleep(2)
        # The "next" URL already carries the paging parameters, so no params here.
        response = requests.get(next_link["url"], headers=headers)
    
def clean_html_tags(row):
    """Replace the row's ``body_html`` markup with its plain text.

    The value is parsed with BeautifulSoup's ``html.parser`` and overwritten
    in place with the extracted text; the same row object is returned.
    """
    row["body_html"] = BeautifulSoup(row["body_html"], "html.parser").get_text()
    return row

def get_img_src(row):
    """Collect the ``src`` of every entry in ``row["images"]``.

    The resulting list is stored on the row under ``"images_list"`` and the
    mutated row is returned.
    """
    row["images_list"] = [picture["src"] for picture in row["images"]]
    return row

def create_expandend_description(row):
    """Build ``row["expanded_description"]`` from title, description and tags.

    With neither description nor tags the bare title is used. Otherwise the
    result is ``"Title: ..."`` followed by ``" Description: ..."`` and/or
    ``" Tags: ..."`` for whichever of the two fields is not the empty string.
    The row is modified in place and returned.
    """
    body = row["body_html"]
    tags = row["tags"]
    title = row["title"]
    has_body = body != ""
    has_tags = tags != ""

    # Nothing to label: fall back to the plain title.
    if not has_body and not has_tags:
        row["expanded_description"] = title
        return row

    segments = ["Title: " + title]
    if has_body:
        segments.append("Description: " + body)
    if has_tags:
        segments.append("Tags: " + tags)
    row["expanded_description"] = " ".join(segments)
    return row

def df_preprocessing(df):
    """Reduce the raw product table to the cleaned columns used downstream.

    Steps, in order:
      1. keep only rows whose ``status`` is ``"active"``;
      2. replace missing values with empty strings;
      3. collect image URLs into ``images_list``;
      4. strip HTML from ``body_html``;
      5. build ``expanded_description`` from the already cleaned text;
      6. rename ``body_html`` to ``description`` and select the output columns.

    The HTML is stripped *before* the expanded description is assembled so
    that the description embedded there is plain text and a body consisting
    only of markup is treated as empty.

    Args:
        df: DataFrame built from the Shopify product dicts; must contain the
            columns ``status``, ``images``, ``body_html``, ``tags``,
            ``title``, ``id`` and ``handle``.

    Returns:
        A new DataFrame with the columns ``id``, ``title``, ``handle``,
        ``description``, ``expanded_description`` and ``images_list``.
        The input frame is not modified.
    """
    # fillna() without inplace returns a fresh frame, which avoids mutating
    # (and pandas warning about) the filtered slice of the caller's frame.
    active = df[df["status"] == "active"].fillna("")
    active = active.apply(get_img_src, axis=1)
    active = active.apply(clean_html_tags, axis=1)
    active = active.apply(create_expandend_description, axis=1)
    active = active.rename(columns={"body_html": "description"})
    return active[["id", "title", "handle", "description", "expanded_description", "images_list"]]

# Download the catalogue, tabulate it and run the preprocessing pipeline.
all_products = get_all_products(shop_url, api_version)
product_df = pd.DataFrame(all_products)
cleaned_df = df_preprocessing(product_df)


# Persist the cleaned table twice: as CSV (without the index column) and as a
# JSON array of records. products.json is the file the JSONLoader cell reads.
cleaned_df.to_csv("products.csv", index=False)
cleaned_products_json = cleaned_df.to_json(orient="records")
with open("products.json", "w") as f:
    f.write(cleaned_products_json)

  soup = BeautifulSoup(row["body_html"], "html.parser")


In [28]:
from langchain.document_loaders.json_loader import JSONLoader
# Define the metadata extraction function.
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected product fields from ``record`` into ``metadata``.

    The keys ``id``, ``title``, ``tags``, ``images_list`` and ``handle`` are
    always written; a field absent from ``record`` is stored as ``None``.
    ``metadata`` is updated in place and returned.
    """
    for field in ("id", "title", "tags", "images_list", "handle"):
        metadata[field] = record.get(field)
    return metadata


# Configure the loader on ./products.json: the jq expression '.[]' selects each
# element of the top-level array, "expanded_description" supplies the document
# text and metadata_func fills in the per-document metadata.
loader = JSONLoader(
    file_path='./products.json',
    jq_schema='.[]',
    content_key="expanded_description",
    metadata_func=metadata_func
)

# Read the file and materialise the documents.
documents = loader.load()


In [31]:
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
# In this cell the splitter is used only through its count_tokens() method
# (see count_tokens below); no text is actually split here.
splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0)

def count_tokens(text):
    """Return the token count of ``text`` as reported by the module-level ``splitter``."""
    token_total = splitter.count_tokens(text=text)
    return token_total

# Add up the token counts of all loaded documents.
summed_tokens = 0
for product in documents:
    summed_tokens += count_tokens(product.page_content)

# Scale the total to thousands of tokens and multiply by 0.0001.
# NOTE(review): 0.0001 looks like a price per 1K tokens for an embedding
# model — confirm which model/price this refers to, and whether the
# splitter's tokenizer matches the one that is actually billed.
summed_tokens / 1000 * 0.0001 

0.0003005

In [1]:
from utils import load_vectorstore
from dotenv import load_dotenv
load_dotenv()

# Obtain the vector store through the project helper from utils.
# NOTE(review): presumably this opens the persisted "products" index under
# ./shopify_langchaintesting_vectorstore — confirm against utils.load_vectorstore.
vectorstore = load_vectorstore(vectorstore_path="./shopify_langchaintesting_vectorstore", index_name="products")

In [2]:
# Query the vector store with a natural-language shopper request.
result = vectorstore.similarity_search("I want a adidas shoe for my child")

In [4]:
# Show each hit's text followed by the image URL list stored in its metadata.
for product in result:
    print(product.page_content)
    print(product.metadata["images_list"])

Title: ADIDAS | KID'S STAN SMITH Description: The Stan Smith owned the tennis court in the '70s. Today it runs the streets with the same clean, classic style. These kids' shoes preserve the iconic look of the original, made in leather with punched 3-Stripes, heel and tongue logos and lightweight step-in cushioning. Tags: adidas, egnition-sample-data, kid
['https://cdn.shopify.com/s/files/1/0772/8542/5497/products/7883dc186e15bf29dad696e1e989e914.jpg?v=1692705055', 'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/8cd561824439482e3cea5ba8e3a6e2f6.jpg?v=1692705056', 'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/2e1f72987692d2dcc3c02be2f194d6c5.jpg?v=1692705056', 'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/6216e82660d881e6f2b0e46dc3f8844a.jpg?v=1692705056', 'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/e5247cc373e3b61f18013282a6d9c3c0.jpg?v=1692705056']
Title: NIKE | TODDLER ROSHE ONE Description: The Nike Roshe One Infant Shoe offers brea

In [11]:
# `os` is imported here as well so the cell no longer depends on another cell
# having imported it earlier in the kernel session.
import os

import requests
from dotenv import load_dotenv

load_dotenv()
# os.environ[...] raises a KeyError naming SHOPIFY_STATIC_TOKEN when the
# variable is not configured, instead of the opaque TypeError produced by
# concatenating "Bearer " with None.
header = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + os.environ["SHOPIFY_STATIC_TOKEN"]
}
# Call the locally running product search endpoint and ask for the 5 best matches.
response = requests.get("http://127.0.0.1:8000/product_search/", params={"query": "I want a adidas shoe for my child", "k": 5}, headers=header)
response.text

'[{"page_content":"Title: ADIDAS | KID\'S STAN SMITH Description: The Stan Smith owned the tennis court in the \'70s. Today it runs the streets with the same clean, classic style. These kids\' shoes preserve the iconic look of the original, made in leather with punched 3-Stripes, heel and tongue logos and lightweight step-in cushioning. Tags: adidas, egnition-sample-data, kid","metadata":{"source":"/home/leon/code/shopify_langchain_testing/products.json","seq_num":3,"id":8497889673561,"title":"ADIDAS | KID\'S STAN SMITH","tags":null,"images_list":["https://cdn.shopify.com/s/files/1/0772/8542/5497/products/7883dc186e15bf29dad696e1e989e914.jpg?v=1692705055","https://cdn.shopify.com/s/files/1/0772/8542/5497/products/8cd561824439482e3cea5ba8e3a6e2f6.jpg?v=1692705056","https://cdn.shopify.com/s/files/1/0772/8542/5497/products/2e1f72987692d2dcc3c02be2f194d6c5.jpg?v=1692705056","https://cdn.shopify.com/s/files/1/0772/8542/5497/products/6216e82660d881e6f2b0e46dc3f8844a.jpg?v=1692705056","https

In [6]:
import os
import base64
# 24 random bytes from the OS encode to exactly 32 URL-safe base64 characters
# (24 is a multiple of 3, so no '=' padding is produced).
token_bytes = os.urandom(24)
safe_token = base64.urlsafe_b64encode(token_bytes).decode('utf-8')
# NOTE(review): printing stores the token in the saved cell output; clear the
# output before sharing the notebook if this value is used as a real credential.
print(safe_token)
print("Token length:", len(safe_token))

18mIeyZHogqcYV--EiSDUWpc9Uscjh02
Token length: 32


In [3]:
# Decode the response body as JSON and display it.
# NOTE(review): this cell's execution count (In [3]) is lower than that of the
# In [11] cell which assigns `response` in this file — it relies on kernel
# state and must be run after that request cell.
response.json()

[{'page_content': "Title: ADIDAS | KID'S STAN SMITH Description: The Stan Smith owned the tennis court in the '70s. Today it runs the streets with the same clean, classic style. These kids' shoes preserve the iconic look of the original, made in leather with punched 3-Stripes, heel and tongue logos and lightweight step-in cushioning. Tags: adidas, egnition-sample-data, kid",
  'metadata': {'source': '/home/leon/code/shopify_langchain_testing/products.json',
   'seq_num': 3,
   'id': 8497889673561,
   'title': "ADIDAS | KID'S STAN SMITH",
   'tags': None,
   'images_list': ['https://cdn.shopify.com/s/files/1/0772/8542/5497/products/7883dc186e15bf29dad696e1e989e914.jpg?v=1692705055',
    'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/8cd561824439482e3cea5ba8e3a6e2f6.jpg?v=1692705056',
    'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/2e1f72987692d2dcc3c02be2f194d6c5.jpg?v=1692705056',
    'https://cdn.shopify.com/s/files/1/0772/8542/5497/products/6216e82660d881e6

In [5]:
"https://0349d9.myshopify.com/products/" + response.json()[0]["metadata"]["handle"]

'https://0349d9.myshopify.com/products/adidas-kids-stan-smith'