In [1]:
import os
from urllib.parse import urlparse

domain = "https://www.grainger.com"
local_domain = urlparse(domain).netloc
# Create necessary directories if they don't exist
if not os.path.exists("text/"):
    os.mkdir("text/")
if not os.path.exists(f"text/{local_domain}/"):
    os.mkdir(f"text/{local_domain}/")
if not os.path.exists("processed"):
    os.mkdir("processed")

In [2]:
import requests
import json


# List of part numbers to query
part_number = "1VCE8"  # Add more part numbers as needed

# Base URL for the endpoint
base_url = "https://mobile-rest-qa.nonprod.graingercloud.com/v1/product/detail"

# File to store the results
file_path = f"text/{local_domain}/{part_number}.txt"

# Headers for the request
headers = {
    "Content-Type": "application/json"
}

# Function to fetch and process data
def fetch_product_details(part_number):
    params = {
        "partNumbers": part_number,
        "extraInfo": "false"
    }
    
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()[0]  # Assuming the response contains a list with one item

        brand = data.get("brand", {}).get("name", "N/A")
        code = data.get("code", "N/A")
        name = data.get("name", "N/A")
        picture_url = data.get("pictureUrl600", "N/A")
        price = data.get("priceData", {}).get("formattedPrice", "N/A")
        description = data.get("productDetailsDescription", "N/A")

        return {
            "Brand": brand,
            "Code": code,
            "Name": name,
            "PictureUrl600": picture_url,
            "Price": price,
            "Description": description
        }
    else:
        return None

# Fetch details for each part number and write to file
with open(file_path, "w") as file:
    details = fetch_product_details(part_number)
    if details:           
        file.write(json.dumps(details) + "\n")
        print(details)
    else:
        file.write(f"Failed to fetch details for part number: {part_number}\n")

print("Product details have been written to", file_path)


{'Brand': 'DAYTON', 'Code': '1VCE8', 'Name': 'DAYTON Standard-Duty Industrial Fan: 24 in Blade Dia, 2 Speeds, 3,850/6,200 cfm, 115 V AC', 'PictureUrl600': 'https://static.grainger.com/rp/s/is/image/Grainger/1VCF3_AS02?$lgmain$', 'Price': '$474.08', 'Description': '<p>Standard-duty industrial fan heads provide cooling in heavy manufacturing areas and other dusty or dirty environments. These fan blade, motor, and guard assemblies can be paired with a new or existing bracket or base.</p>'}
Product details have been written to text/www.grainger.com/1VCE8.txt


In [3]:
import pandas as pd
import os

# Create a list to store the text files
texts=[]

domain_dir = os.path.join("text", local_domain)
abs_domain_dir = os.path.abspath(domain_dir)

if os.path.exists(abs_domain_dir):
    # Get all the text files in the text directory
    for file in os.listdir(abs_domain_dir):
        
        try:
            # Open the file and read the text
            with open("text/" + local_domain + "/" + file, "r") as f:
                text = f.read()
                texts.append(text)
        except Exception as e:
            print(f"Exception occurred during reading file '{text}': {e}")       
else:
    print(f"Directory '{abs_domain_dir}' does not exist.")
    
# Create a dataframe from the list of texts
df = pd.DataFrame(texts, columns = ['Brand', 'Code', 'Name', 'PictureUrl60', 'Price', 'Description'])

# Set the text column to be the raw text with the newlines removed
df['text'] = df.fname + ". " + df.text
df.to_csv('processed/scraped.csv')
df.head()

ValueError: Shape of passed values is (1, 1), indices imply (1, 6)