In [64]:
from lxml import html
from lxml import objectify
import requests
import json
import pandas as pd
import csv
import numpy as np
import time
import os
os.getcwd()
import Utils

In [67]:
class Scraper_REI:
    def __init__(self):
        self.BASE_URL = 'https://www.rei.com'
        self.HEADERS = {
            'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'en-US,en;q=0.9',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'referer': 'https://www.google.com/',
        }
    
    def getSpecs_REI(self, url):
        tree = self.get(url)

    #     # get product name
    #     metadata = tree.xpath('//script[@data-client-store="product-metadata"]')[0]
    #     metadata = html.tostring(metadata).decode()
    #     metadata = metadata.split('<script type="application/json" data-client-store="product-metadata">')[1].split('</script>')[0]
    #     metadata = metadata.replace('\n', '')
    #     metadata = json.loads(metadata)

        # get product specs
        specs = tree.xpath('//script[@data-client-store="product-details"]')[0]
        specs = html.tostring(specs).decode()
        specs = specs.split('<script type="application/json" data-client-store="product-details">')[1].split('</script>')[0]
        specs = specs.replace('\n', '')
        specs = json.loads(specs)

        # get product price
        summary = tree.xpath('//script[@type="application/ld+json"]')[0]
        summary = html.tostring(summary).decode()
        summary = summary.split('<script type="application/ld+json">')[1].split('</script>')[0]
        summary = summary.replace('\n', '')
        summary = json.loads(summary)

        data = {}
        data['URL'] = url
        data['Name'] = summary["name"]
        data['Brand'] = specs["brand"]
        data['lowPrice'] = summary['offers'][0]['price']
        data['highPrice'] = summary['offers'][-1]['price']
        data['PriceCurrency'] = summary['offers'][0]['priceCurrency']
        if "overall" in specs["reviewsSummary"]:
            data['Rating'] = specs["reviewsSummary"]["overall"]
            data['NumRatings'] = specs["reviewsSummary"]["total"]
        for item in specs["specs"]:
            data[item["name"]] = item["values"][0]
        return data
    
    
    def getSpecs_REI_Garage(self, url):
        tree = self.get(url)

        info = tree.xpath('//script[@id="page-data"]/text()')[0]
        info = info.replace('\n', '')
        info = json.loads(info)
        info = info["product"]
        specs = info['specifications']

        data = {}
        data["URL"] = url
        data["Name"] = info["title"] # maybe use info["cleanTitle"]
        data["Image"] = "https://www.rei.com" + info["media"][0]['product']
        data["Brand"] = info["brand"]["label"] # maybe replace "label" with "canonicalName"
        data["lowPrice"] = info["displayPrice"]["min"]
        data["highPrice"] = info["displayPrice"]["max"]
        data["PriceCurrency"] = "USD" # TODO change "USD" to actually find and grab currency
        if "overall" in info["reviewsSummary"]:
            data['Rating'] = info["reviewsSummary"]["overall"]
            data['NumRatings'] = info["reviewsSummary"]["total"]
        else:
            data['Rating'] = -1
            data['NumRatings'] = 0
        for item in specs["specs"]:
            data[item["name"]] = item["values"][0]
        return data
    
    def getProductsFromPage_REI(self, url):
        tree = self.get(url)

        prodList = tree.xpath('//div[@id="search-results"]/.//a')

        links = []
        for index, item in enumerate(prodList):
            if index % 2 != 0:
                continue
            link = html.tostring(item).decode()
            link = link.split('href="')[1].split('"')[0]
            if 'product' in link:
                links.append(link)
        return links
    
    
    def getPages_REI(self, url):
        tree = self.get(url)

        pages = tree.xpath('//nav[@class="_3-4shQxwfGRyzNItrZFEiC"]/.//a/text()')
        if(len(pages) == 0):
            return [url]
        numPages = int(pages[-1])
        urlList = []
        if "?" in url:
            pagesStr = "&page="
        else:
            pagesStr = "?page="
        for i in range(numPages):
            pageUrl = url + pagesStr + str(i+1)
            urlList.append(pageUrl)
        return urlList
    
    
    def getProducts_REI(self, urls):
        urlPages = getPages_REI(urls) # get urls of each page of the website category
        urlProducts = []
        for url in urlPages: #get urls of each product on each page and append to list
            productsURL = getProductsFromPage_REI(url)
            for productURL in productsURL:
                urlProducts.append(productURL)
        products = []
        for prodURL in urlProducts:
            print(prodURL)
            # different scraping methods needed for rei garage
            if "rei-garage" not in prodURL: 
                specs = getSpecs_REI(self.BASE_URL + prodURL)
            else:
                specs = getSpecs_REI_Garage(self.BASE_URL + prodURL)
            products.append(specs) # append product list to specs
        return products
    
    
    def scrape_REI(self, categories):
        products = {}
        for category in categories:
            print(category)
            products[category] = []
            for url in categories[category]:
                products[category].append(getProducts_REI(url))
        return products
    
    
    # helper function for get requests
    def get(self, url):
        return Utils.get(url, self.HEADERS)

In [9]:
categories = {}

backPackURLs = [
    'https://www.rei.com/c/backpacking-packs',
    'https://www.rei.com/c/day-packs',
    'https://www.rei.com/c/hiking-hydration-packs',
    'https://www.rei.com/c/baby-carrier-packs'
]

tentURLS = [
    'https://www.rei.com/c/backpacking-tents',
    'https://www.rei.com/c/camping-tents',
    
]

bivyURLs = [
    'https://www.rei.com/c/bivy-sacks'
]

sleepingBagURLs = [
    'https://www.rei.com/c/mens-sleeping-bags',
    'https://www.rei.com/c/womens-sleeping-bags',
    'https://www.rei.com/c/double-sleeping-bags',
    'https://www.rei.com/c/kids-sleeping-bags',   
]

categories['Backpack'] = backPackURLs
categories['Tent'] = tentURLS
categories['Bivy'] = bivyURLs
categories['Sleeping Bag'] = sleepingBagURLs


In [65]:
data = scrape_REI(categories)

/product/134072/osprey-atmos-ag-65-pack-mens
/product/168251/rei-co-op-trailbreak-60-pack-mens
/product/141492/osprey-kyte-46-pack-womens
/product/144662/osprey-rook-65-pack-mens
/product/126937/rei-co-op-traverse-35-pack-mens
/rei-garage/product/162516/gregory-paragon-58-pack-mens
/product/125754/gregory-baltoro-65-pack-mens
/product/126717/osprey-aura-ag-65-pack-womens
/product/148589/rei-co-op-flash-55-pack-mens
/product/141491/osprey-kestrel-48-pack-mens
/rei-garage/product/162514/gregory-maven-55-pack-womens
/product/127753/deuter-aircontact-lite-65-10-pack-mens
/rei-garage/product/162515/gregory-paragon-48-pack-mens
/product/126716/osprey-aura-ag-50-pack-womens
/product/144663/osprey-renn-65-pack-womens
/rei-garage/product/168378/alps-mountaineering-baja-60-pack
/product/126707/osprey-atmos-ag-50-pack-mens
/product/168252/rei-co-op-trailbreak-60-pack-womens
/product/111288/osprey-tempest-40-pack-womens
/product/111240/osprey-talon-44-pack-mens
/product/126709/osprey-exos-58-pack-

/product/148549/osprey-daylite-plus-pack-special-edition
/product/148579/rei-co-op-trail-hydro-30l-hydration-pack-3-liters-womens
/product/165343/gregory-nano-h2o-22l-hydration-pack-3-liters
/product/144669/osprey-manta-34-hydration-pack-mens-25-liters
/product/144762/osprey-skimmer-28-hydration-pack-womens
/product/111299/osprey-stratos-36-pack-mens
/product/144666/osprey-skarab-22-hydration-pack-mens-25-liters
/product/113337/osprey-sirrus-36-pack-womens
/product/168501/rei-co-op-tarn-12-pack-kids
/product/144763/osprey-mira-22-hydration-pack-25-liters-womens
/product/170802/camelbak-rim-runner-22-hydration-pack-mens-25-liters
/product/144764/osprey-mira-32-hydration-pack-25-liters-womens
/product/144668/osprey-manta-24-hydration-pack-mens-25-liters
/product/111297/osprey-talon-33-pack-mens
/product/124927/patagonia-nine-trails-28l-pack-mens
/product/145661/gregory-jade-28-pack-womens
/product/177573/osprey-talon-22-pack-mens
/product/165346/gregory-juno-24-h2o-hydration-pack-womens-

/product/111303/osprey-talon-6-hydration-waistpack-mens
/product/126376/gregory-swift-25-hydration-pack-womens-3-liters
/product/142949/gregory-inertia-20-hydration-pack-3-liters
/product/171493/salomon-xa-25-hydration-pack
/product/170804/camelbak-helena-20-hydration-pack-womens-25-liters
/product/111304/osprey-tempest-6-hydration-waistpack-womens
/product/148576/rei-co-op-trail-hydro-20l-hydration-pack-mens-2-liters
/product/142949/gregory-inertia-20-hydration-pack-3-liters
/product/175966/camelbak-octane-25l-hydration-pack-2-liters
/product/175965/camelbak-octane-18l-hydration-pack-2-liters
/product/126377/gregory-swift-30-hydration-pack-womens-3-liters
/product/170805/camelbak-sequoia-24-hydration-pack-womens-3-liters
/product/170801/camelbak-fourteener-26-hydration-pack-mens-3-liters
/product/169398/hydro-flask-down-shift-14-hydration-pack-2-liters
/product/165345/gregory-citro-30-h2o-hydration-pack-mens-3-liters
/product/148930/ultraspire-epic-xt-hydration-pack
/product/126375/gr

/product/168529/big-agnes-dog-house-6-tent
/product/176677/ikamper-skycamp-mini-roof-top-tent
/product/187881/rightline-gear-suv-tent
/product/169454/nemo-aurora-2p-tent-with-footprint
/product/182594/kelty-towpath-3-tent
/product/187021/marmot-guest-house-6p-tent
/product/178880/msr-habitude-6-tent
/product/187887/rightline-gear-midsize-5-short-bed-truck-tent-tall-bed
/product/187694/eureka-space-camp-6-person-tent
/product/187886/rightline-gear-full-size-8-long-bed-truck-tent
/product/187889/rightline-gear-6-compact-bed-truck-tent
/product/187886/rightline-gear-full-size-8-long-bed-truck-tent
/product/187889/rightline-gear-6-compact-bed-truck-tent
/product/187888/rightline-gear-midsize-6-long-bed-truck-tent-tall-bed
/product/187022/marmot-orbit-6p-tent
/product/176677/ikamper-skycamp-mini-roof-top-tent
/product/174378/thule-tepui-ruggedized-kukenam-3-tent
/rei-garage/product/163872/eureka-x-loft-2-person-tent
/product/147956/rei-co-op-superlight-bivy
/product/151610/outdoor-research-

/product/157831/rei-co-op-trailbreak-20-sleeping-bag-womens
/product/164858/mountain-hardwear-bishop-pass-15-sleeping-bag-womens
/product/163034/nemo-forte-20-sleeping-bag-womens
/product/144323/marmot-trestles-elite-eco-20-sleeping-bag-womens
/product/150154/big-agnes-sunbeam-15-sleeping-bag-womens
/product/163037/nemo-disco-15-sleeping-bag-womens
/product/157832/rei-co-op-trailbreak-30-sleeping-bag-womens
/product/164859/mountain-hardwear-bishop-pass-0-sleeping-bag-womens
/product/163035/nemo-disco-30-sleeping-bag-womens
/product/148280/rei-co-op-zephyr-20-sleeping-bag-womens
/product/144322/marmot-trestles-elite-eco-30-sleeping-bag-womens
/product/147929/kelty-cosmic-20-sleeping-bag-womens
/rei-garage/product/184258/eureka-kiewa-40-sleeping-bag-womens
/product/148331/rei-co-op-magma-15-sleeping-bag-womens
/product/127865/marmot-ouray-0-sleeping-bag-womens
/product/148250/rei-co-op-magma-30-sleeping-bag-womens
/product/171346/kelty-galactic-30-sleeping-bag-womens
/product/152793/sea-

UnboundLocalError: local variable 'json' referenced before assignment