# MJ Price Scraper Tool v1.0

## Imports

In [1]:
# [1]
# Imports
import requests
import time
import re
import json 

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Change this variable to one of the above choices: (Concentrate, Flower, Carts, ...)

filter = "Flower"
dispos = []
filters = []

# Opening JSON file
with open('lib/dictionary.json') as json_file:
    data = json.load(json_file)
    dispos = data["dispos"]
    filters = data["filters"]



filters = data["filters"]




In [2]:
# Session options
options = Options()
options.page_load_strategy = 'normal'
options = webdriver.ChromeOptions()

#Load session
#options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

## Definitions

In [3]:
#[2]
# Product class w/ attrs.
class Product:
  def __init__(self, name, company, type, strain, thc, prices, dispo, dpg):
    self.name = name
    self.company = company
    self.type = type # Category (Flower, Wax, ...)
    self.strain = strain # Indica/Sativa/Hybrid
    self.thc = thc # Percentages
    self.prices = prices # List of prices/weight
    self.dispo = dispo # source
    self.dpg = dpg # $/g
    

In [72]:
# Takes in the details and prices elements and parses through to find the info
# Returned as a single product
def parseProductElements(details_element, price_element):

    _weight = ""
    _price = ""
    _oldprice = ""
    _discount = ""
        
    name_element = details_element.find_element(By.CLASS_NAME, "mobile-product-list-item__ProductName-zxgt1n-6")
    name_text = name_element.text 

    try:
        company_element = details_element.find_element(By.CLASS_NAME, "mobile-product-list-item__Brand-zxgt1n-3")
        company_text = company_element.text
    except:
        company_text = "Company Info Not Found"

    try:
        info_element = details_element.find_element(By.CLASS_NAME, "mobile-product-list-item__DetailsContainer-zxgt1n-1")
        info_text = info_element.text
    except:
        info_text = "Info Not Found"
    
    if (price_element.get_attribute("class") == "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2"): #Multiple Prices
        # prices and weight elements contained inside button inside div container
        div_elements = price_element.find_elements(By.TAG_NAME, "div")
        for div in div_elements:
            button_element = div.find_element(By.CLASS_NAME, "clickable__StyledButton-uqcx8d-0")
            _weight = button_element.find_element(By.CLASS_NAME, "weight-tile__Label-otzu8j-4").text
            prices = button_element.find_elements(By.CLASS_NAME, "weight-tile__PriceText-otzu8j-5")
            if (len(prices) > 1): #product is dicounted
                _discount = button_element.find_element(By.CLASS_NAME, "weight-tile__DiscountLabel-otzu8j-0").text
                for price in prices:
                    element_class = price.get_attribute("class")
                    for c in element_class.split(" "):
                        if (c == "weight-tile__StrikedOutPrice-otzu8j-6"): # discounted element
                            _oldprice = price.text
                        else:
                            _price = price.text

    else: # Only one option/price
        _weight = price_element.find_element(By.CLASS_NAME, "weight-tile__Label-otzu8j-4").text
        prices = price_element.find_elements(By.CLASS_NAME, "weight-tile__PriceText-otzu8j-5")
        if (len(prices) > 1): #product is discounted (new price and old price share the same class)
            _discount = price_element.find_element(By.CLASS_NAME, "weight-tile__DiscountLabel-otzu8j-0").text
            for price in prices:
                element_class = price.get_attribute("class")
                for c in element_class.split(" "):
                    if (c == "weight-tile__StrikedOutPrice-otzu8j-6"): # discounted element
                        _oldprice = price.text
                    else:
                        _price = price.text


    product = Product(
        name=name_text,
        company=company_text,
        type=info_text,
        thc=info_text,
        prices = [
            {
                "weight" : _weight,
                "price" : _price,
                "oldprice" : _oldprice,
                "discount" : _discount
            }
        ],
        dispo="dispo",
        dpg=0
    )

    return product

def oz_to_g(oz):
    if (oz == "1/8"):
        return "3.5"
    elif (oz == "1/4"):
        return "7.0"
    elif (oz == "1/2"):
        return "14.0"
    elif (oz == "1"):
        return "28.0"
    else:
        return "0.01" #N/A

In [73]:
#[3]
# Function to scrape URL and returns list of product elements to later be parsed through
def scrapeURL(driver, dispo, filter): #DONE: Add filter var

    # Connect to URL
    filterURL = dispos[dispo] + filters[filter]

    driver.get(filterURL)
    driver.implicitly_wait(1.5)
    #time.sleep(5)

    # Age verification
    try:
        driver.find_element(By.CLASS_NAME, "age-confirmation-modal__StyledButton-di8wrk-0").click()
    except:
        print("")

    # Smooth scroll to scrub ALL data
    scheight = .1
    while scheight < 9.9:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
        scheight += .0075

    # Main element with all products
    main_element = driver.find_element(By.TAG_NAME, "main")
    
    # Searching for the product lists
    element_products = main_element.find_element(By.CLASS_NAME, "product-list__Container-sc-1arkwfu-1")
    element_div = element_products.find_element(By.TAG_NAME, "div")
    element_prod_list = element_div.find_elements(By.CLASS_NAME, "mobile-product-list-item__Container-zxgt1n-0")
    prod_list = element_prod_list[1:len(element_prod_list)-1]

    # Scraping the product details elements
    elements_products_info = []
    ignored_products = 0
    for elem in prod_list:
        # #if zxgt1n-4 exists
        if (len(elem.find_elements(By.CLASS_NAME, "mobile-product-list-item__ProductDetails-zxgt1n-4")) > 0): 
            details_element = elem.find_element(By.CLASS_NAME, "mobile-product-list-item__ProductDetails-zxgt1n-4")
            # Scraping prices (weights, prices, discounts)
            if ( len(elem.find_elements(By.CLASS_NAME, "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2")) > 0): # product has multiple options/prices
                prices_element = elem.find_element(By.CLASS_NAME, "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2") 
                elements_products_info.append((details_element, prices_element))
            else:
                prices_element = elem.find_element(By.CLASS_NAME, "clickable__StyledButton-uqcx8d-0")
                elements_products_info.append((details_element, prices_element))
       
        elif (len(elem.find_elements(By.CLASS_NAME, "mobile-product-list-item__ProductInfoContainer-zxgt1n-5")) > 0): #if zxgt1n-5 exists (inner container) && outside product details container
            temp = elem.find_element(By.CLASS_NAME, "mobile-product-list-item__ProductInfoContainer-zxgt1n-5")
            details_element = temp.find_element(By.CLASS_NAME, "mobile-product-list-item__ProductDetails-zxgt1n-4")
            # Scraping prices (weights, prices, discounts)

            if ( len(details_element.find_elements(By.CLASS_NAME, "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2")) > 0): # product has multiple options/prices
                prices_element = elem.find_element(By.CLASS_NAME, "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2") 
                elements_products_info.append((details_element, prices_element))
            elif ( len(details_element.find_elements(By.CLASS_NAME, "clickable__StyledButton-uqcx8d-0")) > 0):
                prices_element = elem.find_element(By.CLASS_NAME, "clickable__StyledButton-uqcx8d-0")
                elements_products_info.append((details_element, prices_element))
        
                

        else:
            print("Product scrape failed from ", dispo)
            ignored_products+=1
        
        
    # Parsing the product details elements
    products = []
    for elem in elements_products_info:
        products.append(parseProductElements(elem[0], elem[1]))
    print(len(products), "products scraped from", dispo, "\n", str(ignored_products), "products not scraped")
  
    return products


## Main

✅ TODO:

- [ ] Clean Berkshire script (60 seconds atm)
- [ ] Add sanctuary script
- [ ] Begin log exports
- [ ] Add edibles
- [ ] Specials
- [ ] Home blurbs

In [28]:
driver = webdriver.Chrome(options=options)

In [9]:
from lib.dispos.Berkshire import scrapeBerkshire

#berkshire_products = scrapeBerkshire(driver, filters[filter])
b1 = scrapeBerkshire(driver, filters["Carts"])
b2 = scrapeBerkshire(driver, filters["Flower"])
b3 = scrapeBerkshire(driver, filters["Concentrate"])
#b4 = scrapeBerkshire(driver, filters["Edibles"]) #TODO: Add to filters and test


13 products scraped from Berkshire! 
 0 products not scraped

34 products scraped from Berkshire! 
 0 products not scraped

Product scrape failed from Berkshire
Product scrape failed from Berkshire
Product scrape failed from Berkshire
74 products scraped from Berkshire! 
 3 products not scraped


KeyError: 'Edibles'

In [12]:
from lib.dispos.Mission import scrapeMission

#mission_products = scrapeMission(driver, filters[filter])
m1 = scrapeMission(driver, filters["Carts"])
m2 = scrapeMission(driver, filters["Flower"])
m3 = scrapeMission(driver, filters["Concentrate"])
#m4 = scrapeMission(driver, filters["Edibles"])


34 products scraped from Mission! 
 0 products not scraped

31 products scraped from Mission! 
 0 products not scraped

32 products scraped from Mission! 
 0 products not scraped


In [33]:

def parse_product(element):

    # Extract text from element
    prod_name = element.find_element(By.CLASS_NAME, "css-4qmyxz").text
    prod_company = element.find_element(By.CLASS_NAME, "css-1ketu3n").text
    prod_type = element.find_element(By.CLASS_NAME, "css-m6dbny").text
    prod_strain = element.find_element(By.CLASS_NAME, "css-1lekzkb").text
    prod_thc = element.find_element(By.CLASS_NAME, "css-g89h0y").text
    prod_prices_element = element.find_element(By.CLASS_NAME, "css-14enic0").find_element(By.CLASS_NAME, "css-1sv1nr7")
    prod_price_element = prod_prices_element.find_element(By.CLASS_NAME, "css-pevxb0")
    prod_price = prod_price_element.text
    
    if(len(prod_price_element.find_elements(By.TAG_NAME, "span")) > 0):
        prod_weight = prod_price_element.find_element(By.TAG_NAME, "span").text
    else:
        prod_weight = "N/A"

    #! check if product is discounted [Slows down the script by 4x- Not advised to use]
    #if (len(prod_prices_element.find_elements(By.CLASS_NAME, "css-kejpgv")) > 0):
    #    prod_old_price = prod_prices_element.find_element(By.CLASS_NAME, "css-kejpgv").text
    #else:
    #    prod_old_price = prod_price
    #prod_discount = "[Formula Not Implemeneted]"

    prod_dispo = ""
    prod_dpg = 0

    product = Product(
        name=prod_name,
        company=prod_company,
        type=prod_type,
        strain=prod_strain,
        thc=prod_thc,
        prices = [
            {
                "weight" : prod_weight,
                "price" : prod_price
            }
        ],
        dispo=prod_dispo,
        dpg=prod_dpg
    )

    return product



In [34]:
ex_filter = "?refinementList%5Broot_types%5D%5B0%5D=extract&sortBy=menu-products-by-price-production"
filterURL = "https://www.iheartjane.com/stores/1990/sanctuary-brookline" + ex_filter

driver.get(filterURL)
driver.implicitly_wait(2.5)
time.sleep(1.5)

# Smooth scroll to scrub ALL data
scheight = .1
while scheight < 9.9:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
    scheight += .0075

# This is the "load more" btn (unclickable b/c reactModal) not important rn but TODO: fix to load more products
# btn = driver.find_element(By.CSS_SELECTOR, ".css-1vpb7ta")
 
 # Card elements that contain all of the product details
product_card_elements = driver.find_elements(By.CSS_SELECTOR, ".css-1o21u79")

products = []

for elem in product_card_elements:
    prod = parse_product(elem)
    products.append(prod)

print(len(products), "products scraped")

Product Added: L/S Hash - Fried Lemon Pie x London Mints 11
Product Added: L/S Hash - Juice Box
Product Added: L/S Hash Witch Doctor 1g
Product Added: RSO Syringe
Product Added: L/S Hash - Fried Lemon Pie x London Mints 11
Product Added: L/S Hash - Juice Box
Product Added: L/S Hash Witch Doctor 1g
Product Added: Honey Comb Crunch
Product Added: Shatter - Juice Box
Product Added: Shatter - Lemon Haze
Product Added: Shatter - Mimosa
Product Added: Shatter - Zebra Cake
Product Added: Witch Doctor
Product Added: Crumble - Banana Bread
Product Added: Crumble - Extraterrestrial Bananas
Product Added: Crumble - Flapjacks
Product Added: Crumble - Honey Comb Crunch
Product Added: Crumble - Juice Box
Product Added: Crumble - Lemon Haze
Product Added: Crumble - Lemon Meringue
Product Added: Crumble - Mimosa
Product Added: Crumble - Purple Eclipse
Product Added: Crumble - Treebeard
Product Added: Crumble - Witch Doctor
Product Added: Sauce - Alien Cheese x Dosijo
Product Added: Sauce - Dojo Dwelle

In [36]:
for x in products:
    print(x.name, ":", x.prices[0]['price'])

L/S Hash - Fried Lemon Pie x London Mints 11 : $24.50/1g
L/S Hash - Juice Box : $24.50/1g
L/S Hash Witch Doctor 1g : $24.50/1g
RSO Syringe : $33.60/.5g
L/S Hash - Fried Lemon Pie x London Mints 11 : $24.50/1g
L/S Hash - Juice Box : $24.50/1g
L/S Hash Witch Doctor 1g : $24.50/1g
Honey Comb Crunch : $25.00/.5g
Shatter - Juice Box : $25.00/.5g
Shatter - Lemon Haze : $25.00/.5g
Shatter - Mimosa : $25.00/.5g
Shatter - Zebra Cake : $25.00/.5g
Witch Doctor : $25.00/.5g
Crumble - Banana Bread : $30.00/.5g
Crumble - Extraterrestrial Bananas : $30.00/.5g
Crumble - Flapjacks : $30.00/.5g
Crumble - Honey Comb Crunch : $30.00/.5g
Crumble - Juice Box : $30.00/.5g
Crumble - Lemon Haze : $30.00/.5g
Crumble - Lemon Meringue : $30.00/.5g
Crumble - Mimosa : $30.00/.5g
Crumble - Purple Eclipse : $30.00/.5g
Crumble - Treebeard : $30.00/.5g
Crumble - Witch Doctor : $30.00/.5g
Sauce - Alien Cheese x Dosijo : $30.00/.5g
Sauce - Dojo Dweller : $30.00/.5g
Sauce - Fried Lemon Pie : $30.00/.5g
Sauce - Holy Grail 

In [None]:
# Old code bypassing iheartjane age verification/location
try:
    # Locating verify element
    verify_element = driver.find_element(By.TAG_NAME, "body")   \
          .find_element(By.CLASS_NAME, "ReactModalPortal")      \
          .find_element(By.CLASS_NAME, "ReactModal__Overlay")   \
          .find_element(By.CLASS_NAME, "ReactModal__Content")   \
          .find_elements(By.TAG_NAME, "div")[1]                 \
          .find_element(By.TAG_NAME, "div")  

    # Age Verification                         
    verify_element.find_element(By.TAG_NAME, "div")             \
          .find_element(By.CLASS_NAME, "css-15j6iqq")              \
          .find_element(By.CLASS_NAME, "css-du8qvl").click()

except Exception as e:
    print(e)

try:
          # Location skip
    #verify_element.find_element(By.TAG_NAME, "div")     \
     #     .find_element(By.TAG_NAME, "div")                 \
      #    .find_element(By.CLASS_NAME, "css-1qock7g").click()
    location_element = driver.find_element(By.CLASS_NAME, "empty-class")

    location_element.find_element(By.CLASS_NAME, "ReactModalPortal")      \
          .find_element(By.CLASS_NAME, "ReactModal__Overlay")   \
          .find_element(By.CLASS_NAME, "ReactModal__Content")   \
          .find_elements(By.TAG_NAME, "div")[1]                 \
          .find_element(By.TAG_NAME, "div")                     \
          .find_element(By.TAG_NAME, "div")     \
          .find_element(By.TAG_NAME, "div")                 \
          .find_element(By.CLASS_NAME, "css-1qock7g").click()
except Exception as e:
      print("2", e)


## Display

In [15]:
#[5] Sorting the products and display function
p = m3
p.sort(key=lambda x: x.prices[0]["price"], reverse=False)

def product_to_string():

        ret = []
        sep = "\n"

        for pr in p:
                parsed_product = pr.name + "\n"  + str(pr.prices[0]["price"]) + "\n" + str(pr.prices[0]["weight"]) + pr.dispo + "\n"
                ret.append(parsed_product)
                #print(pr.name, pr.price, pr.weight, pr.dispo)
        
        return sep.join(ret)

print(len(p))
print(product_to_string())

32
Apricot Peach Distillate Dart
18.00
0.5Mission Brookline

Durban Poison Distillate Dart
18.00
0.5Mission Brookline

Grape Distillate Dart
18.00
0.5Mission Brookline

Indica Distillate Dart
18.00
0.5Mission Brookline

Jack Herer Distillate Dart
18.00
0.5Mission Brookline

Pineapple Distillate Dart
18.00
0.5Mission Brookline

Pink Cookies Distillate Dart
18.00
0.5Mission Brookline

Purple Trainwreck Distillate Dart
18.00
0.5Mission Brookline

Sativa Distillate Dart
18.00
0.5Mission Brookline

Strawberry Haze Distillate Dart
18.00
0.5Mission Brookline

Tiger Blood Distillate Dart
18.00
0.5Mission Brookline

Mac and Cheese Shatter ($10 OFF)
30.00
0.5Mission Brookline

BWiz | Live Resin | Crumble
40.00
1.0Mission Brookline

Banana Punch Kief Rosin
40.00
0.5Mission Brookline

Banana Split #2 | Live Resin | Terp Sand *NEW*
40.00
1.0Mission Brookline

Live Sugar - Bootylicious - - Rev Clinics
40.00
0.5Mission Brookline

Motorbreath #15 Live Sugar
40.00
0.5Mission Brookline

Wonka Bars | Liv

## Saving Data

In [None]:
#[6] Saving to logs

import datetime

date =  datetime.datetime.now()
date_fmt = date.strftime("%Y-%m-%d %I-%M-%p")
file_name = "logs/" + filter + "/" + date_fmt + ".txt"
product_csv = product_to_string()

log = open(file_name, "w")

log.write(date_fmt + "\n\n")
log.write("Dispo Deals\n-----------\n")

#TODO: Add Dispo Home Blurb
log.write("Berkshire Roots:\n")
log.write("Sample block of text\n\n\n")

header = str(len(p)) + " Products (" + filter + "):\n"

log.write(header)

log.write(product_csv)

log.close()