# MJ Price Scraper Tool v1.0

## Imports

In [1]:
# [1]
# Imports
import requests
import time
import re
import json 

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Product category to scrape. Must be one of the keys of the "filters" table
# in lib/dictionary.json (e.g. Concentrate, Flower, Carts, ...).
# NOTE: this name shadows the builtin filter(); it is kept because later
# cells refer to it.
filter = "Flower"

# Load the lookup tables from the JSON dictionary file.
# JSON is UTF-8 by specification, so do not rely on the platform default.
with open('lib/dictionary.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

dispos = data["dispos"]
filters = data["filters"]




In [2]:
# Session options
# Build ONE options object. webdriver.ChromeOptions is the same class as
# selenium.webdriver.chrome.options.Options, so constructing a second one
# after setting page_load_strategy would silently discard that setting.
options = Options()
options.page_load_strategy = 'normal'

# Load session
# Uncomment the next line to run Chrome without a visible window.
#options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

## Definitions

In [59]:
#[2]
# Product class w/ attrs.
class Product:
    """A single scraped menu item.

    All constructor arguments are stored unchanged on the instance under
    the same names; no validation or conversion is performed.
    """

    def __init__(self, name, company, type, thc, prices, dispo, dpg):
        self.name = name
        self.company = company
        # The parameter name shadows the builtin type(); it is kept because
        # callers pass it by keyword (type=...).
        self.type = type
        self.thc = thc
        self.prices = prices
        self.dispo = dispo
        self.dpg = dpg

    def __repr__(self):
        """Return a constructor-style summary, handy when inspecting results."""
        return (
            f"{type(self).__name__}("
            f"name={self.name!r}, company={self.company!r}, "
            f"type={self.type!r}, thc={self.thc!r}, "
            f"prices={self.prices!r}, dispo={self.dispo!r}, dpg={self.dpg!r})"
        )
    

In [72]:
# Takes in the details and prices elements and parses through to find the info
# Returned as a single product
def _read_price_tile(tile):
    """Read weight, price, old price and discount text from one price tile.

    ``tile`` is an element that contains a weight label and one or more
    price texts. When more than one price text is present the item is
    treated as discounted: the struck-out element is the old price and the
    other one is the current price (both share the same price-text class).

    Returns a dict with the keys "weight", "price", "oldprice", "discount";
    fields that are not present on the page are left as empty strings.
    """
    entry = {"weight": "", "price": "", "oldprice": "", "discount": ""}
    entry["weight"] = tile.find_element(By.CLASS_NAME, "weight-tile__Label-otzu8j-4").text
    price_elements = tile.find_elements(By.CLASS_NAME, "weight-tile__PriceText-otzu8j-5")

    if len(price_elements) > 1:  # product is discounted
        entry["discount"] = tile.find_element(By.CLASS_NAME, "weight-tile__DiscountLabel-otzu8j-0").text

    for price_element in price_elements:
        # Decide once per element (not once per class token), so the
        # struck-out element can never overwrite the current price.
        classes = (price_element.get_attribute("class") or "").split()
        if "weight-tile__StrikedOutPrice-otzu8j-6" in classes:
            entry["oldprice"] = price_element.text
        else:
            # Also covers the non-discounted case with a single price text.
            entry["price"] = price_element.text

    return entry


def parseProductElements(details_element, price_element):
    """Build a Product from a product's details element and price element.

    Args:
        details_element: Element holding the product name and, optionally,
            the brand and details text.
        price_element: Either the multiple-options container or a single
            price button.

    Returns:
        A Product whose ``prices`` is a list of dicts with the keys
        "weight", "price", "oldprice" and "discount" -- one dict per price
        option, in page order. The list always has at least one entry.

    Raises:
        Selenium lookup errors propagate when the product name or a weight
        label cannot be found; missing brand/details text is tolerated.
    """
    name_text = details_element.find_element(
        By.CLASS_NAME, "mobile-product-list-item__ProductName-zxgt1n-6"
    ).text

    # Brand and details are optional on the page: fall back to placeholders.
    try:
        company_text = details_element.find_element(
            By.CLASS_NAME, "mobile-product-list-item__Brand-zxgt1n-3"
        ).text
    except Exception:
        company_text = "Company Info Not Found"

    try:
        info_text = details_element.find_element(
            By.CLASS_NAME, "mobile-product-list-item__DetailsContainer-zxgt1n-1"
        ).text
    except Exception:
        info_text = "Info Not Found"

    # Token membership instead of string equality, so the check still works
    # when the element carries additional classes.
    element_classes = (price_element.get_attribute("class") or "").split()
    if "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2" in element_classes:
        # Multiple prices: each option is a button inside the container.
        option_buttons = price_element.find_elements(
            By.CLASS_NAME, "clickable__StyledButton-uqcx8d-0"
        )
        price_entries = [_read_price_tile(button) for button in option_buttons]
    else:
        # Only one option/price: the element itself is the tile.
        price_entries = [_read_price_tile(price_element)]

    if not price_entries:
        # Keep prices[0] valid for callers even when no option was found.
        price_entries = [{"weight": "", "price": "", "oldprice": "", "discount": ""}]

    return Product(
        name=name_text,
        company=company_text,
        # The details text is stored unsplit in both fields.
        type=info_text,
        thc=info_text,
        prices=price_entries,
        # Hard-coded placeholders; nothing here knows the dispo or the
        # price-per-gram value.
        dispo="dispo",
        dpg=0,
    )

def oz_to_g(oz):
    """Translate an ounce-fraction label into its gram weight, as a string.

    Recognised labels are "1/8", "1/4", "1/2" and "1". Any other value
    yields "0.01", which stands in for "not applicable".
    """
    conversions = (
        ("1/8", "3.5"),
        ("1/4", "7.0"),
        ("1/2", "14.0"),
        ("1", "28.0"),
    )
    for label, grams in conversions:
        if oz == label:
            return grams
    return "0.01"  # N/A

In [73]:
#[3]
# Scrapes the filtered menu URL for one dispo and returns the list of parsed products
def _first_by_class(parent, class_name):
    """Return the first element under *parent* with *class_name*, or None."""
    found = parent.find_elements(By.CLASS_NAME, class_name)
    return found[0] if found else None


def scrapeURL(driver, dispo, filter): #DONE: Add filter var
    """Load one dispo's filtered menu page and return its parsed products.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        dispo: Index/key into the module-level ``dispos`` table; the entry
            is used as the first part of the URL.
        filter: Index/key into the module-level ``filters`` table; the
            entry is appended to the URL.

    Returns:
        A list with one ``parseProductElements`` result for every product
        tile whose details element and price element were both found.
        Tiles missing either one are reported and counted as not scraped.

    Raises:
        Selenium lookup errors propagate when the page-level containers
        (``main``, the product list) are missing.
    """
    # Connect to URL
    filterURL = dispos[dispo] + filters[filter]

    driver.get(filterURL)
    driver.implicitly_wait(1.5)

    # Age verification: click through the modal when it is present.
    try:
        driver.find_element(By.CLASS_NAME, "age-confirmation-modal__StyledButton-di8wrk-0").click()
    except Exception:
        # Best effort: no modal (or it could not be clicked), so carry on.
        pass

    # Step the scroll target through the page in small increments. The
    # divisor grows from 0.1 to about 9.9, so the target moves from the
    # bottom of the document up towards the top -- presumably so that
    # lazily rendered tiles get loaded before they are queried.
    scheight = .1
    while scheight < 9.9:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight/%s);" % scheight)
        scheight += .0075

    # Main element with all products
    main_element = driver.find_element(By.TAG_NAME, "main")

    # Searching for the product lists
    element_products = main_element.find_element(By.CLASS_NAME, "product-list__Container-sc-1arkwfu-1")
    element_div = element_products.find_element(By.TAG_NAME, "div")
    element_prod_list = element_div.find_elements(By.CLASS_NAME, "mobile-product-list-item__Container-zxgt1n-0")
    # NOTE(review): the first and last matches are dropped; presumably they
    # are not real product tiles -- confirm against the page.
    prod_list = element_prod_list[1:-1]

    # Collect (details element, price element) pairs.
    elements_products_info = []
    ignored_products = 0
    for elem in prod_list:
        details_element = _first_by_class(elem, "mobile-product-list-item__ProductDetails-zxgt1n-4")
        prices_element = None
        if details_element is not None:
            # Prefer the multiple-options container; otherwise fall back to
            # the single price button.
            prices_element = _first_by_class(elem, "mobile-product-list-item__MultipleOptionsContainer-zxgt1n-2")
            if prices_element is None:
                prices_element = _first_by_class(elem, "clickable__StyledButton-uqcx8d-0")

        if prices_element is None:
            # Either the details or the price element is missing: report and
            # count the tile instead of raising or dropping it silently.
            print("Product scrape failed from ", dispo)
            ignored_products += 1
            continue

        elements_products_info.append((details_element, prices_element))

    # Parsing the product details elements
    products = [
        parseProductElements(details, prices)
        for details, prices in elements_products_info
    ]
    print(len(products), "products scraped from", dispo, "\n", str(ignored_products), "products not scraped")

    return products


## Main

In [3]:
# Start a Chrome session using the `options` object configured earlier.
driver = webdriver.Chrome(options=options)

In [11]:
# Project-local scraper for the Berkshire dispo.
from lib.dispos.Berkshire import scrapeBerkshire

# Run it with the shared driver and the `filters` entry for the selected category.
berkshire_products = scrapeBerkshire(driver, filters[filter])

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\walid\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\IPython\core\interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\walid\AppData\Local\Temp\ipykernel_12676\2133389294.py", line 3, in <cell line: 3>
    berkshire_products = scrapeBerkshire(driver, filters[filter])
  File "c:\Users\walid\projects\MJPriceScraper\lib\dispos\Berkshire.py", line 23, in scrapeBerkshire
  File "C:\Users\walid\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\selenium\webdriver\remote\webdriver.py", line 491, in execute_script
    return self.execute(command, {
  File "C:\Users\walid\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\selenium\webdriver\remote\webdriver.py", 

In [5]:
# Project-local scraper for the Mission dispo.
from lib.dispos.Mission import scrapeMission

# Run it with the shared driver and the `filters` entry for the selected category.
mission_products = scrapeMission(driver, filters[filter])

31 products scraped from Mission! 
 0 products not scraped


## Display

In [9]:
#[5] Sorting the products and display function
p = mission_products


def _price_sort_key(product):
    """Return the first listed price of *product* as a float, for sorting.

    Prices are compared numerically rather than as text, so "100.00" sorts
    after "25.00". A price without any number in it sorts last.
    """
    raw = str(product.prices[0]["price"]).replace(",", "")
    number = re.search(r"\d*\.?\d+", raw)
    return float(number.group()) if number else float("inf")


# Cheapest first. The sort is in place, so mission_products (the same list
# object) is reordered as well.
p.sort(key=_price_sort_key)

def product_to_string(products=None):
    """Render products as plain text, one name/price/dispo stanza each.

    Args:
        products: Iterable of objects exposing ``name``, ``prices`` (a
            sequence whose first item has a "price" key) and ``dispo``.
            Defaults to the module-level list ``p``.

    Returns:
        The stanzas joined by a newline. Each stanza already ends with a
        newline, so consecutive products are separated by one blank line.
        An empty input gives an empty string.
    """
    if products is None:
        products = p

    return "\n".join(
        f'{pr.name}\n{pr.prices[0]["price"]}\n{pr.dispo}\n' for pr in products
    )

# Show how many products are in `p`, followed by the formatted listing.
print(len(p))
print(product_to_string())

31
Bubba Cheese | 3.5g
25.00
Mission Brookline

Cream D' Mint | 3.5g
25.00
Mission Brookline

Batch 45 | 3.5g
30.00
Mission Brookline

Pre-Ground | Midnight
32.00
Mission Brookline

Shamrock Shake | Mini Buds | 7g
35.00
Mission Brookline

Cake's Revenge - FM
40.00
Mission Brookline

GSG x GWIZ | 3.5g
40.00
Mission Brookline

Indiana Bubble Gum | 3.5g
40.00
Mission Brookline

Mac and Cheese | 3.5g
40.00
Mission Brookline

Midnight | 3.5g
40.00
Mission Brookline

Bubbleyum | 3.5g
40.00
Mission Brookline

Cherry Punch | 3.5g
40.00
Mission Brookline

GMO | 3.5g
40.00
Mission Brookline

Garlic Mints | 3.5g
40.00
Mission Brookline

J1 -Local Roots
40.00
Mission Brookline

Wedding Cake | 3.5g
40.00
Mission Brookline

Banana Puddintain | 3.5g
44.00
Mission Brookline

Cookies & Cream | 3.5g
44.00
Mission Brookline

Garlic Z | 3.5g
44.00
Mission Brookline

London Pound Mints #2 | 3.5g
44.00
Mission Brookline

Mimosa | 3.5g
44.00
Mission Brookline

Project 4516 | 3.5g
44.00
Mission Brookline

Sta

In [6]:
import re

# Scratch check: drop every "-", "o", "z" and space from a weight label,
# which leaves only the fraction.
x = "1/8 oz -"
unit_chars = re.compile("[-oz ]")
print(unit_chars.sub("", x))

1/8


## Saving Data

In [None]:
#[6] Saving to logs

import datetime
import os

# Timestamp used both as the file name and as the first line of the log.
# Dashes replace colons in the time so the name is valid on Windows.
date = datetime.datetime.now()
date_fmt = date.strftime("%Y-%m-%d %I-%M-%p")

# One folder per category under logs/; create it on first use so that
# open() does not fail with FileNotFoundError.
log_dir = "logs/" + filter
os.makedirs(log_dir, exist_ok=True)
file_name = log_dir + "/" + date_fmt + ".txt"

# Plain-text listing of the products currently in `p` (not actual CSV).
product_csv = product_to_string()

header = str(len(p)) + " Products (" + filter + "):\n"

# `with` guarantees the file is closed even if a write fails; UTF-8 is set
# explicitly so the platform default encoding cannot reject a product name.
with open(file_name, "w", encoding="utf-8") as log:
    log.write(date_fmt + "\n\n")
    log.write("Dispo Deals\n-----------\n")

    #TODO: Add Dispo Home Blurb
    log.write("Berkshire Roots:\n")
    log.write("Sample block of text\n\n\n")

    log.write(header)

    log.write(product_csv)