In [1]:
from urllib.parse import urljoin

import requests,json
import pandas as pd
from bs4 import BeautifulSoup as bs
from tqdm import tqdm

In [2]:
def get_collections(timeout=30):
    """Return the absolute URL of every product collection (category).

    Fetches ``https://foreignfortune.com/products/`` and collects the ``href``
    of each ``<a class="collection-grid-item__link">`` element. Placeholder
    ``"#"`` links and anchors without an ``href`` attribute are skipped.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the HTTP response. Without a timeout a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    list of str
        Collection URLs resolved against the store's base URL, in document
        order.
    """
    base_url = 'https://foreignfortune.com/'
    headers = {
        'authority': 'foreignfortune.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en-US,en;q=0.9,en-IN;q=0.8',
        'cache-control': 'no-cache',
        'dnt': '1',
        'pragma': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
    }

    response = requests.get('https://foreignfortune.com/products/', headers=headers, timeout=timeout)
    soup = bs(response.content, 'lxml')

    # href=True restricts the match to anchors that actually have an href,
    # so the subscript below cannot raise KeyError.
    anchors = soup.find_all('a', {'class': 'collection-grid-item__link'}, href=True)

    # urljoin copes with root-relative ("/collections/x"), relative and
    # already-absolute hrefs; plain concatenation produced "//" for the first
    # and a malformed URL for the last.
    return [urljoin(base_url, anchor['href']) for anchor in anchors if anchor['href'] != "#"]

In [3]:
def get_collection_products(collection_link, max_pages=9, timeout=30):
    """Collect the product-page URLs listed in one collection, page by page.

    Pages are requested as ``collection_link`` with a ``page=N`` query
    parameter, starting at 1. Paging stops at the first page that contains no
    ``<a class="grid-view-item__link">`` element with an ``href``, or after
    ``max_pages`` pages, whichever comes first.

    Parameters
    ----------
    collection_link : str
        URL of the collection to walk.
    max_pages : int, optional
        Upper bound on the number of pages requested. Defaults to 9, the
        limit that used to be hard-coded; raise it for larger collections.
    timeout : float, optional
        Seconds to wait for each HTTP response.

    Returns
    -------
    list of str
        Product URLs resolved against the store's base URL, in page order.
    """
    base_url = 'https://foreignfortune.com/'
    # The headers are identical for every page, so build them once.
    headers = {
        'authority': 'foreignfortune.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en-US,en;q=0.9,en-IN;q=0.8',
        'cache-control': 'no-cache',
        'dnt': '1',
        'pragma': 'no-cache',
        'referer': 'https://foreignfortune.com/collections/',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
    }

    products = []
    for page in range(1, max_pages + 1):
        response = requests.get(
            collection_link,
            params={'page': page},
            headers=headers,
            timeout=timeout,
        )
        soup = bs(response.content, 'lxml')
        # href=True skips anchors without an href instead of raising KeyError.
        anchors = soup.find_all('a', {'class': 'grid-view-item__link'}, href=True)
        # urljoin avoids the "//" that concatenation gave for root-relative hrefs.
        page_products = [urljoin(base_url, anchor['href']) for anchor in anchors]
        if not page_products:
            # An empty page means we have walked past the last page.
            break
        products.extend(page_products)
    return products

In [4]:
def get_all_product_links():
    """Return the product links of every collection as one flat list.

    Collections are discovered with ``get_collections`` and each one is
    expanded with ``get_collection_products``; a tqdm progress bar tracks the
    collections as they are processed. Links are not de-duplicated.
    """
    collection_urls = get_collections()
    progress = tqdm(collection_urls, desc='Iterating through collections')
    return [
        product_link
        for collection_url in progress
        for product_link in get_collection_products(collection_url)
    ]

In [5]:
# Call the scraper: without the parentheses this name was bound to the
# function object itself rather than to the list of product links.
all_product_links = get_all_product_links()

In [2]:
def details_formatting(parsed_details):
    """Convert a parsed product dictionary into the standardised output record.

    Parameters
    ----------
    parsed_details : dict
        Must provide ``handle``, ``title``, ``image``, ``price``,
        ``price_min``, ``price_max``, ``currency``, ``description``,
        ``media``, ``url``, ``vendor`` and ``variants``. Every variant must
        provide ``id``, ``price``, ``option1``, ``option2`` and ``option3``;
        ``featured_image`` is optional. Prices are divided by 100 on output.

    Returns
    -------
    dict
        Keys, in order: ``product_id``, ``title``, ``image``, ``price``,
        ``currency``, ``description``, ``sale_prices``, ``prices``,
        ``images``, ``url``, ``brand``, ``models``.

        ``models`` holds exactly one ``{"color", "variants"}`` entry per
        distinct ``option2`` value, in first-seen order. A missing colour
        (``None`` or ``""``) is reported as ``""``. A product without
        variants yields a single entry with an empty variant list.

    Raises
    ------
    KeyError
        If one of the required keys listed above is absent.
    """
    default_image = parsed_details['image']

    def build_variant(raw_variant):
        # featured_image can be absent, None, or lack a usable 'src'; in all
        # of those cases fall back to the product-level image.
        try:
            image = 'https:' + raw_variant['featured_image']['src']
        except (KeyError, TypeError):
            image = default_image
        return {
            "id": raw_variant['id'],
            "image": image,
            "price": raw_variant['price'] / 100,
            "size": raw_variant['option1'],
            "style": raw_variant['option3'],
        }

    # Group variants by colour (option2). A dict keeps first-seen order, so the
    # result is deterministic, and each colour is emitted exactly once (the
    # previous nested loop appended a model entry per variant).
    variants_by_color = {}
    for raw_variant in parsed_details['variants']:
        color = raw_variant['option2'] or ""
        variants_by_color.setdefault(color, []).append(build_variant(raw_variant))

    models = [
        {"color": color, "variants": color_variants}
        for color, color_variants in variants_by_color.items()
    ]
    if not models:
        # No variants at all: keep the single colourless placeholder entry.
        models = [{"color": "", "variants": []}]

    return {
        "product_id": parsed_details['handle'],
        "title": parsed_details["title"],
        "image": default_image,
        "price": parsed_details['price'] / 100,
        "currency": parsed_details["currency"],
        "description": parsed_details['description'],
        "sale_prices": [parsed_details['price_min'] / 100],
        "prices": [parsed_details['price_max'] / 100],
        "images": ['https:' + media['src'] for media in parsed_details['media']],
        "url": parsed_details['url'],
        "brand": parsed_details['vendor'],
        "models": models,
    }

In [3]:
def get_product_details(product_url, timeout=30):
    """Scrape one product page and return its details in the standard format.

    The product data is read from the page's
    ``<script id="ProductJson-product-template">`` JSON. It is then enriched
    with a primary image, the ``og:description`` text, the page URL and the
    ``og:price:currency`` value before being passed to ``details_formatting``.

    Parameters
    ----------
    product_url : str
        URL of the product page.
    timeout : float, optional
        Seconds to wait for the HTTP response. Without a timeout a stalled
        connection would block the notebook indefinitely.

    Returns
    -------
    dict
        Whatever ``details_formatting`` returns for the enriched product data.
    """
    headers = {
        'authority': 'foreignfortune.com',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'en-US,en;q=0.9,en-IN;q=0.8',
        'cache-control': 'no-cache',
        'dnt': '1',
        'pragma': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
    }

    response = requests.get(
        product_url,
        headers=headers,
        timeout=timeout,
    )
    soup = bs(response.content, 'lxml')
    parsed_details = json.loads(soup.find('script', {'id': 'ProductJson-product-template'}).text)

    try:
        # The second JSON-LD script on the page is expected to hold the image.
        ld_image = json.loads(soup.find_all('script', {'type': 'application/ld+json'})[1].text)['image']
        # If the field is a plain string, [0] would return only its first
        # character, so use the whole string. Otherwise take the first item.
        if isinstance(ld_image, str) and ld_image:
            parsed_details['image'] = ld_image
        else:
            parsed_details['image'] = ld_image[0]
    except (IndexError, KeyError, TypeError, ValueError):
        # Missing/short script list, invalid JSON, or no usable 'image' entry:
        # fall back to the Open Graph image. Listing the exceptions (instead
        # of a bare except) lets KeyboardInterrupt and the like propagate.
        parsed_details["image"] = soup.find('meta', {'property': 'og:image:secure_url'})['content']

    parsed_details["description"] = soup.find('meta', {'property': 'og:description'})['content']
    parsed_details["url"] = product_url
    parsed_details["currency"] = soup.find('meta', {'property': 'og:price:currency'})['content']
    return details_formatting(parsed_details)

In [4]:
# Try the scraper on a single product page; the returned dict is the cell output.
sample_product_url = 'https://foreignfortune.com/collections/small-logo-embroidery-t-shirts-1/products/foreign-fortune-collection-joggers-1'
get_product_details(sample_product_url)

{'product_id': 'foreign-fortune-collection-joggers-1',
 'title': 'Foreign Fortune Collection Joggers- Adult',
 'image': 'https://foreignfortune.com/cdn/shop/products/D30946DA-5D18-48D6-9890-63DC5DB36F77_1242x.jpg?v=1647614117',
 'price': 180.0,
 'currency': 'USD',
 'description': 'Our Foreign Fortune Collection Joggers are great for the entire family. Very comfortable and versatile. They come in 5 different colors and can be worn during any season.',
 'sale_prices': [180.0],
 'prices': [180.0],
 'images': ['https://foreignfortune.com/cdn/shop/products/D30946DA-5D18-48D6-9890-63DC5DB36F77.jpg?v=1647614117',
  'https://foreignfortune.com/cdn/shop/products/3EE2E126-721C-4DDE-A1EB-E3B355F2B674.jpg?v=1647614117',
  'https://foreignfortune.com/cdn/shop/products/C8134772-8681-4EF4-810A-970DCBD13F28.jpg?v=1647614117',
  'https://foreignfortune.com/cdn/shop/products/827B72E9-79B6-4401-AE76-29F3AFDA483F.jpg?v=1647614117',
  'https://foreignfortune.com/cdn/shop/products/DD1E038F-07C5-410A-A846-A5