In [3]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup 
from selenium import webdriver 
from selenium.webdriver.common.keys import Keys 
import time 
import csv



def _extract_product_row(product, cols):
    """Return the values of ``cols`` for one product record, in column order.

    A field that is missing from ``product`` (or whose container has an
    unexpected shape) is recorded as ``None`` so a single incomplete product
    does not abort the whole scrape.
    """
    row = []
    for item in cols:
        try:
            if item == "lenskart_price":
                # The price is read from the second entry of the 'prices' list.
                # NOTE(review): presumably index 1 is the Lenskart selling
                # price -- confirm against the API response.
                value = product['prices'][1]['price']
            else:
                value = product[item]
        except (KeyError, IndexError, TypeError):
            # Only "field not present / wrong shape" is tolerated; anything
            # else (e.g. KeyboardInterrupt) must propagate.
            value = None
        row.append(value)
    return row


def _fetch_category_rows(category_id, cols, timeout):
    """Page through one category and return one row (a list) per product.

    Paging starts at page 0 and stops at the first response whose status code
    is not 200 or whose product list is empty.
    """
    api = "https://api-gateway.juno.lenskart.com/v2/products/category/" + category_id + "?page-size=25&page="

    rows = []
    curr_page = 0
    while True:
        # A timeout keeps a stalled connection from hanging the notebook cell.
        res = requests.get(api + str(curr_page), timeout=timeout)
        if res.status_code != 200:
            break
        print("Page:", curr_page, end='\r')
        data = json.loads(res.text)
        products = data['result']['product_list']
        if len(products) == 0:
            break
        rows.extend(_extract_product_row(product, cols) for product in products)
        curr_page += 1
    return rows


def scrap_product_data(filename="product_data.csv", timeout=30):
    """Download product data from the Lenskart category API and save it as CSV.

    Every category id listed in ``glassType`` is fetched page by page
    (25 products per page), the fields named in ``cols`` are extracted from
    each product, exact duplicate rows are dropped, and the result is written
    to ``filename`` (including the DataFrame index column).

    Parameters
    ----------
    filename : str, optional
        Path of the CSV file to write. Defaults to ``"product_data.csv"``.
    timeout : float or tuple, optional
        Timeout in seconds passed to every ``requests.get`` call.

    Returns
    -------
    None

    Raises
    ------
    requests.exceptions.RequestException
        If a request fails or times out.
    KeyError
        If a 200 response does not contain ``result -> product_list``.
    """

    ##### Required variables #####

    # API format
    # "https://api-gateway.juno.lenskart.com/v2/products/category/[ID for glass]?page-size=25&page="

    # category ids for each type of glasses
    glassType = {
        # "computer_glass": ['8427']   (currently disabled)
        "eyeglass": ['3363'],
        "sunglass": ['3362'],
        "contact_lenses": ['16637', '16631', '16634', '16639', '16522', '16632', '16553', '16633', '16612', '4592', '8460', '4585', '10413', '16609', '16641', '16630', '16607', '16638', '16643', '16635', '16541', '16640']
    }

    # attributes (columns) of the resulting dataframe
    cols = [
        'id',
        'color',
        'size',
        'width',
        'brand_name',
        'model_name',
        'lenskart_price',
        'classification',
        'wishlistCount',
        'purchaseCount',
        'avgRating',
        'totalNoOfRatings',
        'qty'
    ]

    ##### Collecting Data #####

    # Rows are accumulated in a plain list and turned into a DataFrame once;
    # appending to a DataFrame row by row is quadratic in the number of rows.
    records = []
    for Gtype, category_ids in glassType.items():
        print("Start collecting data for",Gtype,"...")
        for category_id in category_ids:
            records.extend(_fetch_category_rows(category_id, cols, timeout))
        print("Data collection is completed for",Gtype,"^_^")

    # dtype=object keeps the raw API values, so integer counts are not turned
    # into floats just because another row is missing that field.
    df = pd.DataFrame(records, columns=cols, dtype=object)

    # delete duplicate records
    df = df.drop_duplicates()

    # convert the data into csv
    df.to_csv(filename)
    


In [4]:
# Run the scraper: queries the Lenskart category API over the network and
# writes the de-duplicated product rows to "product_data.csv".
scrap_product_data()

Start collecting data for eyeglass ...
Data collection is completed for eyeglass ^_^
Start collecting data for sunglass ...
Data collection is completed for sunglass ^_^
Start collecting data for contact_lenses ...
Data collection is completed for contact_lenses ^_^
