In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
from datetime import datetime
import re

In [2]:
def print_current_date():
    """Return today's local date formatted as ``YYYY-MM-DD``.

    Despite the name, nothing is printed; the formatted string is returned.
    """
    return datetime.now().strftime("%Y-%m-%d")

In [3]:
def print_current_time():
    """Return the current local wall-clock time formatted as ``HH:MM:SS``.

    Despite the name, nothing is printed; the formatted string is returned.
    """
    clock_format = "%H:%M:%S"
    return datetime.now().strftime(clock_format)

In [4]:
def setup_driver(timeout=15):
    """Start a Chrome session and pair it with an explicit-wait helper.

    The chromedriver binary path is obtained from ``ChromeDriverManager().install()``
    and handed to Selenium through a ``Service`` object.

    Args:
        timeout: Seconds the returned ``WebDriverWait`` waits before giving up.
            Defaults to 15, the value that used to be hard-coded here.

    Returns:
        A ``(driver, wait)`` tuple: the Chrome WebDriver and a ``WebDriverWait``
        bound to it. The caller is responsible for calling ``driver.quit()``.
    """
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    wait = WebDriverWait(driver, timeout)
    return driver, wait

In [5]:
def search_product(driver, wait, serial_number):
    """Google a model code restricted to flipkart.com and open the first hit.

    Loads google.com, submits the query ``site:flipkart.com <serial_number>`` and
    clicks the first result heading, leaving the browser on whatever page that
    click opens.

    Args:
        driver: Selenium WebDriver used for navigation.
        wait: ``WebDriverWait`` bound to ``driver``.
        serial_number: Model code to search for; interpolated into the query.

    Returns:
        ``None`` when the first result was clicked, or the string
        ``"Currently unavailable"`` when no clickable result could be found.

    Raises:
        Any exception raised while loading Google or locating its search box
        propagates to the caller; only the "click first result" step is guarded.
    """
    driver.get('https://www.google.com')
    search_box = wait.until(EC.presence_of_element_located((By.NAME, 'q')))
    search_query = f"site:flipkart.com {serial_number}"
    search_box.send_keys(search_query)
    search_box.send_keys(Keys.RETURN)
    time.sleep(1)
    try:
        # NOTE(review): this XPath pins the exact class list of the result
        # heading; presumably it stops matching whenever Google changes its
        # markup, in which case every search reports "Currently unavailable".
        first_result = wait.until(
            EC.element_to_be_clickable((By.XPATH, "(//h3[@class='LC20lb MBeuO DKV0Md'])[1]"))
        )
        first_result.click()
        time.sleep(1)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and made a long scraping run impossible to interrupt here.
        return "Currently unavailable"

In [6]:
def _name_match_candidates(serial_number):
    """Return the non-blank substrings of a model code that count as a title match."""
    candidates = [serial_number]

    dot_pos = serial_number.find('.')
    dash_pos = serial_number.find('-')
    if dot_pos != -1:
        # Everything before the first '.'.
        candidates.append(serial_number[:dot_pos])
    if dash_pos != -1 and dash_pos != 2:
        # Everything before the first '-', unless that prefix is exactly two
        # characters long. NOTE(review): presumably this avoids matching on a
        # short brand prefix — confirm the intent of the magic 2.
        candidates.append(serial_number[:dash_pos])
    if dash_pos != -1 and dot_pos != -1:
        middle_portion = serial_number[dash_pos + 1:dot_pos]
        if len(middle_portion) > 2:
            candidates.append(middle_portion)

    # An empty string is a substring of every title, so a serial that is empty
    # or starts with '.'/'-' used to produce a guaranteed (false) match.
    return [candidate for candidate in candidates if candidate.strip()]


def check_name(driver, wait, serial_number):
    """Report whether the page heading mentions the searched model code.

    Reads the text of the first element matching ``//h1//span`` and checks
    whether it contains the full ``serial_number`` or one of the shortened forms
    built by ``_name_match_candidates``. The comparison is case-sensitive.

    Args:
        driver: Selenium WebDriver positioned on the page to inspect.
        wait: Not used by this function.
        serial_number: Model code string that was searched for.

    Returns:
        ``True`` if any candidate occurs in the heading text, otherwise ``False``.
        ``False`` is also returned when the heading cannot be read or when
        ``serial_number`` is not a string (any exception is treated as "no match").
    """
    try:
        time.sleep(1)
        heading_text = driver.find_element(By.XPATH, '//h1//span').text
        return any(
            candidate in heading_text
            for candidate in _name_match_candidates(serial_number)
        )
    except Exception:
        return False

In [7]:
def get_product_price(driver, wait):
    """Read the current selling price from the page loaded in ``driver``.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The price as an ``int`` with commas and the rupee sign removed, or the
        string ``"Currently unavailable"`` when the element is missing or its
        text is not a whole number.
    """
    price_xpath = '//div//div//div//div//div//div[@class="hl05eU"]//div[@class="Nx9bqj CxhGGd"]'
    try:
        time.sleep(1)  # fixed pause before querying the page
        raw_text = driver.find_element(By.XPATH, price_xpath).text
        without_commas = raw_text.replace(",", "")
        digits = without_commas.replace("₹", "")
        return int(digits)
    except Exception:
        return "Currently unavailable"

In [8]:
def get_product_offers(driver, wait):
    """Expand the offers section of the loaded page and return its lines.

    Waits for the first ``//button/div/div/span`` element to become clickable,
    clicks it, then reads the text of the offers container.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: ``WebDriverWait`` bound to ``driver``.

    Returns:
        A list with one whitespace-trimmed string per line of the offers text,
        or the string ``"No offers"`` when any step fails.
    """
    toggle_xpath = '//button/div/div/span'
    panel_xpath = '//body//div//div[@class="DOjaWF gdgoEp col-8-12"][@style="padding: 0px 0px 0px 24px;"]//div[@class="cPHDOP col-12-12"]//div[@class="f+WmCe"]'

    time.sleep(3)
    try:
        toggle = wait.until(EC.element_to_be_clickable((By.XPATH, toggle_xpath)))
        toggle.click()
        time.sleep(1)

        panel_text = driver.find_element(By.XPATH, panel_xpath).text
        offers = []
        for raw_line in panel_text.strip().split("\n"):
            offers.append(raw_line.strip())

        time.sleep(2)
        return offers
    except Exception:
        return "No offers"

In [9]:
def get_strikeout_price(driver, wait):
    """Read the struck-through price from the page loaded in ``driver``.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The price as an ``int`` with commas and the rupee sign removed, or the
        string ``"Currently unavailable"`` when the element is missing or its
        text is not a whole number.
    """
    time.sleep(1)
    try:
        element = driver.find_element(By.XPATH, '//div//div//div[@class="yRaY8j A6+E6v"]')
        # Delete every ',' and '₹' character in one pass.
        cleaned = element.text.translate(str.maketrans("", "", ",₹"))
        return int(cleaned)
    except Exception:
        return "Currently unavailable"

In [10]:
def get_seller_name(driver, wait):
    """Read the seller name from the page loaded in ``driver``.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The seller text converted with ``str.title()``, or the string
        ``"Currently unavailable"`` when the element cannot be found.
    """
    seller_xpath = '//div//div//div//div[@id="sellerName"]//span//span'
    time.sleep(1)
    try:
        return driver.find_element(By.XPATH, seller_xpath).text.title()
    except Exception:
        return "Currently unavailable"

In [11]:
def get_rating(driver, wait):
    """Read the average rating from the page loaded in ``driver``.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The rating as a ``float``, or the string ``"NA"`` when the element is
        missing or its text is not a number.
    """
    rating_xpath = '//div[@class="DOjaWF gdgoEp"]//div[@class="ipqd2A"]'
    time.sleep(1)
    try:
        rating_element = driver.find_element(By.XPATH, rating_xpath)
        return float(rating_element.text)
    except Exception:
        return "NA"

In [12]:
def buy_with_exchange(driver, wait):
    """Read the text shown under the "buy with exchange" option.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The element's text exactly as rendered, or an empty string when the
        element cannot be found.
    """
    exchange_xpath = '//label[@for="BUY_WITH_EXCHANGE"]//div[@class="-KdBdD"]'
    time.sleep(1)
    try:
        exchange_element = driver.find_element(By.XPATH, exchange_xpath)
    except Exception:
        return ""
    try:
        return exchange_element.text
    except Exception:
        return ""

In [13]:
def review_count(driver, wait):
    """Extract the first whole number from the ratings/reviews summary text.

    Reads the second ``<span>`` found under ``div.col-12-12`` elements, strips
    thousands separators and returns the first run of digits.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        wait: Not used by this function.

    Returns:
        The number as an ``int``, or the string ``"NA"`` when the element is
        missing or its text contains no digits.
    """
    time.sleep(1)
    try:
        summary = driver.find_element(By.XPATH, '(//div[@class="col-12-12"]//span)[2]')
        summary_text = summary.text.replace(",", "")
        first_number = re.search(r'\d+', summary_text)
        # ``first_number`` is None when there are no digits; the resulting
        # AttributeError is what routes that case to "NA".
        return int(first_number.group())
    except Exception:
        return "NA"

In [14]:
def _star_rating_share(driver, bucket_position, settle_seconds):
    """Return one star bucket's count as a whole-number percentage of the total.

    Shared implementation behind ``rating_5_star`` … ``rating_1_star``, which
    previously were five copies of this body differing only in the bucket index
    and the initial pause.

    Args:
        driver: Selenium WebDriver positioned on the page to scrape.
        bucket_position: 1-based index into the ``div.BArk-j`` elements
            (the wrappers pass 1 for five stars through 5 for one star).
        settle_seconds: Fixed pause before the page is queried.

    Returns:
        ``int(count / total * 100)`` (truncated, not rounded), or the string
        ``"NA"`` when an element is missing, a text has no digits, or the total
        is zero.
    """
    time.sleep(settle_seconds)
    try:
        # Total: first integer in the text of the first div.col-12-12.
        total_text = driver.find_element(By.XPATH, '(//div[@class="col-12-12"])[1]').text
        total = int(re.search(r'\d+', total_text.replace(",", "")).group())

        bucket_text = driver.find_element(
            By.XPATH, f'(//div[@class="BArk-j"])[{bucket_position}]'
        ).text
        bucket = int(re.search(r'\d+', bucket_text.replace(",", "")).group())

        return int((bucket / total) * 100)
    except Exception:
        return "NA"


def rating_5_star(driver, wait):
    """Percentage of ratings that are 5-star, or "NA". ``wait`` is not used."""
    # This is the only wrapper with a 3-second pause; the others use 1 second.
    return _star_rating_share(driver, 1, 3)


def rating_4_star(driver, wait):
    """Percentage of ratings that are 4-star, or "NA". ``wait`` is not used."""
    return _star_rating_share(driver, 2, 1)


def rating_3_star(driver, wait):
    """Percentage of ratings that are 3-star, or "NA". ``wait`` is not used."""
    return _star_rating_share(driver, 3, 1)


def rating_2_star(driver, wait):
    """Percentage of ratings that are 2-star, or "NA". ``wait`` is not used."""
    return _star_rating_share(driver, 4, 1)


def rating_1_star(driver, wait):
    """Percentage of ratings that are 1-star, or "NA". ``wait`` is not used."""
    return _star_rating_share(driver, 5, 1)

In [19]:
def _scrape_product(driver, wait, serial_number):
    """Search for one model code and return its scraped fields keyed by column name."""
    if search_product(driver, wait, serial_number) == "Currently unavailable":
        # No result page was opened, so the browser is still on the Google
        # results page. Scraping it would only produce junk (and click random
        # buttons), so record each scraper's own fallback value instead.
        return {
            'Check Name': False,
            'Discounted_Price': "Currently unavailable",
            'Original_MRP': "Currently unavailable",
            'Seller_Name': "Currently unavailable",
            'Price_After_Exchange': "",
            'Total_Reviews': "NA",
            'Average_Rating': "NA",
            'Star_5_Rating': "NA",
            'Star_4_Rating': "NA",
            'Star_3_Rating': "NA",
            'Star_2_Rating': "NA",
            'Star_1_Rating': "NA",
            'Offers': "No offers",
        }

    # The call order below is the original one. Offers stay last because that
    # scraper clicks a button on the page before reading it.
    scraped = {}
    scraped['Check Name'] = check_name(driver, wait, serial_number)
    scraped['Discounted_Price'] = get_product_price(driver, wait)
    scraped['Original_MRP'] = get_strikeout_price(driver, wait)
    scraped['Seller_Name'] = get_seller_name(driver, wait)
    scraped['Price_After_Exchange'] = buy_with_exchange(driver, wait)
    scraped['Total_Reviews'] = review_count(driver, wait)
    scraped['Average_Rating'] = get_rating(driver, wait)
    scraped['Star_5_Rating'] = rating_5_star(driver, wait)
    scraped['Star_4_Rating'] = rating_4_star(driver, wait)
    scraped['Star_3_Rating'] = rating_3_star(driver, wait)
    scraped['Star_2_Rating'] = rating_2_star(driver, wait)
    scraped['Star_1_Rating'] = rating_1_star(driver, wait)
    scraped['Offers'] = get_product_offers(driver, wait)
    return scraped


def main(person_name, excel_file_path,
         output_file='C:\\Users\\pgupt\\Desktop\\Output_data.xlsx'):
    """Scrape every product assigned to one person and append the results to Excel.

    Reads ``excel_file_path``, keeps the rows whose 'Assigned To' column equals
    ``person_name``, and for each 'Sales Model Code' starts a fresh Chrome
    session, searches for the product and scrapes it. The new rows are appended
    to ``output_file`` (created if it does not exist) and a confirmation line is
    printed.

    Args:
        person_name: Value to match in the 'Assigned To' column.
        excel_file_path: Path of the input workbook.
        output_file: Path of the workbook to append to. Defaults to the
            location that used to be hard-coded in this function.

    Raises:
        Exceptions from reading the input, starting the browser, loading Google
        or writing the output propagate; the browser opened for the current
        product is closed before they do.
    """
    # Read the Excel file
    products = pd.read_excel(excel_file_path)
    is_assigned = products['Assigned To'] == person_name
    serial_numbers = products.loc[is_assigned, 'Sales Model Code'].tolist()

    # Captured once, before scraping starts: every row of this run carries the
    # same date and time. (Named run_* so the ``time`` module is not shadowed.)
    run_date = print_current_date()
    run_time = print_current_time()

    column_order = [
        'Source', 'Model Number', 'Check Name', 'Date', 'Time',
        'Original_MRP', 'Discounted_Price', 'Seller_Name',
        'Price_After_Exchange', 'Total_Reviews', 'Average_Rating',
        'Star_5_Rating', 'Star_4_Rating', 'Star_3_Rating',
        'Star_2_Rating', 'Star_1_Rating', 'Offers',
    ]

    rows = []
    for sn in serial_numbers:
        row = {
            'Source': "Flipkart",
            'Model Number': sn,
            'Date': run_date,
            'Time': run_time,
        }
        driver, wait = setup_driver()
        try:
            row.update(_scrape_product(driver, wait, sn))
        finally:
            # Always release the browser, even when a step above raises.
            driver.quit()
        rows.append(row)

    # ``columns=`` fixes the column order and keeps the header row even when
    # there were no matching products.
    scraped_df = pd.DataFrame(rows, columns=column_order)

    try:
        existing_df = pd.read_excel(output_file)
        updated_df = pd.concat([existing_df, scraped_df], ignore_index=True)
    except FileNotFoundError:
        updated_df = scraped_df
    updated_df.to_excel(output_file, index=False)

    print(f"Data has been written to {output_file}")

In [20]:
# Value matched against the 'Assigned To' column of the product list;
# replace it with the person whose products should be scraped.
person_name = "Priyanshu Gupta"
# Path of the input Excel workbook; replace it with the location of your own file.
excel_file_path = 'C:\\Users\\pgupt\\Desktop\\Product_List.xlsx'
# Runs the whole scrape: one Chrome session is opened per matching product.
main(person_name,excel_file_path)

Data has been written to C:\Users\pgupt\Desktop\Output_data.xlsx
