In [1]:
pip install requests  beautifulsoup4 pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import time

# Function to scrape data with retry mechanism
def fetch_data_with_retry(url, retries=3, delay=5, timeout=30, headers=None,
                          retry_statuses=(429, 503, 529)):
    """Download ``url`` and return its body, retrying while the server is overloaded.

    Args:
        url: Address requested with ``requests.get``.
        retries: Maximum number of requests made before giving up.
        delay: Seconds to wait between two consecutive attempts.
        timeout: Seconds to wait for the server before abandoning a request;
            ``None`` disables the limit (the request may then block forever).
        headers: Optional mapping of HTTP headers forwarded to ``requests.get``.
        retry_statuses: HTTP status codes treated as a temporary overload and
            therefore retried. Any other error status aborts immediately.

    Returns:
        The response text on success, or ``None`` if every attempt failed,
        a non-retryable HTTP status was returned, or any other error occurred.
        Errors are reported with ``print`` and never propagated to the caller.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # Raises an HTTPError if the response was unsuccessful
            return response.text  # Successfully fetched data
        except requests.exceptions.HTTPError as http_err:
            # Take the status from the exception itself: the local `response`
            # may be unbound or left over from an earlier attempt.
            status = getattr(http_err.response, "status_code", None)
            if status not in retry_statuses:
                print(f"HTTP error occurred: {http_err}")
                break
            if attempt == retries:
                # No attempt follows, so neither announce a retry nor sleep.
                print(f"Server overloaded. Giving up after {retries} attempts.")
                break
            print(f"Server overloaded. Retry {attempt}/{retries} in {delay} seconds...")
            time.sleep(delay)  # Wait before retrying
        except Exception as err:
            # Best effort by design: report the problem and fall through to None.
            print(f"An error occurred: {err}")
            break
    return None

# Define the URLs in a dictionary
# Maps a short category key (used in the progress messages printed by the
# fetch loop) to the Flipkart page requested for that category.
# Most entries point at "*-store" landing pages; a few are "search?q=..." or
# "/pr?sid=..." listing URLs copied together with their query strings.
# NOTE(review): several search URLs embed `requestId` / `suggestionId` values
# that look session-specific -- confirm they are still needed.
urls = {
    "mobile_phones": "https://www.flipkart.com/mobile-phones-store?otracker=nmenu_sub_Electronics_0_Mobiles",
    "mobile_accessories": "https://www.flipkart.com/mobile-phone-cases-covers-screenguards-store?otracker=nmenu_sub_Electronics_0_Mobile%20Accessories",
    "smart_wearable": "https://www.flipkart.com/search?q=smart%20wearable&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&otracker=nmenu_sub_Electronics_0_Smart%20Wearable%20Tech",
    "health_appliances": "https://www.flipkart.com/health-and-appliances-store?otracker=nmenu_sub_Electronics_0_Health%20Care%20Appliances",
    "laptops": "https://www.flipkart.com/laptops-store?otracker=nmenu_sub_Electronics_0_Laptops",
    "desktop_pc": "https://www.flipkart.com/desktop-pc-store?otracker=nmenu_sub_Electronics_0_Desktop%20PCs",
    "gaming": "https://www.flipkart.com/gaming-store?otracker=nmenu_sub_Electronics_0_Gaming%20%26%20Accessories",
    "computer_accessories": "https://www.flipkart.com/computer-accessories-store?otracker=nmenu_sub_Electronics_0_Computer%20Accessories",
    "computer_peripherals": "https://www.flipkart.com/peripherals-store?otracker=nmenu_sub_Electronics_0_Computer%20Peripherals",
    "tablets": "https://www.flipkart.com/tablets-store?otracker=nmenu_sub_Electronics_0_Tablets",
    "televisions": "https://www.flipkart.com/television-store?otracker=nmenu_sub_Electronics_0_Televisions",
    "speakers": "https://www.flipkart.com/audio-speaker-store?otracker=nmenu_sub_Electronics_0_Speakers",
    "smart_home": "https://www.flipkart.com/smart-home-automation-store?otracker=nmenu_sub_Electronics_0_Smart%20Home%20Automation",
    "cameras": "https://www.flipkart.com/camera-clp-store?otracker=nmenu_sub_Electronics_0_Camera",
    "camera_accessories": "https://www.flipkart.com/search?q=camera+accessories&sid=jek%2C6l2&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_1_10_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_1_10_na_na_na&as-pos=1&as-type=RECENT&suggestionId=camera+accessories%7CCamera+Accessories&requestId=9c52219a-6c92-4e00-ab8e-28561a626634&as-backfill=on&otracker=nmenu_sub_Electronics_0_Camera%20Accessories",
    "network_components": "https://www.flipkart.com/computers/network-components/pr?sid=6bo,70k&otracker=categorytree&otracker=nmenu_sub_Electronics_0_Network%20Components",
    "new_launch_tv": "https://www.flipkart.com/search?q=new+launch+tv&sid=ckf%2Cczl&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_1_12_na_na_ps&otracker1=AS_QueryStore_OrganicAutoSuggest_1_12_na_na_ps&as-pos=1&as-type=RECENT&suggestionId=new+launch+tv%7CTelevisions&requestId=33ae24d5-65fa-4e7d-ae1b-36f74371f62d&as-backfill=on&otracker=nmenu_sub_TVs%20%26%20Appliances_0_New%20Launches",
    "washing_machine": "https://www.flipkart.com/washing-machine-store?otracker=nmenu_sub_TVs%20%26%20Appliances_0_Washing%20Machine",
    "air_conditioners": "https://www.flipkart.com/acnewclp-store?otracker=nmenu_sub_TVs%20%26%20Appliances_0_Air%20Conditioners",
    "refrigerators": "https://www.flipkart.com/refrigerator-store?otracker=nmenu_sub_TVs%20%26%20Appliances_0_Refrigerators",
    "kitchen_appliances": "https://www.flipkart.com/kitchen-appliances-store?otracker=nmenu_sub_Appliances_0_Kitchen%20Appliances&otracker=nmenu_sub_TVs%20%26%20Appliances_0_Kitchen%20Appliances",
    "healthy_living_appliances": "https://www.flipkart.com/home-kitchen/~appliances-for-a-healthy-living/pr?sid=j9e&otracker=nmenu_sub_TVs%20%26%20Appliances_0_Healthy%20Living%20Appliances",
    "small_home_appliances": "https://www.flipkart.com/home-appliance-store?otracker=nmenu_sub_Appliances_0_Small%20Home%20Appliances&otracker=nmenu_sub_TVs%20%26%20Appliances_0_Small%20Home%20Appliances"
}
# Fetch every category page once and keep each body, keyed by category, so
# it remains available after the loop instead of being overwritten by the
# next iteration.
fetched_pages = {}
for category, url in urls.items():
    print(f"Fetching data for {category}...")
    data = fetch_data_with_retry(url)
    # The fetch helper signals failure with None; an empty body is still a
    # successful response, so test identity rather than truthiness.
    if data is not None:
        fetched_pages[category] = data
        print(f"Successfully fetched data for {category}")
    else:
        print(f"Failed to fetch data for {category}")



Fetching data for mobile_phones...
Server overloaded. Retry 1/3 in 5 seconds...
Server overloaded. Retry 2/3 in 5 seconds...
Server overloaded. Retry 3/3 in 5 seconds...
Failed to fetch data for mobile_phones
Fetching data for mobile_accessories...
Server overloaded. Retry 1/3 in 5 seconds...
Server overloaded. Retry 2/3 in 5 seconds...
Server overloaded. Retry 3/3 in 5 seconds...
Failed to fetch data for mobile_accessories
Fetching data for smart_wearable...
Server overloaded. Retry 1/3 in 5 seconds...
Server overloaded. Retry 2/3 in 5 seconds...
Server overloaded. Retry 3/3 in 5 seconds...
Failed to fetch data for smart_wearable
Fetching data for health_appliances...
Server overloaded. Retry 1/3 in 5 seconds...
Server overloaded. Retry 2/3 in 5 seconds...
Server overloaded. Retry 3/3 in 5 seconds...
Failed to fetch data for health_appliances
Fetching data for laptops...
Server overloaded. Retry 1/3 in 5 seconds...
Server overloaded. Retry 2/3 in 5 seconds...
Server overloaded. Retry 