### 1. Write a python program which searches all the product under a particular product from www.amazon.in. 
### The product to be searched will be taken as input from user. For e.g. If user input is ‘guitar’. Then search for guitars


In [2]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Function to search for a product on Amazon and store results in a DataFrame
def search_amazon_product(product_name):
    # Initialize Chrome WebDriver
    driver = webdriver.Chrome()
    
    try:
        # Open Amazon website
        driver.get("https://www.amazon.in/")
        
        # Wait for the page to load
        time.sleep(3)
        
        # Find the search box element using XPath
        search_box = driver.find_element(By.XPATH, '//*[@id="twotabsearchtextbox"]')
        
        # Input the product name into the search box
        search_box.send_keys(product_name)
        
        # Press Enter key to perform the search
        search_box.send_keys(Keys.RETURN)
        
        time.sleep(2)  # Wait for 2 seconds for the page to load
        
        # Find all the product elements
        products = driver.find_elements(By.CSS_SELECTOR, ".s-result-item h2 a")
        
        # Create an empty list to store the product names
        product_names = []
        
        # Store product names in the list
        for product in products:
            product_names.append(product.text)
    
    except Exception as e:
        print("An error occurred:", e)
        product_names = []  # Return an empty list if an error occurs
    
    finally:
        # Close the WebDriver
        driver.quit()
        
        # Convert the list of product names into a DataFrame
        df = pd.DataFrame(product_names, columns=["Product Name"])
        
        return df

# Main function
def main():
    # Take user input for the product to search
    product_name = input("Enter the product to search on Amazon: ")
    
    # Call function to search for the product on Amazon and get results as DataFrame
    product_df = search_amazon_product(product_name)
    
    # Display the DataFrame
    print(product_df)

if __name__ == "__main__":
    main()


Enter the product to search on Amazon: laptop
                                         Product Name
0   Apple 2023 MacBook Pro (16-inch, M3 Max chip w...
1   Apple 2023 MacBook Air laptop with M2 chip: 38...
2   HP Laptop 15s, 12th Gen Intel Core i3, 15.6-in...
3   ASUS VivoBook 15 (2021), 15.6-inch (39.62 cm) ...
4   HP Laptop 15s, Intel Celeron, 15.6-inch (39.6 ...
5   HP 15s 12th Gen Intel Core i5, 15/6inch (39.6 ...
6   ASUS Vivobook 16 (2023), Intel Core i9-13900H ...
7   HP Laptop 15s, AMD Ryzen 3 5300U, 15.6-inch (3...
8                                                    
9                                                    
10  ASUS Vivobook 16 (2023), Intel Core i9-13900H ...
11  HP Laptop 15s, 12th Gen Intel Core i5-1235U, 1...
12  Chuwi HeroBook Pro 14.1'' Intel Celeron N4020 ...
13  HP 2023 Ryzen 3 Dual Core 3250U - (8 GB/512 GB...
14  ASUS Vivobook 15 (2023), Intel Core i5-1335U 1...
15  MSI GF63 Thin, Intel 11th Gen. i5-11260H, 40CM...
16  Apple 2023 MacBook Pro (14-inch,

#### 2. In the above question, now scrape the following details of each product listed in first 3 pages of your search results and save it in a data frame and csv. In case if any product has less than 3 pages in search results then scrape all the products available under that product name. Details to be scraped are: "Brand Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and “Product URL”. In case, if any of the details are missing for any of the product then replace it by “-“. 

In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Function to scrape details of each product listed in search results
def scrape_product_details(driver):
    print("Scraping product details...")
    # Find all the product elements
    products = driver.find_elements(By.CSS_SELECTOR, ".s-result-item")
    
    # Initialize lists to store details
    brand_names = []
    product_names = []
    prices = []
    return_exchange = []
    expected_delivery = []
    availabilities = []
    product_urls = []
    
    # Loop through each product
    for product in products:
        try:
            # Extracting details if available
            brand_name = product.find_element(By.CSS_SELECTOR, ".a-size-base-plus.a-color-base").text.strip()
        except:
            brand_name = "-"
        
        try:
            product_name = product.find_element(By.CSS_SELECTOR, ".s-line-clamp-2").text.strip()
        except:
            product_name = "-"
        
        try:
            price = product.find_element(By.CSS_SELECTOR, ".a-price .a-offscreen").text.strip()
        except:
            price = "-"
        
        try:
            ret_exchange = product.find_element(By.CSS_SELECTOR, ".a-icon-alt").text.strip()
        except:
            ret_exchange = "-"
        
        try:
            delivery = product.find_element(By.CSS_SELECTOR, ".a-text-bold").text.strip()
        except:
            delivery = "-"
        
        try:
            availability = product.find_element(By.CSS_SELECTOR, ".a-icon-check-square").text.strip()
        except:
            availability = "-"
        
        try:
            product_url = product.find_element(By.CSS_SELECTOR, "a.a-link-normal").get_attribute("href").strip()
        except:
            product_url = "-"
        
        # Append details to respective lists
        brand_names.append(brand_name)
        product_names.append(product_name)
        prices.append(price)
        return_exchange.append(ret_exchange)
        expected_delivery.append(delivery)
        availabilities.append(availability)
        product_urls.append(product_url)
    
    # Create a DataFrame from the scraped details
    df = pd.DataFrame({
        "Brand Name": brand_names,
        "Name of the Product": product_names,
        "Price": prices,
        "Return/Exchange": return_exchange,
        "Expected Delivery": expected_delivery,
        "Availability": availabilities,
        "Product URL": product_urls
    })
    print("Product details scraped successfully.")
    return df

# Function to search for a product on Amazon, scrape details from first 3 pages, and save to CSV
def search_amazon_product(product_name):
    print("Searching for the product on Amazon...")
    # Initialize Chrome WebDriver
    driver = webdriver.Chrome()
    
    try:
        # Open Amazon website
        driver.get("https://www.amazon.in/")
        
        # Wait for the page to load
        time.sleep(3)
        
        # Find the search box element using XPath
        search_box = driver.find_element(By.XPATH, '//*[@id="twotabsearchtextbox"]')
        
        # Input the product name into the search box
        search_box.send_keys(product_name)
        
        # Press Enter key to perform the search
        search_box.send_keys(Keys.RETURN)
        
        time.sleep(2)  # Wait for 2 seconds for the page to load
        
        # Scrape details from the first page
        print("Scraping details from the first page...")
        df = scrape_product_details(driver)
        
        # Loop through next 2 pages and scrape details
        for i in range(2):
            # Find the next page button and click
            next_page_button = driver.find_element(By.CSS_SELECTOR, ".a-last a")
            next_page_button.click()
            time.sleep(2)  # Wait for 2 seconds for the page to load
            
            # Scrape details from the current page and append to DataFrame
            print(f"Scraping details from page {i+2}...")
            df = pd.concat([df, scrape_product_details(driver)], ignore_index=True)
        
        # Save DataFrame to CSV
        df.to_csv("amazon_products.csv", index=False)
        print("Scraped details saved to 'amazon_products.csv'")
    
    except Exception as e:
        print("An error occurred:", e)
    
    finally:
        # Close the WebDriver
        driver.quit()

# Main function
def main():
    # Take user input for the product to search
    product_name = input("Enter the product to search on Amazon: ")
    
    # Call function to search for the product on Amazon and scrape details
    search_amazon_product(product_name)

if __name__ == "__main__":
    main()


Enter the product to search on Amazon: laptop
Searching for the product on Amazon...
Scraping details from the first page...
Scraping product details...
Product details scraped successfully.
Scraping details from page 2...
Scraping product details...
Product details scraped successfully.
Scraping details from page 3...
Scraping product details...
Product details scraped successfully.
Scraped details saved to 'amazon_products.csv'


# 3. Write a python program to access the search bar and search button on images.google.com and scrape 10 images each for keywords ‘fruits’, ‘cars’ and ‘Machine Learning’, ‘Guitar’, ‘Cakes’.

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time

# Function to scrape image URLs from Google Images search results
def scrape_image_urls(driver, keyword, num_images):
    try:
        # Wait for the search bar to be clickable
        search_bar = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[@class="gLFyf gsfi"]')))
        
        # Clear the search bar and enter the keyword
        search_bar.clear()
        search_bar.send_keys(keyword)
        
        # Press Enter to perform the search
        search_bar.send_keys(Keys.RETURN)
        
        time.sleep(2)  # Wait for 2 seconds for the search results to load
        
        # Scroll down to load more images
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
        
        # Find all the image elements
        image_elements = driver.find_elements(By.XPATH, '//img[@class="rg_i Q4LuWd"]')
        
        # Extract image URLs
        image_urls = []
        for i in range(min(num_images, len(image_elements))):
            try:
                # Click on the image to load the full-size image
                image_elements[i].click()
                time.sleep(2)  # Wait for the full-size image to load
                
                # Find the full-size image URL
                full_size_image = driver.find_element(By.XPATH, '//img[@class="n3VNCb"]').get_attribute('src')
                
                if full_size_image:
                    image_urls.append(full_size_image)
            except Exception as e:
                print(f"Error while scraping image {i+1} for '{keyword}':", e)
        
        return image_urls
    
    except Exception as e:
        print(f"An error occurred while scraping images for '{keyword}':", e)
        return []

# Main function
def main():
    # Initialize Chrome WebDriver
    driver = webdriver.Chrome()
    
    try:
        # Open Google Images website
        driver.get("https://www.google.com/imghp")
        
        time.sleep(2)  # Wait for 2 seconds for the page to load
        
        # Keywords to search for images
        keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
        
        # Number of images to scrape for each keyword
        num_images_per_keyword = 10
        
        # Dictionary to store image URLs for each keyword
        image_urls_dict = {}
        
        # Scrape images for each keyword
        for keyword in keywords:
            print(f"Scraping images for '{keyword}'...")
            image_urls = scrape_image_urls(driver, keyword, num_images_per_keyword)
            image_urls_dict[keyword] = image_urls
        
        # Print the scraped image URLs
        for keyword, urls in image_urls_dict.items():
            print(f"\n{keyword}:\n")
            for i, url in enumerate(urls, start=1):
                print(f"{i}. {url}")
    
    except Exception as e:
        print("An error occurred:", e)
    
    finally:
        # Close the WebDriver
        driver.quit()

if __name__ == "__main__":
    main()


Scraping images for 'fruits'...
An error occurred while scraping images for 'fruits': Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6605170C2+63090]
	(No symbol) [0x00007FF660482D12]
	(No symbol) [0x00007FF66031EC65]
	(No symbol) [0x00007FF66036499D]
	(No symbol) [0x00007FF660364ADC]
	(No symbol) [0x00007FF6603A5B37]
	(No symbol) [0x00007FF66038701F]
	(No symbol) [0x00007FF6603A3412]
	(No symbol) [0x00007FF660386D83]
	(No symbol) [0x00007FF6603583A8]
	(No symbol) [0x00007FF660359441]
	GetHandleVerifier [0x00007FF66091262D+4238301]
	GetHandleVerifier [0x00007FF66094F78D+4488509]
	GetHandleVerifier [0x00007FF660947A6F+4456479]
	GetHandleVerifier [0x00007FF6605F0606+953270]
	(No symbol) [0x00007FF66048E5DF]
	(No symbol) [0x00007FF6604892B4]
	(No symbol) [0x00007FF6604893EB]
	(No symbol) [0x00007FF660479C24]
	BaseThreadInitThunk [0x00007FFED5D9257D+29]
	RtlUserThreadStart [0x00007FFED778AA58+40]

Scraping images for 'cars'...
An error occurred while scraping images for 'cars': Message

# 4] Write a python program to search for a smartphone(e.g.: Oneplus Nord, pixel 4A, etc.) on www.flipkart.com and scrape following details for all the search results displayed on 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”,“Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. Incase if any of the details is missing then replace it by “- “. Save your results in a dataframe and CSV. 

In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query):
    url = f"https://www.flipkart.com/search?q={search_query}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    smartphones = []
    results = soup.find_all("div", class_="_1AtVbE")

    for result in results:
        details = {}

        brand_element = result.find("div", class_="_4rR01T")
        details["Brand Name"] = brand_element.text.strip() if brand_element else "-"

        name_element = result.find("a", class_="IRpwTa")
        details["Smartphone Name"] = name_element.text.strip() if name_element else "-"
        details["Colour"] = name_element.get("title").split("(")[-1].strip(")") if name_element else "-"

        details["RAM"] = result.find(string="RAM").find_next("li").text.strip() if result.find(string="RAM") else "-"
        details["Storage(ROM)"] = result.find(string="ROM").find_next("li").text.strip() if result.find(string="ROM") else "-"
        details["Primary Camera"] = result.find(string="Primary Camera").find_next("li").text.strip() if result.find(string="Primary Camera") else "-"
        details["Secondary Camera"] = result.find(string="Secondary Camera").find_next("li").text.strip() if result.find(string="Secondary Camera") else "-"
        details["Display Size"] = result.find(string="Display Size").find_next("li").text.strip() if result.find(string="Display Size") else "-"
        details["Battery Capacity"] = result.find(string="Battery Capacity").find_next("li").text.strip() if result.find(string="Battery Capacity") else "-"
        details["Price"] = result.find("div", class_="_30jeq3 _1_WHN1").text.strip() if result.find("div", class_="_30jeq3 _1_WHN1") else "-"
        details["Product URL"] = "https://www.flipkart.com" + name_element.get("href") if name_element else "-"

        smartphones.append(details)

    return smartphones

def save_to_csv(data, filename="smartphones.csv"):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

def main():
    search_query = input("Enter smartphone name to search on Flipkart: ")
    smartphones = scrape_flipkart_smartphones(search_query)
    if smartphones:
        save_to_csv(smartphones)
    else:
        print("No results found.")

if __name__ == "__main__":
    main()


Enter smartphone name to search on Flipkart: Oneplus
Data saved to smartphones.csv


### 5. Write a program to scrap geospatial coordinates (latitude, longitude) of a city searched on google maps

In [14]:
import requests

def get_coordinates(city):
    # Nominatim geocoding API endpoint
    url = "https://nominatim.openstreetmap.org/search"

    # Parameters for the API request
    params = {
        "q": city,
        "format": "json",
        "limit": 1
    }

    # Send a GET request to the Nominatim API
    response = requests.get(url, params=params)
    data = response.json()

    # Parse the response to extract coordinates
    if data:
        latitude = data[0]["lat"]
        longitude = data[0]["lon"]
        print(f"Coordinates for {city}: Latitude {latitude}, Longitude {longitude}")
    else:
        print(f"Coordinates for {city} not found.")

if __name__ == "__main__":
    city = input("Enter the city name: ")
    get_coordinates(city)


Enter the city name: Pune
Coordinates for Pune: Latitude 18.521428, Longitude 73.8544541


## 6. Write a program to scrap all the available details of best gaming laptops from digit.in. 

In [1]:
import requests
from bs4 import BeautifulSoup

def scrape_digit_in():
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        
        laptops = soup.find_all('div', class_='TopNumbeHeading')
        
        for laptop in laptops:
            name = laptop.find('div', class_='TopNumbeHeading').text.strip()
            specs = laptop.find('div', class_='Specs').text.strip()
            price = laptop.find('div', class_='smprice').text.strip()
            
            print(f"Name: {name}")
            print(f"Specifications: {specs}")
            print(f"Price: {price}")
            print()
    else:
        print("Failed to retrieve data from the website.")

if __name__ == "__main__":
    scrape_digit_in()


## 7. Write a python program to scrape the details for all billionaires from www.forbes.com. Details to be scrapped: “Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”. 

In [3]:
from selenium import webdriver
from bs4 import BeautifulSoup

def scrape_forbes_billionaires():
    url = "https://www.forbes.com/billionaires/"
    
    # Start a webdriver session
    driver = webdriver.Chrome()  # You may need to adjust this based on your WebDriver configuration
    driver.get(url)
    
    # Wait for the page to load
    driver.implicitly_wait(10)
    
    # Get the page source
    page_source = driver.page_source
    
    # Parse the HTML
    soup = BeautifulSoup(page_source, 'html.parser')
    
    # Find all billionaire elements
    billionaires = soup.find_all('div', class_='personList')
    
    for person in billionaires:
        rank = person.find('div', class_='rank').text.strip()
        name = person.find('div', class_='personName').text.strip()
        net_worth = person.find('div', class_='netWorth').text.strip()
        age = person.find('div', class_='age').text.strip()
        citizenship = person.find('div', class_='countryOfCitizenship').text.strip()
        source = person.find('div', class_='source-column').text.strip()
        industry = person.find('div', class_='category').text.strip()
        
        print("Rank:", rank)
        print("Name:", name)
        print("Net Worth:", net_worth)
        print("Age:", age)
        print("Citizenship:", citizenship)
        print("Source:", source)
        print("Industry:", industry)
        print()
    
    # Close the webdriver session
    driver.quit()

if __name__ == "__main__":
    scrape_forbes_billionaires()


# 9.  Write a python program to scrape a data for all available Hostels from https://www.hostelworld.com/ in “London” location. You have to scrape hostel name, distance from city centre, ratings, total reviews, overall reviews, privates from price, dorms from price, facilities and property description. 

In [4]:
import requests
from bs4 import BeautifulSoup

def scrape_hostels_in_london():
    url = "https://www.hostelworld.com/"
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        
        hostels = soup.find_all('div', class_='description-container')
        
        for hostel in hostels:
            name = hostel.find('h2', class_='title').text.strip()
            distance = hostel.find('span', class_='description').text.strip()
            rating = hostel.find('div', class_='score').text.strip()
            total_reviews = hostel.find('div', class_='reviews').text.strip().split()[0]
            overall_reviews = hostel.find('div', class_='ratingtext').text.strip()
            privates_from_price = hostel.find('span', class_='price').text.strip().split('\n')[0].strip()
            dorms_from_price = hostel.find('span', class_='price').text.strip().split('\n')[1].strip()
            facilities = [facility.text.strip() for facility in hostel.find_all('span', class_='label')]
            description = hostel.find('div', class_='additional-info').text.strip()
            
            print("Name:", name)
            print("Distance from city centre:", distance)
            print("Rating:", rating)
            print("Total reviews:", total_reviews)
            print("Overall reviews:", overall_reviews)
            print("Privates from price:", privates_from_price)
            print("Dorms from price:", dorms_from_price)
            print("Facilities:", facilities)
            print("Description:", description)
            print()
    else:
        print("Failed to retrieve data from Hostelworld.")

if __name__ == "__main__":
    scrape_hostels_in_london()


Failed to retrieve data from Hostelworld.
