In [None]:
import requests
from bs4 import BeautifulSoup

# Local import: this cell builds a URL from free-text user input.
from urllib.parse import quote_plus

# Ask the user for the product to look up.
search_query = input("Enter the product you want to search for on Amazon: ")

# Create the Amazon search URL. The query is percent-encoded so that spaces,
# "&", "#" and non-ASCII characters cannot corrupt the query string.
url = f"https://www.amazon.in/s?k={quote_plus(search_query)}"

# Send an HTTP GET request. Without a timeout, requests waits indefinitely on
# a stalled connection and the cell never finishes.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parsed page, kept in `soup` for the follow-up processing step.
    soup = BeautifulSoup(response.content, "html.parser")

    # Process the search results here (you can display them or move on to Task 2)
    # You might want to extract product details and links from the search results
else:
    print("Failed to retrieve the webpage.")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape product details from a single page
def scrape_page(url):
    """Scrape product details from a single Amazon search-results page.

    Args:
        url: Full URL of the search-results page to fetch.

    Returns:
        A list with one dict per ``div.s-result-item`` element found on the
        page. Each dict has the keys 'Product Name', 'Price',
        'Return/Exchange', 'Expected Delivery', 'Availability' and
        'Product URL'; a detail that is absent from the markup is stored as
        "-". Returns None when the server answers with a status other
        than 200.

    Raises:
        requests.RequestException: on network errors, including the timeout.
    """
    from urllib.parse import urljoin

    base_url = 'https://www.amazon.in'

    # The timeout keeps a stalled connection from blocking the notebook.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or_dash(element):
        # Missing elements become "-" so that every row has the same columns.
        return element.text.strip() if element is not None else "-"

    product_details = []

    for product in soup.find_all('div', class_='s-result-item'):
        # Not every result item carries a product link; indexing the result
        # of find() directly would raise TypeError on those items.
        link_element = product.find('a', class_='a-link-normal')
        href = link_element.get('href') if link_element is not None else None
        # urljoin handles both site-relative and already-absolute hrefs.
        product_url = urljoin(base_url, href) if href else "-"

        product_details.append({
            'Product Name': text_or_dash(product.find('span', class_='a-text-normal')),
            'Price': text_or_dash(product.find('span', class_='a-offscreen')),
            'Return/Exchange': text_or_dash(product.find('span', class_='a-declarative')),
            'Expected Delivery': text_or_dash(product.find('span', class_='a-text-bold')),
            'Availability': text_or_dash(
                product.find('span', class_='a-size-small a-color-success')
            ),
            'Product URL': product_url
        })

    return product_details

# Function to scrape product details from multiple pages
def scrape_multiple_pages(search_query, max_pages):
    """Scrape several consecutive Amazon search-result pages for one query.

    Args:
        search_query: Free-text product search; it is percent-encoded before
            being placed in the URL.
        max_pages: Highest page number to request; pages 1..max_pages are
            fetched in order.

    Returns:
        A flat list of the product dicts returned by ``scrape_page`` for each
        page. Scraping stops at the first page for which ``scrape_page``
        returns None or an empty list.
    """
    from urllib.parse import quote_plus

    # Encode once: spaces, "&", "#" etc. would otherwise break the query string
    # (for example "a&b" would be sent as k=a plus a stray parameter b).
    encoded_query = quote_plus(search_query)

    all_product_details = []

    for page in range(1, max_pages + 1):
        url = f"https://www.amazon.in/s?k={encoded_query}&page={page}"
        product_details = scrape_page(url)

        if not product_details:
            # Failed request or a page without results: nothing more to collect.
            break
        all_product_details.extend(product_details)

    return all_product_details

# Local import: used only to turn the free-text query into a safe file name.
import re

# User input for the product to search
search_query = input("Enter the product you want to search for on Amazon: ")

# Set the maximum number of pages to scrape (3 in this case)
max_pages = 3

# Scrape product details from multiple pages
all_products = scrape_multiple_pages(search_query, max_pages)

# Create a DataFrame from the scraped data
df = pd.DataFrame(all_products)

# The query is free text, so characters that are illegal or meaningful in file
# names (path separators, wildcards, quotes, ...) are replaced before it is
# used as one; an empty result falls back to a fixed name.
safe_name = re.sub(r'[\\/:*?"<>|]+', '_', search_query).strip() or 'search'
csv_path = f'{safe_name}_products.csv'

# Save the DataFrame to a CSV file
df.to_csv(csv_path, index=False)

print(f"Scraped {len(all_products)} products and saved to {csv_path}.")


In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup
import requests
import os

# Function to scrape images for a given keyword
def scrape_images_for_keyword(driver, keyword, num_images=10):
    """Search Google Images for a keyword and save the first images found.

    The images are written to a directory named after the keyword as
    ``<keyword>/<keyword>_<n>.jpg``, where n is the 1-based position of the
    URL among the collected ones (a failed download leaves a gap).

    Args:
        driver: A Selenium WebDriver instance; it is navigated by this call.
        keyword: Search term; also used as directory and file-name prefix.
        num_images: Maximum number of image URLs to download.

    Returns:
        None.
    """
    from selenium.webdriver.common.by import By

    # Navigate to Google Images
    driver.get("https://www.google.com/imghp")

    # find_element_by_name() was removed in Selenium 4.3; find_element with a
    # By locator is available in both Selenium 3 and 4.
    search_box = driver.find_element(By.NAME, "q")
    search_box.send_keys(keyword)
    search_box.send_keys(Keys.RETURN)

    # Scroll to the bottom a few times, pausing so more results can load.
    for _ in range(3):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # Parse the rendered page
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Keep only <img> sources that are real http(s) URLs; inline data: URIs
    # and images without a src are skipped.
    image_urls = []
    for img in soup.find_all("img"):
        src = img.get("src")
        if src and src.startswith("http"):
            image_urls.append(src)

    # Create a directory to save images
    os.makedirs(keyword, exist_ok=True)

    # Download and save images
    for i, img_url in enumerate(image_urls[:num_images]):
        try:
            # Timeout so a single stalled download cannot hang the whole run.
            response = requests.get(img_url, timeout=30)
        except requests.RequestException as exc:
            # One unreachable image should not abort the remaining downloads.
            print(f"Skipping {img_url}: {exc}")
            continue

        if response.status_code == 200:
            target = os.path.join(keyword, f"{keyword}_{i+1}.jpg")
            with open(target, "wb") as file:
                file.write(response.content)

# Keywords to search for
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']

# Initialize the web driver (make sure you have installed the appropriate driver)
# Example for Chrome:
# driver = webdriver.Chrome("/path/to/chromedriver")
# Replace "/path/to/chromedriver" with the actual path to the Chrome WebDriver
# Download Chrome WebDriver from https://sites.google.com/chromium.org/driver/
# Make sure to put the WebDriver in a directory included in your PATH environment variable

# Initialize Firefox driver
driver = webdriver.Firefox()

try:
    # Loop through keywords and scrape images
    for keyword in keywords:
        scrape_images_for_keyword(driver, keyword, num_images=10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the current
    # window. The finally clause also shuts the browser down when a keyword
    # fails part-way through.
    driver.quit()


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Function to scrape smartphone details
def scrape_flipkart_smartphones(search_query):
    """Scrape smartphone listings from a Flipkart search-results page.

    Args:
        search_query: Free-text search; it is percent-encoded before being
            placed in the URL.

    Returns:
        A list of dicts, one per product card found, with the keys
        'Brand Name', 'Smartphone Name', 'Colour', 'RAM', 'Storage(ROM)',
        'Primary Camera', 'Secondary Camera', 'Display Size',
        'Battery Capacity', 'Price' and 'Product URL'. The specification
        columns are filled positionally from the card's ``<li>`` items; any
        value that is missing is "-". Returns an empty list (after printing a
        message) when the server answers with a status other than 200.

    Raises:
        requests.RequestException: on network errors, including the timeout.
    """
    from urllib.parse import quote_plus, urljoin

    base_url = "https://www.flipkart.com"
    search_url = f"{base_url}/search?q={quote_plus(search_query)}"
    headers = {
        # A browser-like value replaces the unfinished placeholder text.
        # NOTE(review): adjust if the site requires a specific User-Agent.
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
        )
    }

    # The timeout keeps a stalled connection from blocking the notebook.
    response = requests.get(search_url, headers=headers, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve the webpage.")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    # Output columns filled, in this order, from the card's <li> items.
    spec_columns = (
        "Colour",
        "RAM",
        "Storage(ROM)",
        "Primary Camera",
        "Secondary Camera",
        "Display Size",
        "Battery Capacity",
    )

    smartphone_details = []

    for product in soup.find_all("div", {"class": "_1AtVbE"}):
        details = product.find("div", {"class": "_2kHMtA"})
        link = details.find("a", {"class": "IRpwTa"}) if details is not None else None
        if link is None:
            # This container holds no product card; dereferencing it would
            # raise AttributeError, so it is skipped.
            continue

        name = link.text.strip()
        href = link.get("href")
        # urljoin handles both site-relative and already-absolute hrefs.
        product_url = urljoin(base_url, href) if href else "-"

        spec_list = product.find("ul", {"class": "vFw0gD"})
        spec_items = spec_list.find_all("li") if spec_list is not None else []
        spec_values = [item.text.strip() for item in spec_items]

        price_element = product.find("div", {"class": "_30jeq3"})
        price = price_element.text.strip() if price_element is not None else "-"

        # The brand is taken to be the first word of the product name.
        row = {"Brand Name": name.split(" ")[0], "Smartphone Name": name}
        for index, column in enumerate(spec_columns):
            row[column] = spec_values[index] if index < len(spec_values) else "-"
        row["Price"] = price
        row["Product URL"] = product_url

        smartphone_details.append(row)

    return smartphone_details

# Local import: used only to turn the free-text query into a safe file name.
import re

# Search query for smartphones
search_query = input("Enter the smartphone you want to search for on Flipkart: ")

# Scrape smartphone details
smartphone_details = scrape_flipkart_smartphones(search_query)

# Create a DataFrame from the scraped data
df = pd.DataFrame(smartphone_details)

# The query is free text, so characters that are illegal or meaningful in file
# names (path separators, wildcards, quotes, ...) are replaced before it is
# used as one; an empty result falls back to a fixed name.
safe_name = re.sub(r'[\\/:*?"<>|]+', '_', search_query).strip() or 'search'
csv_path = f'{safe_name}_smartphones.csv'

# Save the DataFrame to a CSV file
df.to_csv(csv_path, index=False)

print(f"Scraped {len(smartphone_details)} smartphones and saved to {csv_path}.")


In [None]:
import requests
from bs4 import BeautifulSoup

# Function to scrape geospatial coordinates of a city from Google Maps
def scrape_coordinates(city_name):
    """Look up a city's coordinates from a Google Maps search page.

    The page is fetched with a plain HTTP request and searched for a
    ``<meta itemprop="geo">`` element whose ``content`` attribute is expected
    to hold "latitude,longitude".
    NOTE(review): whether the served HTML actually contains that element is
    not verifiable from this code - confirm against a live response.

    Args:
        city_name: Name of the city; it is percent-encoded before being
            placed in the URL path.

    Returns:
        A dict with the keys 'City', 'Latitude' and 'Longitude' (the two
        coordinates as whitespace-stripped strings), or None when the request
        does not return status 200 or no well-formed coordinate pair is
        found. A message is printed in both failure cases.

    Raises:
        requests.RequestException: on network errors, including the timeout.
    """
    from urllib.parse import quote

    # The city is a path segment, so spaces, "/", "?" and "#" must be escaped.
    encoded_city = quote(city_name, safe="")
    search_url = f"https://www.google.com/maps/search/{encoded_city}"

    # The timeout keeps a stalled connection from blocking the notebook.
    response = requests.get(search_url, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve the webpage.")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the element containing the coordinates
    coordinates_element = soup.find('meta', itemprop='geo')
    content = coordinates_element.get('content') if coordinates_element is not None else None

    # Accept exactly two non-empty comma-separated parts; unpacking anything
    # else directly would raise ValueError.
    parts = [part.strip() for part in content.split(',')] if content else []
    if len(parts) != 2 or not all(parts):
        print(f"Coordinates not found for {city_name}.")
        return None

    latitude, longitude = parts
    return {
        'City': city_name,
        'Latitude': latitude,
        'Longitude': longitude
    }

# Ask which city to look up, then run the scraper for it.
city_name = input("Enter the name of the city to search for on Google Maps: ")
coordinates = scrape_coordinates(city_name)

# Nothing is printed here when the lookup failed; the scraper has already
# reported the reason in that case.
if coordinates:
    print(f"Coordinates for {city_name}:")
    # The printed labels are the same strings as the result's keys.
    for axis in ("Latitude", "Longitude"):
        print(f"{axis}: {coordinates[axis]}")


In [None]:
import requests
from bs4 import BeautifulSoup

# Function to scrape details of best gaming laptops from digit.in
def scrape_gaming_laptops():
    """Scrape the gaming-laptop listing published on digit.in.

    NOTE(review): the CSS class names used as selectors are tied to the
    site's markup; when they do not match, the result is simply empty.

    Returns:
        A list of dicts, one per ``div.right-container`` inside the
        ``div.TopNumbeHeading`` container, with the keys 'Laptop Name',
        'Processor', 'RAM', 'Storage', 'Display', 'GPU' and 'Price'. A value
        that cannot be found is "-". Returns an empty list when the container
        is missing, or (after printing a message) when the server answers
        with a status other than 200.

    Raises:
        requests.RequestException: on network errors, including the timeout.
    """
    # URL of the page with gaming laptop listings
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    # The timeout keeps a stalled connection from blocking the notebook.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve the webpage.")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the container containing the laptop details
    laptop_container = soup.find('div', class_='TopNumbeHeading')
    if laptop_container is None:
        return []

    # (keyword looked for in the spec text, output column). The order matters:
    # the first keyword found in a spec line decides its column.
    spec_fields = (
        ("Processor", 'Processor'),
        ("RAM", 'RAM'),
        ("Storage", 'Storage'),
        ("Display", 'Display'),
        ("Graphics", 'GPU'),
        ("Price", 'Price'),
    )

    laptop_details = []

    for laptop in laptop_container.find_all('div', class_='right-container'):
        heading = laptop.find('h3')
        row = {'Laptop Name': heading.text.strip() if heading is not None else "-"}
        for _, column in spec_fields:
            row[column] = "-"

        for spec in laptop.find_all('li', class_='tVe95H'):
            spec_text = spec.text.strip()
            for keyword, column in spec_fields:
                if keyword not in spec_text:
                    continue
                # Everything after the first ":" is the value. partition()
                # keeps values that contain further colons intact and does
                # not fail on a line without any colon.
                _, separator, value = spec_text.partition(":")
                if separator:
                    row[column] = value.strip()
                break

        laptop_details.append(row)

    return laptop_details

# Run the scraper and keep its result for display.
gaming_laptops = scrape_gaming_laptops()

# Print every laptop as "<field>: <value>" lines followed by a separator rule.
# Each printed label is the same string as the dict key it is read from.
for laptop in gaming_laptops:
    for field in ('Laptop Name', 'Processor', 'RAM', 'Storage', 'Display', 'GPU', 'Price'):
        print(f"{field}:", laptop[field])
    print("-" * 50)


In [None]:
import requests
from bs4 import BeautifulSoup

# Function to scrape details of billionaires from Forbes
def scrape_forbes_billionaires():
    """Scrape the billionaires table from the Forbes billionaires page.

    NOTE(review): the CSS class names used as selectors are tied to the
    site's markup; when they do not match, the result is simply empty.

    Returns:
        A list of dicts, one per ``div.table-row`` inside the
        ``div.table-fixed-body`` container, with the keys 'Rank', 'Name',
        'Net Worth', 'Age', 'Citizenship', 'Source' and 'Industry'. A cell
        that is missing from a row is reported as "-". Returns an empty list
        when the container is missing, or (after printing a message) when the
        server answers with a status other than 200.

    Raises:
        requests.RequestException: on network errors, including the timeout.
    """
    # URL of Forbes Billionaires page
    url = "https://www.forbes.com/billionaires/"

    # The timeout keeps a stalled connection from blocking the notebook.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve the webpage.")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the container containing billionaire details
    billionaires_container = soup.find('div', class_='table-fixed-body')
    if billionaires_container is None:
        return []

    # (output column, CSS class of the cell that holds its value)
    columns = (
        ('Rank', 'rank'),
        ('Name', 'name'),
        ('Net Worth', 'netWorth'),
        ('Age', 'age'),
        ('Citizenship', 'citizenship'),
        ('Source', 'source'),
        ('Industry', 'industry'),
    )

    billionaire_details = []

    for row in billionaires_container.find_all('div', class_='table-row'):
        record = {}
        for column, css_class in columns:
            cell = row.find('div', class_=css_class)
            # A row without this cell would otherwise raise AttributeError
            # and discard every row collected so far.
            record[column] = cell.text.strip() if cell is not None else "-"
        billionaire_details.append(record)

    return billionaire_details

# Run the scraper and keep its result for display.
billionaires = scrape_forbes_billionaires()

# Print every entry as "<field>: <value>" lines followed by a separator rule.
# Each printed label is the same string as the dict key it is read from.
for billionaire in billionaires:
    for field in ('Rank', 'Name', 'Net Worth', 'Age', 'Citizenship', 'Source', 'Industry'):
        print(f"{field}:", billionaire[field])
    print("-" * 50)


In [None]:
from googleapiclient.discovery import build

# Local imports: used to obtain the API key without storing it in the notebook.
import os
from getpass import getpass

# The API key is a credential and must not be stored in the notebook. It is
# read from the YOUTUBE_API_KEY environment variable, with a hidden prompt as
# fallback. Any key that was ever committed in plain text should be revoked.
api_key = os.environ.get('YOUTUBE_API_KEY') or getpass("Enter your YouTube Data API key: ")

# Create a YouTube Data API client
youtube = build('youtube', 'v3', developerKey=api_key)

# ID of the video whose comments are retrieved (for a watch URL this is the
# value of the "v" parameter). It is a separate value from the API key.
video_id = input("Enter the ID of the YouTube video to fetch comments for: ").strip()

# Maximum number of comments to retrieve
max_results = 500

# Retrieve comments for the video
comments = []

next_page_token = None

while len(comments) < max_results:
    results = youtube.commentThreads().list(
        part='snippet',
        videoId=video_id,
        maxResults=min(100, max_results - len(comments)),  # Limit to 100 comments per request
        pageToken=next_page_token
    ).execute()

    # A response without an 'items' entry is treated as an empty page.
    for item in results.get('items', []):
        comment = item['snippet']['topLevelComment']['snippet']
        comments.append({
            'Comment': comment['textDisplay'],
            'Upvotes': comment.get('likeCount', 0),
            'Timestamp': comment['publishedAt']
        })

    # Continue only while the API reports a further page.
    next_page_token = results.get('nextPageToken')
    if not next_page_token:
        break

# Display or save the extracted comments, upvotes, and timestamps
for comment in comments:
    print(f"Comment: {comment['Comment']}")
    print(f"Upvotes: {comment['Upvotes']}")
    print(f"Timestamp: {comment['Timestamp']}")
    print('-' * 50)
