# In [None]:
import requests
from bs4 import BeautifulSoup

def search_amazon(product_name):
    """Search amazon.in for ``product_name`` and print every result found.

    Sends a GET request to ``https://www.amazon.in/s`` with ``product_name``
    as the ``k`` query parameter, parses the returned HTML and prints the
    title, price and link of each ``div.s-include-content-margin`` element.
    A field whose tag is missing from a result is printed as ``"-"`` instead
    of aborting the whole listing.

    Args:
        product_name: Search phrase sent to Amazon.

    Returns:
        None. All output is written to stdout.
    """
    base_url = "https://www.amazon.in/s"
    params = {"k": product_name}

    # A timeout keeps the call from blocking forever on a stalled connection;
    # connection-level failures are reported the same way as bad status codes.
    try:
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Amazon search results. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve Amazon search results. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract product details (you may need to inspect the Amazon page to get the appropriate HTML tags)
    products = soup.find_all('div', class_='s-include-content-margin')
    if not products:
        print("No products found.")
        return

    print(f"Found {len(products)} products related to '{product_name}':\n")

    for index, product in enumerate(products, start=1):
        # find() returns None when a card lacks the tag, so check before
        # dereferencing .text / the href attribute.
        title_tag = product.find('span', class_='a-text-normal')
        price_tag = product.find('span', class_='a-offscreen')
        link_tag = product.find('a', class_='a-link-normal')

        product_title = title_tag.text if title_tag is not None else "-"
        product_price = price_tag.text if price_tag is not None else "-"
        href = link_tag.get('href') if link_tag is not None else None
        product_link = "https://www.amazon.in" + href if href else "-"

        print(f"{index}. {product_title}\n   Price: {product_price}\n   Link: {product_link}\n")

if __name__ == "__main__":
    # Ask the user which product to look up
    user_input = input("Enter the product to search on Amazon: ")

    # Run the search and print the matching products
    search_amazon(user_input)


# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_product_details(product):
    """Collect the fields of one Amazon search-result card into a dict.

    Args:
        product: Parsed HTML element for a single search result. Only its
            ``find`` method is used.

    Returns:
        dict with the keys "Brand Name", "Name of the Product", "Price",
        "Return/Exchange", "Expected Delivery", "Availability" and
        "Product URL". A field whose tag (or ``href``) is not present in the
        card is ``None`` rather than raising, so a caller can substitute a
        placeholder (for example with ``DataFrame.fillna``).
    """
    def text_of(tag_name, css_class):
        # find() yields None when nothing matches; propagate that as a
        # missing value instead of failing on `.text`.
        tag = product.find(tag_name, class_=css_class)
        return tag.text.strip() if tag is not None else None

    link = product.find('a', class_='a-link-normal')
    href = link.get('href') if link is not None else None

    # NOTE(review): "Return/Exchange" and "Availability" are read from the
    # same element, so they always carry identical text -- confirm whether
    # one of them should use a different selector.
    secondary_row = text_of('div', 'a-row a-size-base a-color-secondary')

    return {
        "Brand Name": text_of('span', 'a-size-base-plus'),
        "Name of the Product": text_of('span', 'a-text-normal'),
        "Price": text_of('span', 'a-offscreen'),
        "Return/Exchange": secondary_row,
        "Expected Delivery": text_of('div', 'a-row s-align-children-center'),
        "Availability": secondary_row,
        "Product URL": "https://www.amazon.in" + href if href else None,
    }

def search_amazon_and_scrape(product_name, num_pages=3):
    """Scrape up to ``num_pages`` of amazon.in search results into a CSV file.

    Requests pages 1..``num_pages`` of the search for ``product_name``, runs
    ``scrape_product_details`` on every ``div.s-include-content-margin``
    element and writes the collected rows to
    ``<product_name>_amazon_products.csv`` in the current directory.
    Paging stops early on a request failure, a non-200 response or a page
    with no products.

    Args:
        product_name: Search phrase sent to Amazon; also used (with
            filename-unsafe characters replaced by ``_``) as the CSV prefix.
        num_pages: Maximum number of result pages to request.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    base_url = "https://www.amazon.in/s"
    all_products_data = []

    for page in range(1, num_pages + 1):
        params = {"k": product_name, "page": page}

        # Bound the wait and treat connection errors like a failed page
        try:
            response = requests.get(base_url, params=params, timeout=10)
        except requests.RequestException as exc:
            print(f"Failed to retrieve Amazon search results on page {page}. Error: {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve Amazon search results on page {page}. Status code: {response.status_code}")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        products = soup.find_all('div', class_='s-include-content-margin')

        if not products:
            print(f"No products found on page {page}.")
            break

        for product in products:
            try:
                all_products_data.append(scrape_product_details(product))
            except (AttributeError, KeyError, TypeError):
                # A card without one of the expected tags should not discard
                # the rows already collected; skip just that card.
                continue

    if not all_products_data:
        # Nothing to save: do not report an empty file as a successful scrape
        print("No product data was scraped; CSV file not created.")
        return

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(all_products_data)

    # Replace missing values with "-"
    df = df.fillna("-")

    # The search phrase is user input; neutralise characters that are path
    # separators or otherwise invalid in filenames before building the path.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in product_name)
    csv_path = f"{safe_name}_amazon_products.csv"

    # Save DataFrame to CSV file
    df.to_csv(csv_path, index=False)

    print(f"Scraping and CSV file creation complete. Saved as {csv_path}")

if __name__ == "__main__":
    # Script entry point: read the search phrase from stdin and scrape the
    # default number of result pages into a CSV file.
    user_input = input("Enter the product to search on Amazon: ")
    search_amazon_and_scrape(user_input)


# In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import requests
from bs4 import BeautifulSoup

def scrape_images(keyword, num_images=10):
    """Download up to ``num_images`` Google Images results for ``keyword``.

    Opens https://images.google.com/ in Chrome, submits ``keyword`` as the
    query, pages down three times, then saves the images found in the page
    source as ``<keyword>/<keyword>_<n>.jpg``. The browser is always closed
    when the function finishes, including on error.

    To collect several topics, call the function once per keyword, e.g.::

        for kw in ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']:
            scrape_images(kw)

    Args:
        keyword: Search phrase; also the name of the output directory and
            the filename prefix.
        num_images: Maximum number of images to save.

    Returns:
        None. Progress is printed to stdout.
    """
    import base64  # local import: only needed for inline ``data:`` image sources

    # Create a directory for saving images
    os.makedirs(keyword, exist_ok=True)

    # Specify the path to your WebDriver (provide the path to your chromedriver executable)
    driver_path = "/path/to/chromedriver"
    # NOTE(review): the `executable_path` keyword is not accepted by recent
    # Selenium 4 releases -- confirm against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=driver_path)

    try:
        # Open Google Images
        driver.get("https://images.google.com/")

        # Type the query and submit it with RETURN so no separate search
        # button has to be located.
        search_bar = driver.find_element("name", "q")
        search_bar.clear()
        search_bar.send_keys(keyword)
        search_bar.send_keys(Keys.RETURN)

        # Wait for results to load
        time.sleep(2)

        # Scroll down three times to load more images
        page_body = driver.find_element("tag name", "body")
        for _ in range(3):
            page_body.send_keys(Keys.PAGE_DOWN)
            time.sleep(1)

        # Get image elements from the search results
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        images = soup.find_all('img', class_='rg_i')

        saved = 0
        for image in images:
            if saved >= num_images:
                break

            # Presumably lazily loaded thumbnails keep their URL in
            # `data-src` -- TODO confirm against the live page markup.
            image_url = image.get('src') or image.get('data-src')
            if not image_url:
                continue

            image_path = os.path.join(keyword, f"{keyword}_{saved + 1}.jpg")

            if image_url.startswith("data:"):
                # Inline image: requests cannot fetch a data: URI, so decode
                # the base64 payload directly.
                header, _, payload = image_url.partition(",")
                if ";base64" not in header:
                    continue
                try:
                    image_bytes = base64.b64decode(payload)
                except ValueError:
                    continue
                with open(image_path, 'wb') as file:
                    file.write(image_bytes)
            elif image_url.startswith(("http://", "https://")):
                try:
                    with requests.get(image_url, stream=True, timeout=10) as response:
                        if response.status_code != 200:
                            continue
                        with open(image_path, 'wb') as file:
                            for chunk in response.iter_content(chunk_size=8192):
                                file.write(chunk)
                except requests.RequestException:
                    # One unreachable image should not stop the remaining downloads
                    continue
            else:
                continue

            saved += 1
            print(f"Downloaded {keyword} image {saved}.")
    finally:
        # Always release the browser process
        driver.quit()



# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_smartphone_details(product):
    """Extract the fields of one Flipkart search-result element into a dict.

    Args:
        product: Parsed HTML element for a single search result. Only its
            ``find`` method is used.

    Returns:
        dict with the keys "Brand Name", "Smartphone Name", "Colour", "RAM",
        "Storage(ROM)", "Primary Camera", "Secondary Camera", "Display Size",
        "Battery Capacity", "Price" and "Product URL", in that order. Any
        value that cannot be located in the element is ``"-"``.
    """
    def first(tag_name, css_class, **filters):
        # Thin wrapper so every lookup uses the same attribute-dict form
        return product.find(tag_name, {'class': css_class}, **filters)

    def text_or_dash(tag):
        # find()/find_next() return None when nothing matches
        return tag.text.strip() if tag is not None else '-'

    def spec(label):
        # A specification is the <li> following the <li> whose string is the label
        label_item = first('li', 'rgWa7D', string=label)
        if label_item is None:
            return '-'
        return text_or_dash(label_item.find_next('li'))

    # The title link is reused for the name, colour and URL fields
    title_link = first('a', '_1fQZEK')
    href = title_link.get('href') if title_link is not None else None

    details = {}

    details['Brand Name'] = text_or_dash(first('div', '_4rR01T'))
    details['Smartphone Name'] = text_or_dash(title_link)

    # Get details if available, otherwise set to "-"
    details['Colour'] = title_link.get('title', '-') if title_link is not None else '-'
    details['RAM'] = spec('RAM')
    details['Storage(ROM)'] = spec('Internal Storage')
    details['Primary Camera'] = spec('Primary Camera')
    details['Secondary Camera'] = spec('Secondary Camera')
    details['Display Size'] = spec('Display Size')
    details['Battery Capacity'] = spec('Battery Capacity')
    details['Price'] = text_or_dash(first('div', '_30jeq3 _1_WHN1'))
    details['Product URL'] = "https://www.flipkart.com" + href if href else '-'

    return details

def search_flipkart_and_scrape(product_name):
    """Search flipkart.com for ``product_name`` and save the results as CSV.

    Requests the first page of search results, runs
    ``scrape_smartphone_details`` on every ``div._1AtVbE`` element and writes
    the rows to ``<product_name>_flipkart_smartphones.csv`` in the current
    directory. Elements that cannot be parsed, or that yield no data at all,
    are skipped.

    Args:
        product_name: Search phrase sent to Flipkart; also used (with
            filename-unsafe characters replaced by ``_``) as the CSV prefix.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    base_url = "https://www.flipkart.com/search"
    params = {"q": product_name}

    # Bound the wait and report connection errors instead of crashing
    try:
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Flipkart search results. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve Flipkart search results. Status code: {response.status_code}")
        return

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): this class may also match layout containers that are not
    # product cards -- verify the selector against the live page.
    products = soup.find_all('div', {'class': '_1AtVbE'})

    smartphone_details = []
    for product in products:
        try:
            details = scrape_smartphone_details(product)
        except (AttributeError, KeyError, TypeError):
            # Element lacks an expected tag; it is not a usable product card
            continue
        if all(value == "-" for value in details.values()):
            # Nothing was extracted from this element, so it adds no information
            continue
        smartphone_details.append(details)

    if not smartphone_details:
        print("No products found.")
        return

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(smartphone_details)

    # Replace missing values with "-"
    df = df.fillna("-")

    # The search phrase is user input; neutralise characters that are path
    # separators or otherwise invalid in filenames before building the path.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in product_name)
    csv_path = f"{safe_name}_flipkart_smartphones.csv"

    # Save DataFrame to CSV file
    df.to_csv(csv_path, index=False)

    print(f"Scraping and CSV file creation complete. Saved as {csv_path}")

if __name__ == "__main__":
    # Script entry point: read the search phrase for the smartphone from stdin
    user_input = input("Enter the smartphone to search on Flipkart: ")

    # Scrape the matching results and write them to a CSV file
    search_flipkart_and_scrape(user_input)


# In [None]:
import requests

def get_coordinates(api_key, city_name):
    """Look up the latitude and longitude of ``city_name``.

    Queries the Google Places "Find Place From Text" endpoint and reads the
    location of the first candidate returned.

    Args:
        api_key: Google Cloud API key sent as the ``key`` parameter.
        city_name: Free-text place name sent as the ``input`` parameter.

    Returns:
        A ``(latitude, longitude)`` tuple on success, or ``None`` when the
        request fails, the response is not JSON, the API status is not
        ``"OK"`` or no candidate is returned. Failures are printed to stdout.
    """
    base_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"

    # Parameters for the API request
    params = {
        'input': city_name,
        'inputtype': 'textquery',
        'fields': 'geometry/location',
        'key': api_key,
    }

    # Bound the wait and report connection errors instead of crashing
    try:
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve coordinates. Error: {exc}")
        return None

    # An error response may not carry a JSON body; treat that as "no data"
    # so the failure message below is still printed.
    try:
        data = response.json()
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}

    candidates = data.get('candidates') or []
    if response.status_code == 200 and data.get('status') == 'OK' and candidates:
        # Extract latitude and longitude from the first candidate
        location = candidates[0]['geometry']['location']
        return location['lat'], location['lng']

    print(f"Failed to retrieve coordinates. Status code: {response.status_code}, Error: {data.get('error_message', 'No error message')}")
    return None

if __name__ == "__main__":
    # Placeholder credential: replace 'YOUR_API_KEY' with a real Google Cloud
    # API key before running.
    api_key = 'YOUR_API_KEY'

    # Read the place name to look up from stdin
    city_name = input("Enter the city name: ")

    # Resolve the name through get_coordinates
    coordinates = get_coordinates(api_key, city_name)

    # Report the result only when the lookup produced a value
    if coordinates:
        print(f"Coordinates for {city_name}: Latitude {coordinates[0]}, Longitude {coordinates[1]}")


# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    """Scrape Digit's gaming-laptop list into ``gaming_laptops_digit.csv``.

    Downloads the page, collects every
    ``div`` with class ``TopNumbeHeading sticky-footer`` and records a name,
    price and specification text for each. Values that cannot be located are
    stored as ``"-"``. No file is written when the request fails or no laptop
    element is found.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    base_url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    # Bound the wait and report connection errors instead of crashing
    try:
        response = requests.get(base_url, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Digit website. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve Digit website. Status code: {response.status_code}")
        return

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract details of gaming laptops
    laptops = soup.find_all('div', class_='TopNumbeHeading sticky-footer')
    if not laptops:
        print("No gaming laptops found.")
        return

    def text_of(tag):
        # find() returns None when nothing matches
        return tag.text.strip() if tag is not None else "-"

    laptop_details = []
    for laptop in laptops:
        # `laptop` already is the heading element, and find() only searches
        # descendants, so fall back to the element's own text for the name.
        nested_heading = laptop.find('div', class_='TopNumbeHeading sticky-footer')
        name_tag = nested_heading if nested_heading is not None else laptop

        # NOTE(review): price and specs are looked up inside the heading
        # element; if they live elsewhere on the page these stay "-".
        laptop_details.append({
            'Name': text_of(name_tag),
            'Price': text_of(laptop.find('div', class_='rubric')),
            'Specifications': text_of(laptop.find('div', class_='Specs-Wrap')),
        })

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(laptop_details)

    # Save DataFrame to CSV file
    df.to_csv("gaming_laptops_digit.csv", index=False)

    print("Scraping and CSV file creation complete. Saved as gaming_laptops_digit.csv")

if __name__ == "__main__":
    # Script entry point: scrape the laptop list and write the CSV file
    scrape_gaming_laptops()


# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires():
    """Scrape the Forbes billionaires page into ``forbes_billionaires.csv``.

    Downloads https://www.forbes.com/billionaires/, collects every
    ``div.person`` element and records rank, name, net worth, age,
    citizenship, source and industry for each. Values that cannot be located
    are stored as ``"-"``. No file is written when the request fails or no
    entry is found.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    base_url = "https://www.forbes.com/billionaires/"

    # CSV column name -> CSS class of the <div> holding that value
    field_classes = {
        'Rank': 'rank',
        'Name': 'name',
        'Net Worth': 'netWorth',
        'Age': 'age',
        'Citizenship': 'countryOfCitizenship',
        'Source': 'source',
        'Industry': 'category',
    }

    # Bound the wait and report connection errors instead of crashing
    try:
        response = requests.get(base_url, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Forbes website. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve Forbes website. Status code: {response.status_code}")
        return

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract details of billionaires
    billionaires = soup.find_all('div', class_='person')
    if not billionaires:
        # NOTE(review): an empty result may mean the list is not part of the
        # static HTML -- confirm against the page source.
        print("No billionaires found.")
        return

    billionaire_details = []
    for billionaire in billionaires:
        details = {}
        for column, css_class in field_classes.items():
            # find() returns None when the entry lacks this field
            tag = billionaire.find('div', class_=css_class)
            details[column] = tag.text.strip() if tag is not None else "-"
        billionaire_details.append(details)

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(billionaire_details)

    # Save DataFrame to CSV file
    df.to_csv("forbes_billionaires.csv", index=False)

    print("Scraping and CSV file creation complete. Saved as forbes_billionaires.csv")

if __name__ == "__main__":
    # Script entry point: scrape the list and write the CSV file
    scrape_forbes_billionaires()


# In [None]:
import googleapiclient.discovery
from googleapiclient.errors import HttpError

def get_youtube_comments(api_key, video_id, max_comments=500):
    """Fetch a video's title and up to ``max_comments`` top-level comments.

    Uses the YouTube Data API v3 client: one ``videos().list`` call for the
    title, then ``commentThreads().list`` calls, following ``nextPageToken``
    until enough comments are collected or the pages run out.

    Args:
        api_key: API key passed to the client as ``developerKey``.
        video_id: ID of the video whose comments are fetched.
        max_comments: Upper bound on the number of comments returned.

    Returns:
        A ``(video_title, comments)`` tuple, where ``comments`` is a list of
        dicts with the keys ``author``, ``comment``, ``upvotes`` and
        ``timestamp``. Returns ``None`` when the API raises ``HttpError`` or
        no video matches ``video_id``; the reason is printed to stdout.
    """
    youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

    try:
        # Get video details
        video_response = youtube.videos().list(
            part="snippet",
            id=video_id
        ).execute()

        # An unknown id yields an empty item list rather than an HTTP error
        items = video_response.get('items', [])
        if not items:
            print(f"Error: no video found for id {video_id!r}")
            return None

        video_title = items[0]['snippet']['title']

        # Get video comments
        comments = []
        next_page_token = None

        while len(comments) < max_comments:
            # Ask only for what is still needed (the loop guarantees >= 1),
            # capped at the 100 results per page used by this request.
            remaining = max_comments - len(comments)
            comment_response = youtube.commentThreads().list(
                # Only the top-level comment snippet is read below, so the
                # replies part is not requested.
                part="snippet",
                videoId=video_id,
                maxResults=min(100, remaining),
                pageToken=next_page_token
            ).execute()

            for item in comment_response.get('items', []):
                comment = item['snippet']['topLevelComment']['snippet']
                comments.append({
                    'author': comment['authorDisplayName'],
                    'comment': comment['textDisplay'],
                    'upvotes': comment['likeCount'],
                    'timestamp': comment['publishedAt'],
                })

            # No token means the last page has been read
            next_page_token = comment_response.get('nextPageToken')
            if not next_page_token:
                break

        return video_title, comments[:max_comments]

    except HttpError as e:
        print(f"Error: {e}")
        return None

if __name__ == "__main__":
    # Placeholder credential: substitute a real YouTube Data API key
    api_key = 'YOUR_API_KEY'

    # Placeholder: substitute the ID of the video whose comments are wanted
    video_id = 'VIDEO_ID'

    result = get_youtube_comments(api_key, video_id)

    if not result:
        # The lookup returned nothing usable
        print("Failed to retrieve comments.")
    else:
        video_title, comments = result
        print(f"Video Title: {video_title}")
        print(f"Total Comments Extracted: {len(comments)}\n")

        # One block of lines per comment, closed by a separator line
        for position, entry in enumerate(comments, start=1):
            print(
                f"Comment {position}:",
                f"Author: {entry['author']}",
                f"Comment: {entry['comment']}",
                f"Upvotes: {entry['upvotes']}",
                f"Timestamp: {entry['timestamp']}",
                "---------",
                sep="\n",
            )


# In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_hostelworld_data():
    """Print details of the hostels listed on a fixed Hostelworld search page.

    Downloads the hard-coded London search URL, collects every
    ``div.hostel-details`` element and prints name, distance, rating, review
    figures, prices, facilities and description for each. A value that
    cannot be located in a listing is printed as ``"-"`` instead of aborting
    the run.

    Returns:
        None. All output is written to stdout.
    """
    base_url = "https://www.hostelworld.com/s/7?city=London&dateFrom=2023-01-01&dateTo=2023-01-07&guests=1"

    # Bound the wait and report connection errors instead of crashing
    try:
        response = requests.get(base_url, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve Hostelworld website. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve Hostelworld website. Status code: {response.status_code}")
        return

    def text_of(tag):
        # find()/find_next() return None when nothing matches
        return tag.text.strip() if tag is not None else "-"

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract details of hostels
    hostels = soup.find_all('div', class_='hostel-details')

    for hostel in hostels:
        hostel_name = text_of(hostel.find('h2', class_='title'))
        distance_from_city_center = text_of(hostel.find('span', class_='description'))
        ratings = text_of(hostel.find('div', class_='score orange big'))

        # The review block is split on whitespace; words 0 and 3 are used.
        # NOTE(review): which words sit at those positions depends on the
        # page text -- confirm against the live markup.
        reviews_tag = hostel.find('div', class_='reviews')
        review_words = reviews_tag.text.strip().split() if reviews_tag is not None else []
        total_reviews = review_words[0] if len(review_words) > 0 else "-"
        overall_reviews = review_words[3] if len(review_words) > 3 else "-"

        # First price span is reported as privates, second as dorms
        price_col = hostel.find('div', class_='price-col')
        prices = price_col.find_all('span', class_='price') if price_col is not None else []
        privates_price = text_of(prices[0]) if len(prices) > 0 else "-"
        dorms_price = text_of(prices[1]) if len(prices) > 1 else "-"

        facilities = ', '.join(fac.text.strip() for fac in hostel.find_all('li', class_='facility-badge'))

        # The description is taken from the first <p> after the rating factors
        rating_factors = hostel.find('div', class_='rating-factors')
        property_description = text_of(rating_factors.find_next('p')) if rating_factors is not None else "-"

        # Print the extracted data
        print(f"Hostel Name: {hostel_name}")
        print(f"Distance from City Centre: {distance_from_city_center}")
        print(f"Ratings: {ratings}")
        print(f"Total Reviews: {total_reviews}")
        print(f"Overall Reviews: {overall_reviews}")
        print(f"Privates From Price: {privates_price}")
        print(f"Dorms From Price: {dorms_price}")
        print(f"Facilities: {facilities}")
        print(f"Property Description: {property_description}")
        print("=" * 50)

if __name__ == "__main__":
    # Script entry point: fetch the search page and print every hostel found
    scrape_hostelworld_data()
