In [1]:
#Q1

In [3]:
import requests
from bs4 import BeautifulSoup

def search_products(product):
    url = f"https://www.amazon.in/s?k={product}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.find_all('span', {'class': 'a-size-medium a-color-base a-text-normal'})

    if products:
        print(f"Products related to '{product}':\n")
        for idx, item in enumerate(products, 1):
            print(f"{idx}. {item.text.strip()}")
    else:
        print(f"No products found for '{product}'")

if __name__ == "__main__":
    user_input = input("Enter the product you want to search for on Amazon: ")
    search_products(user_input)


Enter the product you want to search for on Amazon:  phone


HTTPError: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?k=phone

In [None]:
#Q2

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_product_details(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extracting details
    try:
        brand_name = soup.find('a', class_='a-link-normal').text.strip()
    except AttributeError:
        brand_name = '-'

    try:
        product_name = soup.find('span', id='productTitle').text.strip()
    except AttributeError:
        product_name = '-'

    try:
        price = soup.find('span', id='priceblock_ourprice').text.strip()
    except AttributeError:
        try:
            price = soup.find('span', id='priceblock_dealprice').text.strip()
        except AttributeError:
            price = '-'

    try:
        return_exchange = soup.find('div', {'id': 'RETURNS_POLICY'}).text.strip()
    except AttributeError:
        return_exchange = '-'

    try:
        expected_delivery = soup.find('span', {'id': 'ddmDeliveryMessage'}).text.strip()
    except AttributeError:
        expected_delivery = '-'

    try:
        availability = soup.find('div', {'id': 'availability'}).text.strip()
    except AttributeError:
        availability = '-'

    return {
        'Brand Name': brand_name,
        'Name of the Product': product_name,
        'Price': price,
        'Return/Exchange': return_exchange,
        'Expected Delivery': expected_delivery,
        'Availability': availability,
        'Product URL': url
    }

def scrape_products(product, pages=3):
    data = []

    for page in range(1, pages + 1):
        url = f"https://www.amazon.in/s?k={product}&page={page}"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an exception for unsuccessful status codes

        soup = BeautifulSoup(response.text, 'html.parser')
        products = soup.find_all('span', {'class': 'a-size-medium a-color-base a-text-normal'})

        for product in products:
            product_url = "https://www.amazon.in" + product.find_parent('a')['href']
            product_data = scrape_product_details(product_url)
            data.append(product_data)

    return data

def save_to_csv(data, filename):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    product = input("Enter the product you want to search for on Amazon: ")
    pages = int(input("Enter the number of pages to scrape (default is 3): ") or "3")

    products_data = scrape_products(product, pages)
    save_to_csv(products_data, f"{product}_products.csv")


Enter the product you want to search for on Amazon:  guitar
Enter the number of pages to scrape (default is 3):  3


HTTPError: 503 Server Error: Service Unavailable for url: https://www.amazon.in/s?k=guitar&page=1

In [7]:
#Q3

In [8]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import requests
from bs4 import BeautifulSoup

def scrape_images(keyword, num_images):
    # Create a new instance of Chrome WebDriver
    driver = webdriver.Chrome()

    # Open Google Images
    driver.get("https://images.google.com/")

    # Find the search bar
    search_bar = driver.find_element_by_name("q")

    # Enter the keyword and submit the search
    search_bar.send_keys(keyword)
    search_bar.send_keys(Keys.RETURN)

    # Scroll down to load more images
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Get the page source and parse it with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Find all image elements
    images = soup.find_all("img", class_="rg_i")

    # Create a directory to save the images
    os.makedirs(keyword, exist_ok=True)

    # Download the images
    for idx, image in enumerate(images[:num_images], 1):
        image_url = image['src']
        image_data = requests.get(image_url).content
        with open(os.path.join(keyword, f"{keyword}_{idx}.jpg"), "wb") as f:
            f.write(image_data)
            print(f"Downloaded {keyword} image {idx}")

    # Close the WebDriver
    driver.quit()

# List of keywords and number of images to scrape for each keyword
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
num_images_per_keyword = 10

# Scrape images for each keyword
for keyword in keywords:
    scrape_images(keyword, num_images_per_keyword)


ModuleNotFoundError: No module named 'selenium'

In [9]:
#Q4

In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_smartphone_details(search_query):
    url = f"https://www.flipkart.com/search?q={search_query}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')
    products = soup.find_all('div', {'class': '_1AtVbE'})

    data = []
    for product in products:
        try:
            brand_name = product.find('div', {'class': '_4rR01T'}).text
        except AttributeError:
            brand_name = '-'

        try:
            smartphone_name = product.find('a', {'class': 'IRpwTa'}).text
        except AttributeError:
            smartphone_name = '-'

        try:
            color = product.find('div', {'class': '_3LWrw9'}).text
        except AttributeError:
            color = '-'

        specs = product.find_all('li', {'class': 'rgWa7D'})
        ram = '-'
        rom = '-'
        primary_camera = '-'
        secondary_camera = '-'
        display_size = '-'
        battery_capacity = '-'
        price = '-'
        product_url = '-'
        for spec in specs:
            if 'RAM' in spec.text:
                ram = spec.text.split('|')[0].strip()
            elif 'ROM' in spec.text:
                rom = spec.text.split('|')[0].strip()
            elif 'MP' in spec.text:
                if 'Primary' in spec.text:
                    primary_camera = spec.text.split('|')[0].strip()
                elif 'Secondary' in spec.text:
                    secondary_camera = spec.text.split('|')[0].strip()
            elif 'Display Size' in spec.text:
                display_size = spec.text.split('|')[0].strip()
            elif 'Battery' in spec.text:
                battery_capacity = spec.text.split('|')[0].strip()

        try:
            price = product.find('div', {'class': '_30jeq3 _1_WHN1'}).text
        except AttributeError:
            price = '-'

        try:
            product_url = "https://www.flipkart.com" + product.find('a', {'class': 'IRpwTa'})['href']
        except AttributeError:
            pass

        data.append({
            'Brand Name': brand_name,
            'Smartphone Name': smartphone_name,
            'Colour': color,
            'RAM': ram,
            'Storage (ROM)': rom,
            'Primary Camera': primary_camera,
            'Secondary Camera': secondary_camera,
            'Display Size': display_size,
            'Battery Capacity': battery_capacity,
            'Price': price,
            'Product URL': product_url
        })

    return data

def save_to_csv(data, filename):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    search_query = input("Enter the smartphone you want to search for on Flipkart: ")
    smartphone_data = scrape_smartphone_details(search_query)
    save_to_csv(smartphone_data, f"{search_query}_smartphones.csv")
for product in products:
    try:
        brand_name = product.find('div', {'class': '_4rR01T'}).text
    except AttributeError:
        brand_name = '-'

    try:
        smartphone_name = product.find('a', {'class': 'IRpwTa'}).text
    except AttributeError:
        smartphone_name = '-'

    try:
        color = product.find('div', {'class': '_3LWrw9'}).text
    except AttributeError:
        color = '-'

Enter the smartphone you want to search for on Flipkart:  apple


TypeError: 'NoneType' object is not subscriptable

In [13]:
#Q5

In [14]:
from selenium import webdriver

def get_coordinates(city):
    # Create a new instance of Chrome WebDriver
    driver = webdriver.Chrome()

    # Open Google Maps
    driver.get("https://www.google.com/maps")

    # Find the search bar and enter the city
    search_bar = driver.find_element_by_id("searchboxinput")
    search_bar.send_keys(city)

    # Submit the search
    search_bar.submit()

    # Wait for the page to load
    driver.implicitly_wait(10)  # Adjust the wait time as needed

    # Get the current URL which contains the coordinates
    url = driver.current_url

    # Parse the coordinates from the URL
    coordinates = parse_coordinates_from_url(url)

    # Close the WebDriver
    driver.quit()

    return coordinates

def parse_coordinates_from_url(url):
    # Extract latitude and longitude from the URL
    # Example URL: https://www.google.com/maps/place/latitude,longitude
    # Example URL: https://www.google.com/maps/place/37.7749,-122.4194
    parts = url.split("/place/")[-1].split(",")
    latitude = parts[0]
    longitude = parts[1]

    return latitude, longitude

if __name__ == "__main__":
    city = input("Enter the city to search for on Google Maps: ")
    coordinates = get_coordinates(city)
    print(f"Coordinates for {city}: Latitude - {coordinates[0]}, Longitude - {coordinates[1]}")


ModuleNotFoundError: No module named 'selenium'

In [15]:
#Q6

In [16]:
import requests
from bs4 import BeautifulSoup

def scrape_gaming_laptops():
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')

    laptops = []
    for laptop in soup.find_all('div', class_='TopNumbeHeading sticky-footer'):
        laptop_details = {}
        try:
            laptop_details['Name'] = laptop.find('h3').text.strip()
        except AttributeError:
            laptop_details['Name'] = '-'

        specs = laptop.find_next_sibling('div', class_='product-detail')
        if specs:
            try:
                laptop_details['Specifications'] = specs.find('div', class_='value').text.strip()
            except AttributeError:
                laptop_details['Specifications'] = '-'
        else:
            laptop_details['Specifications'] = '-'

        rating = laptop.find_next_sibling('div', class_='rating')
        if rating:
            try:
                laptop_details['Rating'] = rating.find('p').text.strip()
            except AttributeError:
                laptop_details['Rating'] = '-'
        else:
            laptop_details['Rating'] = '-'

        laptops.append(laptop_details)

    return laptops

if __name__ == "__main__":
    gaming_laptops = scrape_gaming_laptops()
    for laptop in gaming_laptops:
        print(laptop)


In [17]:
#Q7

In [18]:
import requests
from bs4 import BeautifulSoup

def scrape_billionaires():
    url = "https://www.forbes.com/billionaires/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')

    billionaires = []
    for person in soup.find_all('div', class_='personList'):
        billionaire_details = {}
        try:
            billionaire_details['Rank'] = person.find('div', class_='rank').text.strip()
        except AttributeError:
            billionaire_details['Rank'] = '-'

        try:
            billionaire_details['Name'] = person.find('div', class_='personName').text.strip()
        except AttributeError:
            billionaire_details['Name'] = '-'

        try:
            billionaire_details['Net Worth'] = person.find('div', class_='netWorth').text.strip()
        except AttributeError:
            billionaire_details['Net Worth'] = '-'

        try:
            billionaire_details['Age'] = person.find('div', class_='age').text.strip()
        except AttributeError:
            billionaire_details['Age'] = '-'

        try:
            billionaire_details['Citizenship'] = person.find('div', class_='countryOfCitizenship').text.strip()
        except AttributeError:
            billionaire_details['Citizenship'] = '-'

        try:
            billionaire_details['Source'] = person.find('div', class_='source').text.strip()
        except AttributeError:
            billionaire_details['Source'] = '-'

        try:
            billionaire_details['Industry'] = person.find('div', class_='category').text.strip()
        except AttributeError:
            billionaire_details['Industry'] = '-'

        billionaires.append(billionaire_details)

    return billionaires

if __name__ == "__main__":
    billionaires = scrape_billionaires()
    for person in billionaires:
        print(person)


In [19]:
#Q8

In [20]:
import os
import googleapiclient.discovery

# Set up YouTube Data API credentials
API_KEY = os.environ.get('YOUTUBE_API_KEY')  # Replace with your API key

def fetch_comments(video_id, max_results=100):
    youtube = googleapiclient.discovery.build('youtube', 'v3', developerKey=API_KEY)
    comments = []

    nextPageToken = None
    while True:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(max_results, 100),
            pageToken=nextPageToken
        ).execute()

        for item in response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            comment_text = comment['textDisplay']
            like_count = comment['likeCount']
            published_at = comment['publishedAt']
            comments.append({'text': comment_text, 'likes': like_count, 'published_at': published_at})

        nextPageToken = response.get('nextPageToken')
        if not nextPageToken or len(comments) >= max_results:
            break

    return comments

def main():
    video_id = input("Enter the video ID of the YouTube video: ")
    comments = fetch_comments(video_id, max_results=500)

    print(f"Total comments extracted: {len(comments)}")
    for comment in comments:
        print(f"Comment: {comment['text']}")
        print(f"Likes: {comment['likes']}")
        print(f"Published at: {comment['published_at']}")
        print('-' * 50)

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'googleapiclient'

In [21]:
#Q9

In [22]:
import requests
from bs4 import BeautifulSoup

def scrape_hostels(location):
    url = f"https://www.hostelworld.com/search?search_keywords={location}&country=England&city=London&date_from=2022-04-15&date_to=2022-04-18&number_of_guests=1"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an exception for unsuccessful status codes

    soup = BeautifulSoup(response.text, 'html.parser')

    hostels = []
    for hostel in soup.find_all('div', class_='fabresult'):
        hostel_details = {}
        hostel_details['Name'] = hostel.find('h2', class_='title').text.strip()
        hostel_details['Distance from City Centre'] = hostel.find('span', class_='description').text.strip()
        hostel_details['Ratings'] = hostel.find('div', class_='score').text.strip()
        hostel_details['Total Reviews'] = hostel.find('div', class_='reviews').text.strip()
        hostel_details['Overall Reviews'] = hostel.find('div', class_='keyword').text.strip()
        hostel_details['Privates From Price'] = hostel.find('div', class_='prices').find('span', class_='price').text.strip()
        hostel_details['Dorms From Price'] = hostel.find('div', class_='prices').find_all('span', class_='price')[1].text.strip()
        hostel_details['Facilities'] = [facility.text.strip() for facility in hostel.find('ul', class_='facilities').find_all('li')]
        hostel_details['Property Description'] = hostel.find('div', class_='ratingdescription').text.strip()
        hostels.append(hostel_details)

    return hostels

if __name__ == "__main__":
    location = "London"
    hostels = scrape_hostels(location)
    for hostel in hostels:
        print(hostel)
