In [None]:
# Answer 1
import requests
from bs4 import BeautifulSoup

def search_amazon_products(search_query):
    """Print the titles of products Amazon India lists for a search term.

    Args:
        search_query: Free-text search term. It is sent as the ``k`` query
            parameter and URL-encoded by ``requests``.

    Returns:
        None. Product titles, or a short failure notice, are printed.
    """
    base_url = 'https://www.amazon.in'

    # Passing the term through `params` keeps characters such as '&' or '#'
    # from corrupting the query string. The timeout stops a stalled
    # connection from blocking the notebook indefinitely.
    response = requests.get(f'{base_url}/s', params={'k': search_query}, timeout=10)

    if response.status_code != 200:
        print('Failed to retrieve search results.')
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    product_links = soup.find_all('a', class_='a-link-normal a-text-normal')

    if not product_links:
        print('No products found.')
        return

    print(f"Products under '{search_query}':")
    for link in product_links:
        product_title = link.text.strip()
        # Some matching anchors carry no text; skip those.
        if product_title:
            print(product_title)

# Ask for a search term interactively and list the matching product titles.
product = input('Enter the product to search: ')
search_amazon_products(search_query=product)



In [None]:
#Answer 2
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

def scrape_product_details(keyword, num_pages):
    """Collect product details for an Amazon India keyword search.

    Opens the search results in Chrome, gathers product links from up to
    ``num_pages`` result pages, then fetches each product page with
    ``scrape_product_info``.

    Args:
        keyword: Search term; URL-encoded before being placed in the query.
        num_pages: Maximum number of result pages to walk.

    Returns:
        A pandas DataFrame with one row per distinct product URL and the
        columns Brand Name, Name of the Product, Price, Return/Exchange,
        Expected Delivery, Availability and Product URL.
    """
    from urllib.parse import quote_plus

    base_url = 'https://www.amazon.in'
    search_url = f'{base_url}/s?k={quote_plus(keyword)}'

    # Match only a *clickable* "next" link. The previous code searched for
    # the disabled button and then never clicked anything.
    next_page_xpath = (
        "//a[contains(@class, 's-pagination-next') "
        "and not(contains(@class, 's-pagination-disabled'))]"
    )

    product_urls = set()
    driver = webdriver.Chrome()
    try:
        driver.get(search_url)

        for page_index in range(num_pages):
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            for link in soup.find_all('a', class_='a-link-normal a-text-normal'):
                product_url = link.get('href')
                # Anchors without an href yield None; only keep site-relative links.
                if product_url and product_url.startswith('/'):
                    product_urls.add(base_url + product_url)

            if page_index == num_pages - 1:
                break  # last requested page already scraped

            # find_elements returns an empty list instead of raising, so a
            # missing "next" link simply ends pagination.
            next_buttons = driver.find_elements(By.XPATH, next_page_xpath)
            if not next_buttons:
                break
            next_buttons[0].click()
            time.sleep(2)  # give the next results page time to render
    finally:
        # Always release the browser, even if scraping fails midway.
        driver.quit()

    # Sorted so the row order of the output is reproducible between runs.
    product_details = [scrape_product_info(url) for url in sorted(product_urls)]

    columns = ['Brand Name', 'Name of the Product', 'Price', 'Return/Exchange', 'Expected Delivery', 'Availability', 'Product URL']
    return pd.DataFrame(product_details, columns=columns)

def scrape_product_info(url):
    """Fetch one product page and return its details as a seven-item list.

    The list order is: brand, product name, price, return/exchange policy,
    expected delivery, availability, and finally the page URL itself.
    Each field is looked up through ``get_product_info``, which reports
    '-' for anything it cannot locate.
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Span class names, in the same order as the output fields.
    span_classes = (
        'bylineInfo',
        'product-title',
        'priceblock_ourprice',
        'icon-return-policy-2',
        'ddmDeliveryMessage',
        'availability',
    )

    details = [get_product_info(soup, span_class) for span_class in span_classes]
    details.append(url)
    return details

def get_product_info(soup, class_name):
    """Return the stripped text of the first <span> with ``class_name``.

    Falls back to '-' when no such span is found in ``soup``.
    """
    match = soup.find('span', class_=class_name)
    return match.text.strip() if match else '-'

# Search terms to scrape and how many result pages to walk for each.
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
num_pages = 3

# Build one DataFrame per keyword, then stack them into a single CSV.
all_dataframes = []
for keyword in keywords:
    df = scrape_product_details(keyword=keyword, num_pages=num_pages)
    all_dataframes += [df]

combined_df = pd.concat(all_dataframes)
combined_df.to_csv('product_details.csv', index=False)

print('Product details saved to product_details.csv.')


In [None]:
#Answer 3
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

def scrape_images(keyword, num_images):
    """Collect up to ``num_images`` image URLs from a Google Images search.

    Args:
        keyword: Search term typed into the Google Images search box.
        num_images: Maximum number of URLs to return.

    Returns:
        A list of non-empty ``src`` attribute values, in page order. It is
        shorter than ``num_images`` only when the page offers fewer usable
        images.
    """
    image_urls = []
    driver = webdriver.Chrome()
    try:
        driver.get("https://images.google.com")

        search_bar = driver.find_element(By.XPATH, "//input[@name='q']")
        search_bar.clear()
        search_bar.send_keys(keyword)
        search_bar.send_keys(Keys.RETURN)

        time.sleep(2)  # wait for the results page to load

        # Scroll a few times so more thumbnails are loaded into the page.
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        image_elements = driver.find_elements(By.XPATH, "//img[@class='rg_i Q4LuWd']")

        # Walk every thumbnail until enough URLs are collected. Elements
        # without a src are skipped rather than counted against the quota.
        for element in image_elements:
            if len(image_urls) >= num_images:
                break
            image_url = element.get_attribute('src')
            if image_url:
                image_urls.append(image_url)
    finally:
        # Always release the browser, even if a lookup above fails.
        driver.quit()

    return image_urls

# Search terms to look up and the number of image URLs wanted per term.
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
num_images = 10

# Print each keyword's URLs as a group, followed by a blank separator line.
for keyword in keywords:
    image_urls = scrape_images(keyword=keyword, num_images=num_images)
    for url in image_urls:
        print(url)
    print()


In [None]:
# Answer 4
import pandas as pd
import requests
from bs4 import BeautifulSoup

def _tag_text(element):
    """Return the stripped text of a parsed element, or '-' when it is missing."""
    return element.text.strip() if element else '-'


def scrape_smartphone_details(keyword):
    """Scrape Flipkart search results for a smartphone keyword.

    Each product card on the first results page is followed to its detail
    page, where the specification fields are read with
    ``get_product_detail``.

    Args:
        keyword: Search term; URL-encoded before being placed in the query.

    Returns:
        A pandas DataFrame with one row per product card, or None when the
        search request fails or no product cards are found. Fields that
        cannot be located are filled with '-'.
    """
    from urllib.parse import quote_plus

    # Encode once and reuse: the term appears twice in the query string.
    encoded_keyword = quote_plus(keyword)
    base_url = f'https://www.flipkart.com/search?q={encoded_keyword}&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_9_na_na_ps&otracker1=AS_QueryStore_HistoryAutoSuggest_1_9_na_na_ps&as-pos=1&as-type=HISTORY&suggestionId=oneplus+nord%7CMobiles&as-searchtext={encoded_keyword}'
    response = requests.get(base_url)

    if response.status_code != 200:
        print('Failed to retrieve search results.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    products = soup.find_all('a', class_='_1fQZEK')

    if not products:
        print('No products found.')
        return None

    # Specification labels, in the same order as the output columns.
    detail_names = ('Color', 'RAM', 'Storage', 'Primary Camera', 'Secondary Camera', 'Display Size', 'Battery Capacity')

    data = []
    for product in products:
        href = product.get('href')
        if not href:
            continue  # a card without a link cannot be followed or reported

        # A missing title element now yields '-' instead of an AttributeError.
        brand_name = _tag_text(product.find('div', class_='_4rR01T'))
        smartphone_name = _tag_text(product.find('a', class_='IRpwTa'))
        url = 'https://www.flipkart.com' + href

        details_response = requests.get(url)
        if details_response.status_code == 200:
            details_soup = BeautifulSoup(details_response.content, 'html.parser')
            specs = [get_product_detail(details_soup, name) for name in detail_names]
            # '_30jeq3 _1_WHN1' is a CSS class, not a specification label, so
            # look it up by class rather than through get_product_detail.
            price = _tag_text(details_soup.find(class_='_30jeq3 _1_WHN1'))
        else:
            specs = ['-'] * len(detail_names)
            price = '-'

        data.append([brand_name, smartphone_name, *specs, price, url])

    columns = ['Brand Name', 'Smartphone Name', 'Color', 'RAM', 'Storage (ROM)', 'Primary Camera', 'Secondary Camera', 'Display Size', 'Battery Capacity', 'Price', 'Product URL']
    return pd.DataFrame(data, columns=columns)

def get_product_detail(soup, detail_name):
    """Return the text of one specification entry from a product page.

    Looks for an ``<li>`` with class ``_21Ahn-`` whose ``data-tkid`` is
    ``product-details:<detail_name>`` and returns the stripped text of its
    first ``<span>``.

    Args:
        soup: Parsed product page exposing a BeautifulSoup-style ``find``.
        detail_name: Specification label, e.g. 'RAM'.

    Returns:
        The stripped span text, or '-' when either the list item or its
        span is missing.
    """
    element = soup.find('li', attrs={'class': '_21Ahn-', 'data-tkid': f'product-details:{detail_name}'})
    # The list item may exist without a <span>; treat that as "not available".
    span = element.find('span') if element else None
    return span.text.strip() if span else '-'

# Prompt for a search term and scrape the matching smartphones.
keyword = input('Enter the smartphone to search: ')
df = scrape_smartphone_details(keyword=keyword)

# The scraper returns None on failure; only write a file when there is data.
if df is not None:
    df.to_csv(path_or_buf='smartphone_details.csv', index=False)
    print('Smartphone details saved to smartphone_details.csv.')


In [None]:
#Answer 5
import requests
from bs4 import BeautifulSoup

def scrape_coordinates(city):
    """Look up a city's coordinates from a Google Maps search page.

    Args:
        city: City name; percent-encoded before being placed in the URL path.

    Returns:
        A ``(latitude, longitude)`` tuple of attribute strings when both are
        found, otherwise ``(None, None)`` after printing the reason.
    """
    from urllib.parse import quote

    # safe='' also encodes '/', so a name containing a slash or '?' cannot
    # change the path or start a query string.
    search_query = f"https://www.google.com/maps/search/{quote(city, safe='')}"
    response = requests.get(search_query)

    if response.status_code != 200:
        print('Failed to retrieve coordinates.')
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')
    map_div = soup.find('div', class_='BnJ3rd')
    if not map_div:
        print(f'Location not found for {city}.')
        return None, None

    latitude = map_div.get('data-latitude')
    longitude = map_div.get('data-longitude')
    if latitude and longitude:
        return latitude, longitude

    print(f'Coordinates not found for {city}.')
    return None, None

# Prompt for a city and look its coordinates up.
city = input('Enter the city name: ')
latitude, longitude = scrape_coordinates(city=city)

# Both values are None when the lookup failed, so nothing is printed then.
if latitude and longitude:
    print(f'Coordinates for {city}:')
    print('Latitude:', latitude)
    print('Longitude:', longitude)


In [None]:
# Answer 6
import pandas as pd
import requests
from bs4 import BeautifulSoup

def _parse_deal_date(text):
    """Parse a deal-date cell into a ``datetime.date``; None if unrecognised."""
    from datetime import datetime

    # NOTE(review): the date layout used by the page is not visible from this
    # notebook. Confirm it and trim this list to the format actually used.
    formats = ('%d/%m/%Y', '%d-%m-%Y', '%d %b %Y', '%d %B %Y', '%b %Y', '%B %Y')
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    return None


def scrape_funding_deals():
    """Scrape startup funding deals dated January through March 2021.

    Returns:
        A pandas DataFrame with the columns Deal Date, Startup Name,
        Industry, Sub-Vertical, City, Investor, Investment Type and Amount
        in USD, or None when the page or its table cannot be retrieved.
        Rows whose date cannot be parsed, or that have fewer than nine
        cells, are skipped.
    """
    from datetime import date

    period_start, period_end = date(2021, 1, 1), date(2021, 3, 31)

    url = 'https://trak.in/india-startup-funding-investment-2015/'
    response = requests.get(url)

    if response.status_code != 200:
        print('Failed to retrieve the page.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='tablepress tablepress-id-48')

    if not table:
        print('Table not found on the page.')
        return None

    data = []
    for row in table.find_all('tr')[1:]:  # first row is skipped as the header
        cells = row.find_all('td')
        if len(cells) < 9:
            continue  # spacer or malformed row: not enough cells to index

        deal_date = cells[1].text.strip()
        # Compare real dates. Comparing the raw strings is alphabetical, which
        # drops 'Feb 2021' and lets 'Jul 2021' or 'Mar 2020' through.
        parsed_date = _parse_deal_date(deal_date)
        if parsed_date is None or not (period_start <= parsed_date <= period_end):
            continue

        # Cells 2-8 hold the remaining fields in output-column order.
        data.append([deal_date] + [cell.text.strip() for cell in cells[2:9]])

    columns = ['Deal Date', 'Startup Name', 'Industry', 'Sub-Vertical', 'City', 'Investor', 'Investment Type', 'Amount in USD']
    return pd.DataFrame(data, columns=columns)

# Run the scraper; it returns None when the page or table is unavailable.
df = scrape_funding_deals()
if df is not None:
    df.to_csv(path_or_buf='funding_deals.csv', index=False)
    print('Funding deal details saved to funding_deals.csv.')


In [None]:
# Answer 7
import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_gaming_laptops():
    """Scrape the digit.in gaming-laptop list into a DataFrame.

    Each heading found on the page becomes one row. The ``<li>`` entries
    of the ``<ul>`` that follows the heading fill the specification columns.

    Returns:
        A pandas DataFrame with the columns Laptop Name, Operating System,
        Display, Processor, Memory, Weight and Price, or None when the page
        cannot be retrieved or contains no headings.
    """
    url = 'https://www.digit.in/top-products/best-gaming-laptops-40.html'
    response = requests.get(url)

    if response.status_code != 200:
        print('Failed to retrieve the page.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    laptops = soup.find_all('div', class_='TopNumbeHeading sticky-footer')

    if not laptops:
        print('Laptops not found on the page.')
        return None

    columns = ['Laptop Name', 'Operating System', 'Display', 'Processor', 'Memory', 'Weight', 'Price']
    spec_count = len(columns) - 1

    data = []
    for laptop in laptops:
        name = laptop.text.strip()

        # A heading without a following <ul> gets an all-placeholder row
        # instead of raising AttributeError.
        spec_list = laptop.find_next_sibling('ul')
        specs = [item.text.strip() for item in spec_list.find_all('li')] if spec_list else []

        # Force every row to the column count: pad short lists with '-' and
        # drop extras, so a single ragged entry cannot break DataFrame
        # construction.
        specs = (specs + ['-'] * spec_count)[:spec_count]
        data.append([name] + specs)

    return pd.DataFrame(data, columns=columns)

# Run the scraper; it returns None when the page or its headings are missing.
df = scrape_gaming_laptops()
if df is not None:
    df.to_csv(path_or_buf='gaming_laptops.csv', index=False)
    print('Gaming laptop details saved to gaming_laptops.csv.')


In [None]:
#Answer 8

import pandas as pd
import requests
from bs4 import BeautifulSoup

def _sibling_text(row, class_name):
    """Return the stripped text of ``row``'s next <div> sibling with ``class_name``, or '-'."""
    cell = row.find_next_sibling('div', class_=class_name)
    return cell.text.strip() if cell else '-'


def scrape_billionaires():
    """Scrape the Forbes billionaires list into a DataFrame.

    Returns:
        A pandas DataFrame with the columns Rank, Name, Net Worth, Age,
        Citizenship, Source and Industry, or None when the page or its
        table container cannot be retrieved. A field whose element is
        missing for a given rank is filled with '-'.
    """
    url = 'https://www.forbes.com/billionaires/'
    response = requests.get(url)

    if response.status_code != 200:
        print('Failed to retrieve the page.')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('div', class_='table-responsive')

    if not table:
        print('Table not found on the page.')
        return None

    # CSS classes of the sibling cells, in output-column order after Rank.
    field_classes = ('personName', 'netWorth', 'age', 'countryOfCitizenship', 'source', 'category')

    data = []
    for row in table.find_all('div', class_='rank'):
        rank = row.text.strip()
        # Each field is read independently, so one absent cell no longer
        # aborts the whole scrape with an AttributeError.
        data.append([rank] + [_sibling_text(row, field) for field in field_classes])

    columns = ['Rank', 'Name', 'Net Worth', 'Age', 'Citizenship', 'Source', 'Industry']
    return pd.DataFrame(data, columns=columns)

# Run the scraper; it returns None when the page or table is unavailable.
df = scrape_billionaires()
if df is not None:
    df.to_csv(path_or_buf='billionaires.csv', index=False)
    print('Billionaires details saved to billionaires.csv.')


In [None]:
#Answer 9
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Scrape comment text, upvote counts and timestamps from a YouTube video page.
driver_path = 'path_to_chromedriver'  # Replace with the actual path to the chromedriver executable
video_url = 'https://www.youtube.com/watch?v=your_video_id'  # Replace with the actual YouTube video URL or video ID

SCROLL_PAUSE_TIME = 2  # seconds to wait after each scroll before re-measuring the page

# NOTE(review): newer Selenium 4 releases reportedly no longer accept the
# driver path as a positional argument. Confirm against the installed
# version; the other cells in this notebook call webdriver.Chrome() bare.
driver = webdriver.Chrome(driver_path)
try:
    driver.get(video_url)

    last_height = driver.execute_script("return document.documentElement.scrollHeight")

    # Keep scrolling to the bottom until the page height stops growing.
    while True:
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        time.sleep(SCROLL_PAUSE_TIME)
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    comment_elements = driver.find_elements(By.CSS_SELECTOR, '#content-text')
    upvote_elements = driver.find_elements(By.CSS_SELECTOR, '#vote-count-middle')
    time_elements = driver.find_elements(By.CSS_SELECTOR, '#header-author > yt-formatted-string > a > span')

    # Read the values while the browser session is still alive.
    comments = [element.text for element in comment_elements]
    upvotes = [element.text for element in upvote_elements]
    times = [element.get_attribute('innerText') for element in time_elements]
finally:
    # Always close the browser, even if loading or scraping fails midway.
    driver.quit()


In [None]:
#Answer 10

import requests
from bs4 import BeautifulSoup

def _hostel_text(card, tag, class_name):
    """Return the stripped text of the first matching element in ``card``, or '-'."""
    element = card.find(tag, class_=class_name)
    return element.text.strip() if element else '-'


# Fetch the Hostelworld search page for London and print one block per hostel.
url = 'https://www.hostelworld.com/search?city=London&country=England'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

if response.status_code == 200:
    hostels = soup.find_all('div', class_='property-card')
else:
    # Same failure notice as the other cells, rather than silently printing nothing.
    print('Failed to retrieve the page.')
    hostels = []

for hostel in hostels:
    name = _hostel_text(hostel, 'h2', 'title')
    distance = _hostel_text(hostel, 'span', 'distance')
    ratings = _hostel_text(hostel, 'div', 'score orange')

    # Keep only the first whitespace-separated token of the reviews text.
    review_tokens = _hostel_text(hostel, 'div', 'reviews').split()
    total_reviews = review_tokens[0] if review_tokens else '-'

    overall_reviews = _hostel_text(hostel, 'div', 'keyword')

    # Take both price columns from inside this card. find_next() searches the
    # rest of the document and could return a price from the following card.
    price_cols = hostel.find_all('div', class_='price-col')
    privates_from_price = price_cols[0].text.strip() if len(price_cols) > 0 else '-'
    dorms_from_price = price_cols[1].text.strip() if len(price_cols) > 1 else '-'

    facilities = ', '.join(fac.text.strip() for fac in hostel.find_all('span', class_='facilities-label'))
    description = _hostel_text(hostel, 'div', 'rating-factors')

    print('Hostel Name:', name)
    print('Distance from City Centre:', distance)
    print('Ratings:', ratings)
    print('Total Reviews:', total_reviews)
    print('Overall Reviews:', overall_reviews)
    print('Privates from Price:', privates_from_price)
    print('Dorms from Price:', dorms_from_price)
    print('Facilities:', facilities)
    print('Description:', description)
    print('-----------------------------------------')
