In [None]:
import requests
from bs4 import BeautifulSoup

def _amazon_span_text(node, css_class):
    """Return the stripped text of the first ``<span>`` with *css_class* under *node*, or "-"."""
    match = node.find('span', {'class': css_class})
    return match.text.strip() if match is not None else "-"


def search_amazon(product):
    """Search Amazon.in for *product* and print details of each result container.

    Sends a GET request to the Amazon.in search endpoint and, for every
    ``div.sg-col-inner`` element in the response, prints the name, price,
    rating and review text found inside it ("-" when a field is absent).

    Args:
        product: Search keyword(s), sent as the ``k`` query parameter.

    Returns:
        None. Results, or a failure message, are written to stdout.
    """
    base_url = 'https://www.amazon.in/s'
    params = {'k': product}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        # A timeout keeps the call from blocking forever on a stalled connection.
        response = requests.get(base_url, params=params, headers=headers, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve search results.")
        return

    if response.status_code != 200:
        print("Failed to retrieve search results.")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    # The loop variable is deliberately not called `product`, so the
    # function argument is not shadowed.
    for item in soup.find_all('div', {'class': 'sg-col-inner'}):
        print("Product:", _amazon_span_text(item, 'a-size-medium'))
        print("Price:", _amazon_span_text(item, 'a-price-whole'))
        print("Rating:", _amazon_span_text(item, 'a-icon-alt'))
        print("Reviews:", _amazon_span_text(item, 'a-size-base'))
        print("\n")

if __name__ == "__main__":
    # Entry point: read a search term from stdin and print the matching
    # Amazon.in results via search_amazon().
    user_input = input("Enter the product you want to search on Amazon.in: ")
    search_amazon(user_input)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _amazon_span_text(node, css_class):
    """Return the stripped text of the first ``<span>`` with *css_class* under *node*, or "-"."""
    match = node.find('span', {'class': css_class})
    return match.text.strip() if match is not None else "-"


def scrape_product_details(product_name):
    """Scrape the first three Amazon.in search-result pages for *product_name*.

    For every ``div.sg-col-inner`` element on each page a dict is built with
    the keys ``Brand Name``, ``Name of the Product``, ``Price``,
    ``Return/Exchange``, ``Expected Delivery``, ``Availability`` and
    ``Product URL``. Missing fields are recorded as "-".

    Args:
        product_name: Search keyword(s), sent as the ``k`` query parameter.

    Returns:
        A list of dicts, one per result container, in page order. Pages that
        fail to load are reported on stdout and skipped.
    """
    base_url = 'https://www.amazon.in/s'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    title_class = 'a-size-base-plus a-color-base a-text-normal'
    all_products = []

    for page_num in range(1, 4):
        params = {'k': product_name, 'page': page_num}
        try:
            # A timeout keeps one stalled page from hanging the whole scrape.
            response = requests.get(base_url, params=params, headers=headers, timeout=30)
        except requests.RequestException:
            print(f"Failed to retrieve search results for page {page_num}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve search results for page {page_num}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        for item in soup.find_all('div', {'class': 'sg-col-inner'}):
            # NOTE(review): "Brand Name"/"Name of the Product" share one
            # selector, as do "Return/Exchange"/"Expected Delivery", so each
            # pair always holds the same value. Confirm the intended selectors.
            title = _amazon_span_text(item, title_class)
            bold_text = _amazon_span_text(item, 'a-text-bold')
            link = item.find('a', {'class': 'a-link-normal a-text-normal'})
            has_href = link is not None and link.has_attr('href')

            all_products.append({
                'Brand Name': title,
                'Name of the Product': title,
                'Price': _amazon_span_text(item, 'a-price-whole'),
                'Return/Exchange': bold_text,
                'Expected Delivery': bold_text,
                'Availability': _amazon_span_text(item, 'a-size-base'),
                'Product URL': 'https://www.amazon.in' + link['href'] if has_href else "-",
            })

    return all_products

def save_to_csv(product_name, products):
    """Write the scraped product records to ``<product_name>_products.csv``.

    Args:
        product_name: Search term; used as the prefix of the output file name.
        products: Records (e.g. a list of dicts) accepted by ``pd.DataFrame``.
    """
    output_path = f'{product_name}_products.csv'
    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(products).to_csv(output_path, index=False)
    print("Data saved to CSV successfully.")

if __name__ == "__main__":
    # Entry point: read a search term, scrape Amazon.in and save the results.
    user_input = input("Enter the product you want to search on Amazon.in: ")
    products_data = scrape_product_details(user_input)
    # Only write a file when something was scraped; otherwise an empty CSV
    # would be reported as a successful save.
    if products_data:
        save_to_csv(user_input, products_data)
    else:
        print("No products were scraped; nothing saved.")


In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import requests
from bs4 import BeautifulSoup

def scrape_images(keyword, num_images):
    """Download up to *num_images* Google Images results for *keyword*.

    Opens Google Images in Chrome, submits *keyword*, scrolls until the page
    height stops growing, then collects every ``<img src>`` that is not an
    inline ``data:image`` URI. The first *num_images* URLs (in document order)
    are saved as ``<keyword>/<keyword>_<n>.jpg``.

    Args:
        keyword: Search phrase; also used as the output directory name and
            file-name prefix.
        num_images: Maximum number of images to download.

    Returns:
        None. Progress and per-image failures are printed to stdout.
    """
    # Imported here so the function is self-contained; `By` locators replace
    # the `find_element_by_*` helpers that newer Selenium releases removed.
    from selenium.webdriver.common.by import By

    # NOTE(review): `executable_path` is rejected by recent Selenium 4
    # releases; confirm the installed version or switch to a Service object.
    driver = webdriver.Chrome(executable_path="chromedriver.exe")  # Provide path to your chromedriver
    try:
        driver.get("https://images.google.com/")

        search_bar = driver.find_element(By.NAME, "q")
        search_bar.clear()
        search_bar.send_keys(keyword)
        search_bar.send_keys(Keys.RETURN)

        # Keep scrolling to the bottom until the document height stops changing.
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        page_source = driver.page_source
    finally:
        # Always release the browser, even if navigation or scrolling fails.
        driver.quit()

    soup = BeautifulSoup(page_source, "html.parser")
    sources = (img["src"] for img in soup.find_all("img") if img.has_attr("src"))
    # dict.fromkeys de-duplicates while keeping document order, so the same
    # page always yields the same selection of images.
    image_urls = [
        url for url in dict.fromkeys(sources) if not url.startswith("data:image")
    ]
    os.makedirs(keyword, exist_ok=True)

    for i, url in enumerate(image_urls[:num_images]):
        try:
            response = requests.get(url, timeout=30)
            # Do not save an HTTP error page under a .jpg name.
            response.raise_for_status()
            with open(os.path.join(keyword, f"{keyword}_{i+1}.jpg"), "wb") as f:
                f.write(response.content)
            print(f"Downloaded image {i+1} for '{keyword}'")
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download image {i+1} for '{keyword}': {e}")

if __name__ == "__main__":
    # Entry point: download a fixed number of images for each keyword below.
    keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
    num_images_per_keyword = 10

    # Each keyword is handled by its own scrape_images() call.
    for keyword in keywords:
        print(f"Scraping images for '{keyword}'...")
        scrape_images(keyword, num_images_per_keyword)
        print()

    print("Scraping complete.")
    


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _flipkart_text(node, tag, css_class):
    """Return the stripped text of the first matching *tag* under *node*, or "-"."""
    match = node.find(tag, {'class': css_class})
    return match.text.strip() if match is not None else "-"


def _flipkart_spec(specs, *needles):
    """Return the first spec string containing every needle, or "-" if none does."""
    return next((spec for spec in specs if all(n in spec for n in needles)), "-")


def scrape_flipkart_smartphones(keyword):
    """Scrape the first Flipkart search-result page for *keyword*.

    For every ``div._1AtVbE`` element a dict is built with the keys
    ``Brand Name``, ``Smartphone Name``, ``Colour``, ``RAM``,
    ``Storage(ROM)``, ``Primary Camera``, ``Secondary Camera``,
    ``Display Size``, ``Battery Capacity``, ``Price`` and ``Product URL``.
    Spec fields are picked from the ``li.rgWa7D`` items by substring match.
    Missing fields are recorded as "-".

    Args:
        keyword: Search phrase, sent as the ``q`` query parameter.

    Returns:
        A list of dicts, one per result container; an empty list when the
        page could not be retrieved.
    """
    base_url = "https://www.flipkart.com/search"
    # Passing the query through `params` lets requests URL-encode the keyword,
    # so spaces, '&' or '#' in it cannot corrupt the query string.
    params = {
        'q': keyword,
        'otracker': 'search',
        'otracker1': 'search',
        'marketplace': 'FLIPKART',
        'as-show': 'on',
        'as': 'off',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve search results.")
        return []

    if response.status_code != 200:
        print("Failed to retrieve search results.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    results = []

    for product in soup.find_all('div', {'class': '_1AtVbE'}):
        # Collect the spec bullet texts once instead of re-querying per field.
        specs = [li.text.strip() for li in product.find_all('li', {'class': 'rgWa7D'})]
        link = product.find('a', {'class': 'IRpwTa'})
        has_href = link is not None and link.has_attr('href')

        results.append({
            'Brand Name': _flipkart_text(product, 'div', '_4rR01T'),
            'Smartphone Name': link.text.strip() if link is not None else "-",
            'Colour': _flipkart_text(product, 'a', '_1WPlpC'),
            'RAM': _flipkart_spec(specs, 'RAM'),
            'Storage(ROM)': _flipkart_spec(specs, 'ROM'),
            'Primary Camera': _flipkart_spec(specs, 'MP', 'Camera'),
            'Secondary Camera': _flipkart_spec(specs, 'MP', 'Front'),
            'Display Size': _flipkart_spec(specs, 'inch'),
            'Battery Capacity': _flipkart_spec(specs, 'mAh'),
            'Price': _flipkart_text(product, 'div', '_30jeq3'),
            'Product URL': 'https://www.flipkart.com' + link['href'] if has_href else "-",
        })

    return results

def save_to_csv(keyword, results):
    """Write the scraped smartphone records to ``<keyword>_smartphones.csv``.

    Args:
        keyword: Search term; used as the prefix of the output file name.
        results: Records (e.g. a list of dicts) accepted by ``pd.DataFrame``.
    """
    target = f'{keyword}_smartphones.csv'
    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(results).to_csv(target, index=False)
    print("Data saved to CSV successfully.")

if __name__ == "__main__":
    # Entry point: read a search term, scrape Flipkart and save the results.
    keyword = input("Enter the smartphone you want to search on Flipkart: ")
    results = scrape_flipkart_smartphones(keyword)
    # An empty list means the request failed or nothing matched; skip the CSV.
    if results:
        save_to_csv(keyword, results)


In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

def _parse_maps_coordinates(url):
    """Return the (latitude, longitude) strings that follow "/@" in *url*, or (None, None)."""
    _, marker, tail = url.partition("/@")
    parts = tail.split(",")
    if not marker or len(parts) < 2 or not parts[0] or not parts[1]:
        return None, None
    return parts[0], parts[1]


def scrape_coordinates(city_name):
    """Look up *city_name* on Google Maps and read its coordinates from the URL.

    Opens Google Maps in Chrome, submits *city_name* in the search box, waits
    five seconds and then parses the two comma-separated values that follow
    ``/@`` in the browser's current URL.

    Args:
        city_name: Text typed into the Google Maps search box.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, or ``(None, None)`` when
        the URL does not contain a ``/@lat,lng`` segment (a message is printed
        in that case).
    """
    # Imported here so the function is self-contained; `By` locators replace
    # the `find_element_by_*` helpers that newer Selenium releases removed.
    from selenium.webdriver.common.by import By

    # NOTE(review): `executable_path` is rejected by recent Selenium 4
    # releases; confirm the installed version or switch to a Service object.
    driver = webdriver.Chrome(executable_path="chromedriver.exe")
    try:
        driver.maximize_window()
        driver.get("https://www.google.com/maps")

        search_bar = driver.find_element(
            By.CSS_SELECTOR, "input[aria-label='Search Google Maps']"
        )
        search_bar.clear()
        search_bar.send_keys(city_name)
        search_bar.send_keys(Keys.RETURN)

        # Fixed wait before reading the URL the search navigated to.
        time.sleep(5)
        current_url = driver.current_url
    finally:
        # Always release the browser, even if the lookup fails part-way.
        driver.quit()

    latitude, longitude = _parse_maps_coordinates(current_url)
    if latitude is None:
        print("Failed to scrape coordinates. Please try again.")
    return latitude, longitude

if __name__ == "__main__":
    # Entry point: read a city name and print the coordinates found for it.
    city = input("Enter the name of the city to search on Google Maps: ")
    latitude, longitude = scrape_coordinates(city)
    # scrape_coordinates() returns (None, None) on failure and has already
    # printed a message in that case.
    if latitude and longitude:
        print(f"Coordinates for {city}: Latitude {latitude}, Longitude {longitude}")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _laptop_text(node, tag, css_class):
    """Return the stripped text of the first matching *tag* under *node*, or "-"."""
    match = node.find(tag, class_=css_class)
    return match.text.strip() if match is not None else "-"


def scrape_gaming_laptops():
    """Scrape the digit.in "best gaming laptops" page.

    For every ``div`` with class ``TopNumbeHeading sticky-footer`` a dict with
    the keys ``Name``, ``Price`` and ``Specifications`` is built. A field whose
    element is absent is recorded as "-" rather than raising.

    Returns:
        A list of dicts, one per laptop entry; an empty list when the page
        could not be retrieved.
    """
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        # A timeout keeps the call from blocking forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve data from the website.")
        return []

    if response.status_code != 200:
        print("Failed to retrieve data from the website.")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    return [
        {
            "Name": _laptop_text(laptop, "div", "heading-wraper"),
            "Price": _laptop_text(laptop, "td", "smprice"),
            "Specifications": _laptop_text(laptop, "div", "Section-center"),
        }
        for laptop in soup.find_all("div", class_="TopNumbeHeading sticky-footer")
    ]

def save_to_csv(data):
    """Write the scraped laptop records to ``gaming_laptops.csv``.

    Args:
        data: Records (e.g. a list of dicts) accepted by ``pd.DataFrame``.
    """
    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(data).to_csv("gaming_laptops.csv", index=False)
    print("Data saved to CSV successfully.")

if __name__ == "__main__":
    # Entry point: scrape the laptop list and save it when anything was found.
    gaming_laptops_data = scrape_gaming_laptops()
    if gaming_laptops_data:
        save_to_csv(gaming_laptops_data)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _billionaire_text(tag):
    """Return the stripped text of *tag*, or "-" when *tag* is ``None``."""
    return tag.text.strip() if tag is not None else "-"


def scrape_forbes_billionaires():
    """Scrape the Forbes billionaires page.

    Each ``div.personName`` element anchors one record. The rank is taken
    from the nearest preceding ``div.rank``, and the remaining fields from the
    nearest following ``div`` of the matching class. A field whose element is
    absent is recorded as "-" rather than raising.

    Returns:
        A list of dicts with the keys ``Rank``, ``Name``, ``Net Worth``,
        ``Age``, ``Citizenship``, ``Source`` and ``Industry``; an empty list
        when the page could not be retrieved.
    """
    url = "https://www.forbes.com/billionaires/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        # A timeout keeps the call from blocking forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve data from the website.")
        return []

    if response.status_code != 200:
        print("Failed to retrieve data from the website.")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    # Output key -> class of the following <div> that holds the value.
    following_fields = (
        ("Net Worth", "netWorth"),
        ("Age", "age"),
        ("Citizenship", "countryOfCitizenship"),
        ("Source", "source-column"),
        ("Industry", "category"),
    )
    billionaires_data = []

    for person in soup.find_all("div", class_="personName"):
        details = {
            "Rank": _billionaire_text(person.find_previous("div", class_="rank")),
            "Name": person.text.strip(),
        }
        for key, css_class in following_fields:
            details[key] = _billionaire_text(person.find_next("div", class_=css_class))
        billionaires_data.append(details)

    return billionaires_data

def save_to_csv(data):
    """Write the scraped billionaire records to ``forbes_billionaires.csv``.

    Args:
        data: Records (e.g. a list of dicts) accepted by ``pd.DataFrame``.
    """
    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(data).to_csv("forbes_billionaires.csv", index=False)
    print("Data saved to CSV successfully.")

if __name__ == "__main__":
    # Entry point: scrape the billionaires list and save it when non-empty.
    billionaires_data = scrape_forbes_billionaires()
    if billionaires_data:
        save_to_csv(billionaires_data)


In [None]:
from googleapiclient.discovery import build
import datetime

# Key passed as `developerKey` when building the YouTube API client.
# The value below is a placeholder; replace it with a real key before running.
API_KEY = "YOUR_API_KEY"

def _format_comment_time(published_at):
    """Convert a ``...T...Z`` timestamp, with or without fractional seconds, to "YYYY-MM-DD HH:MM:SS"."""
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S.%fZ"):
        try:
            parsed = datetime.datetime.strptime(published_at, fmt)
        except ValueError:
            continue
        return parsed.strftime("%Y-%m-%d %H:%M:%S")
    raise ValueError(f"Unrecognized publishedAt timestamp: {published_at!r}")


def extract_comments(video_id, max_results=500):
    """Fetch up to *max_results* top-level comments for a YouTube video.

    Pages through ``commentThreads().list`` (at most 100 items per request)
    until *max_results* comments have been collected or the API returns no
    further page token.

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_results: Upper bound on the number of comments returned.

    Returns:
        A list of ``{"Comment": <text>, "Time": "YYYY-MM-DD HH:MM:SS"}`` dicts
        in the order the API returned them.

    Raises:
        ValueError: If a comment's ``publishedAt`` value matches neither
            supported timestamp format.
    """
    youtube = build("youtube", "v3", developerKey=API_KEY)

    comments = []
    next_page_token = None

    while len(comments) < max_results:
        request_args = {
            "part": "snippet",
            "videoId": video_id,
            "maxResults": min(100, max_results - len(comments)),
        }
        # Only send pageToken once the API has handed one back; the first
        # request must not carry an empty token.
        if next_page_token:
            request_args["pageToken"] = next_page_token

        response = youtube.commentThreads().list(**request_args).execute()

        for item in response.get("items", []):
            snippet = item["snippet"]["topLevelComment"]["snippet"]
            comments.append({
                "Comment": snippet["textDisplay"],
                "Time": _format_comment_time(snippet["publishedAt"]),
            })

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break

    return comments

if __name__ == "__main__":
    # Entry point: read a video ID, fetch its comments and print each one.
    video_id = input("Enter the YouTube video ID: ")
    comments = extract_comments(video_id)
    print(f"Total comments extracted: {len(comments)}")
    # Number the comments from 1 for display.
    for idx, comment in enumerate(comments, start=1):
        print(f"\nComment {idx}:")
        print(f"Text: {comment['Comment']}")
        print(f"Time: {comment['Time']}")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _hostel_text(node, tag, css_class):
    """Return the stripped text of the first matching *tag* under *node*, or "-"."""
    match = node.find(tag, class_=css_class)
    return match.text.strip() if match is not None else "-"


def _hostel_first_word(text):
    """Return the first whitespace-separated token of *text*, or "-" if there is none."""
    words = text.split()
    return words[0] if words else "-"


def scrape_hostels_in_london():
    """Scrape the Hostelworld listing page for London hostels.

    For every ``div.fabresult`` element a dict is built with the keys
    ``Hostel Name``, ``Distance from City Centre``, ``Ratings``,
    ``Total Reviews``, ``Overall Reviews``, ``Privates from Price``,
    ``Dorms from Price``, ``Facilities`` and ``Property Description``.
    A field whose element is absent or empty is recorded as "-" rather
    than raising.

    Returns:
        A list of dicts, one per hostel card; an empty list when the page
        could not be retrieved.
    """
    url = "https://www.hostelworld.com/findabed.php/ChosenCity.London/ChosenCountry.England"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        # A timeout keeps the call from blocking forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        print("Failed to retrieve data from the website.")
        return []

    if response.status_code != 200:
        print("Failed to retrieve data from the website.")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    hostel_data = []

    for hostel in soup.find_all("div", class_="fabresult"):
        facilities = [item.text.strip() for item in hostel.find_all("div", class_="facilities")]
        hostel_data.append({
            "Hostel Name": _hostel_text(hostel, "h2", "title-5"),
            "Distance from City Centre": _hostel_text(hostel, "span", "description"),
            "Ratings": _hostel_text(hostel, "div", "score orange big"),
            # Only the leading token of the reviews text is kept.
            "Total Reviews": _hostel_first_word(_hostel_text(hostel, "div", "reviews")),
            "Overall Reviews": _hostel_text(hostel, "div", "keyword"),
            "Privates from Price": _hostel_first_word(_hostel_text(hostel, "span", "price title-5")),
            # NOTE(review): class "price" may also match the
            # "price title-5" span used for privates; confirm the selector.
            "Dorms from Price": _hostel_first_word(_hostel_text(hostel, "span", "price")),
            "Facilities": ", ".join(facilities),
            "Property Description": _hostel_text(
                hostel, "div", "rating-factors prop-card-tablet rating-factors small"
            ),
        })

    return hostel_data

def save_to_csv(data):
    """Write the scraped hostel records to ``hostels_in_london.csv``.

    Args:
        data: Records (e.g. a list of dicts) accepted by ``pd.DataFrame``.
    """
    # index=False leaves the DataFrame's row index out of the file.
    pd.DataFrame(data).to_csv("hostels_in_london.csv", index=False)
    print("Data saved to CSV successfully.")

if __name__ == "__main__":
    # Entry point: scrape the hostel list and save it when anything was found.
    hostels_data = scrape_hostels_in_london()
    if hostels_data:
        save_to_csv(hostels_data)
