In [None]:
import requests
from bs4 import BeautifulSoup

def search_amazon(product_name):
    """Search Amazon.in for ``product_name`` and print every usable result.

    For each search result that has both a title and a link, prints the
    product name, its price (or "Price not available") and an absolute link.
    Result containers without a title or link are skipped instead of raising.

    Prints "No products found." when the page holds nothing printable and
    "Failed to fetch data from Amazon." when the request fails or the server
    does not answer with HTTP 200.

    Args:
        product_name: Free-text search query.

    Returns:
        None. Everything is written to standard output.
    """
    base_url = "https://www.amazon.in"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"
    }

    try:
        # Passing the query through ``params`` lets requests URL-encode it, so
        # characters such as "&" or "#" cannot corrupt the query string.
        response = requests.get(
            f"{base_url}/s",
            params={"k": product_name},
            headers=headers,
            timeout=10,  # never hang the notebook on a stalled connection
        )
    except requests.RequestException:
        print("Failed to fetch data from Amazon.")
        return

    if response.status_code != 200:
        print("Failed to fetch data from Amazon.")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    products = soup.find_all("div", class_="s-result-item")

    printed = 0
    for product in products:
        title_tag = product.find("span", class_="a-text-normal")
        link_tag = product.find("a", class_="a-link-normal", href=True)
        if title_tag is None or link_tag is None:
            # Not every "s-result-item" container carries a title and a link;
            # skip those rather than dereferencing None.
            continue

        title = title_tag.get_text(strip=True)

        price_box = product.find("span", class_="a-price")
        price_tag = price_box.find("span", class_="a-offscreen") if price_box else None
        price = price_tag.get_text(strip=True) if price_tag else "Price not available"

        print(f"Product: {title}\nPrice: {price}\nLink: {base_url}{link_tag['href']}\n")
        printed += 1

    if printed == 0:
        print("No products found.")

# Entry point: runs only when this code executes as the main module.
# Prompts for a search term and prints the matching Amazon.in results.
if __name__ == "__main__":
    user_input = input("Enter the product you want to search on Amazon.in: ")
    search_amazon(user_input)



In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_product_details(product, max_pages=3):
    """Scrape Amazon.in search results for ``product`` into a list of dicts.

    Result pages 1..``max_pages`` are requested in order. Scraping stops early
    when a page has no result containers, or when a request fails or returns a
    non-200 status (in which case "Failed to fetch data from Amazon" is
    printed). Whatever was collected up to that point is returned.

    Args:
        product: Free-text search query.
        max_pages: Maximum number of result pages to fetch (default 3).

    Returns:
        A list with one dict per search result, using the keys
        'Name of the Product', 'Brand Name', 'Price', 'Return/Exchange',
        'Expected Delivery', 'Availability' and 'Product URL'. Missing
        text fields are reported as '-'.
    """
    base_url = 'https://www.amazon.in'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    def text_or_dash(tag):
        """Return the stripped text of ``tag``, or '-' when nothing matched."""
        return tag.text.strip() if tag else '-'

    products_data = []

    for page_num in range(1, max_pages + 1):
        try:
            response = requests.get(
                f"{base_url}/s",
                # ``params`` URL-encodes the query for us.
                params={'k': product, 'page': page_num},
                headers=headers,
                timeout=10,  # do not hang forever on a stalled connection
            )
        except requests.RequestException:
            print("Failed to fetch data from Amazon")
            break

        if response.status_code != 200:
            print("Failed to fetch data from Amazon")
            break

        soup = BeautifulSoup(response.content, 'html.parser')
        results = soup.find_all('div', {'data-component-type': 's-search-result'})

        if not results:
            break

        # The loop variable must NOT be called ``product``: rebinding the
        # parameter would replace the search string with a parsed tag and
        # break the request for the next page.
        for result in results:
            details = {}

            details['Name of the Product'] = text_or_dash(
                result.find('span', {'class': 'a-text-normal'}))
            details['Brand Name'] = text_or_dash(
                result.find('span', {'class': 'a-size-base-plus'}))
            details['Price'] = text_or_dash(
                result.find('span', {'class': 'a-offscreen'}))

            # NOTE(review): 'Return/Exchange' and 'Expected Delivery' are read
            # with the very same selector, so they always hold the same value.
            # The correct, distinct selectors need to be confirmed against the
            # live page; behavior is kept as-is until then.
            bold_text = text_or_dash(
                result.find('span', {'class': 'a-text-bold', 'dir': 'auto'}))
            details['Return/Exchange'] = bold_text
            details['Expected Delivery'] = bold_text

            availability_tag = result.find(
                'span', {'class': 'a-size-base', 'aria-label': 'In Stock.'})
            details['Availability'] = 'In Stock' if availability_tag else 'Out of Stock'

            link_tag = result.find('a', {'class': 'a-link-normal'})
            # ``.get`` avoids a KeyError for anchors without an href.
            href = link_tag.get('href') if link_tag else None
            details['Product URL'] = base_url + href if href else '-'

            products_data.append(details)

    return products_data

def create_csv(data, filename='amazon_products.csv'):
    """Save scraped records to a CSV file and print a confirmation.

    Args:
        data: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        filename: Destination path of the CSV file.

    Note:
        Another cell of this notebook defines ``create_csv`` with a different
        default filename; whichever definition ran last is the active one.
    """
    frame = pd.DataFrame(data)
    # index=False: the positional row index is not written as a column.
    frame.to_csv(path_or_buf=filename, index=False)
    print(f"Data has been saved to '{filename}'")

# Entry point: runs only when this code executes as the main module.
# Asks for a search term, scrapes the results and writes them to CSV.
if __name__ == "__main__":
    search_query = input("Enter the product you want to search for on Amazon: ")
    products_data = extract_product_details(search_query)

    # An empty result list is falsy, so no CSV is written when nothing was scraped.
    if products_data:
        create_csv(products_data)


In [2]:
import requests
from bs4 import BeautifulSoup
import os

def scrape_images(keyword, num_images=10):
    """Download up to ``num_images`` Google Images thumbnails for ``keyword``.

    Images are written to a directory named after ``keyword`` (created if
    needed) as ``image_<n>.jpg``. Thumbnails whose ``src`` is not an http(s)
    URL (for example inline ``data:`` URIs) are ignored, and an image whose
    download fails is skipped without aborting the remaining downloads.

    Args:
        keyword: Search term. Spaces are sent as '+'; the value is otherwise
            placed in the URL verbatim, so callers may pass pre-encoded terms.
        num_images: Maximum number of image URLs to download.

    Returns:
        None. Prints a message when the search page cannot be fetched.
    """
    url = f"https://www.google.com/search?q={keyword.replace(' ', '+')}&tbm=isch"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    timeout = 10  # seconds; avoids hanging forever on a stalled connection

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        print("Failed to fetch image results from Google")
        return

    if response.status_code != 200:
        print("Failed to fetch image results from Google")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    image_urls = []
    for img in soup.find_all('img', class_='t0fcAb'):
        src = img.get('src')
        # requests can only fetch http(s) URLs; inline data: URIs would raise.
        if src and src.startswith(('http://', 'https://')):
            image_urls.append(src)
    image_urls = image_urls[:num_images]

    os.makedirs(keyword, exist_ok=True)

    for i, img_url in enumerate(image_urls, start=1):
        try:
            img_response = requests.get(img_url, headers=headers, timeout=timeout)
        except requests.RequestException:
            continue  # one broken image must not abort the rest
        if img_response.status_code != 200:
            continue  # do not save an error page as an image
        with open(os.path.join(keyword, f"image_{i}.jpg"), "wb") as f:
            f.write(img_response.content)

# Entry point: runs only when this code executes as the main module.
# Downloads images for each keyword into a directory named after that keyword.
if __name__ == "__main__":
    # Each keyword is inserted into the search URL as-is, which is why the
    # two-word term is written with '+' in place of a space.
    keywords = ['fruits', 'cars', 'Machine+Learning', 'Guitar', 'Cakes']

    for keyword in keywords:
        scrape_images(keyword)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(product):
    """Scrape the first Flipkart search-results page for ``product``.

    Containers that hold none of the looked-up elements (name, brand, price,
    link) are skipped so the output has no rows made only of placeholders.

    Args:
        product: Free-text search query.

    Returns:
        A list of dicts with the keys 'Smartphone name', 'Brand Name',
        'Price', 'Product URL', 'Colour', 'RAM', 'Storage(ROM)',
        'Primary Camera', 'Secondary Camera', 'Display Size' and
        'Battery Capacity'. Values that are not found are '-'. Returns an
        empty list (after printing a message) when the page cannot be fetched.
    """
    search_url = "https://www.flipkart.com/search"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(
            search_url,
            params={'q': product},  # requests URL-encodes the query
            headers=headers,
            timeout=10,  # do not hang forever on a stalled connection
        )
    except requests.RequestException:
        print("Failed to fetch data from Flipkart")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Flipkart")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    products_data = []

    for card in soup.find_all('div', {'class': '_1AtVbE'}):
        name_tag = card.find('div', {'class': '_4rR01T'})
        brand_tag = card.find('div', {'class': '_2WkVRV'})
        price_tag = card.find('div', {'class': '_30jeq3 _1_WHN1'})
        link_tag = card.find('a', {'class': '_1fQZEK'})

        if not (name_tag or brand_tag or price_tag or link_tag):
            # Nothing product-related inside this container; skip it.
            continue

        # ``split()`` yields [] for blank text, so guard before taking word 0.
        brand_words = brand_tag.text.split() if brand_tag else []
        # ``.get`` avoids a KeyError for anchors without an href.
        href = link_tag.get('href') if link_tag else None

        details = {
            'Smartphone name': name_tag.text.strip() if name_tag else '-',
            'Brand Name': brand_words[0] if brand_words else '-',
            'Price': price_tag.text.strip() if price_tag else '-',
            'Product URL': 'https://www.flipkart.com' + href if href else '-',
            # Other details might not be available on the search result page
            'Colour': '-',
            'RAM': '-',
            'Storage(ROM)': '-',
            'Primary Camera': '-',
            'Secondary Camera': '-',
            'Display Size': '-',
            'Battery Capacity': '-',
        }

        products_data.append(details)

    return products_data

def create_csv(data, filename='flipkart_smartphones.csv'):
    """Dump ``data`` into ``filename`` as CSV and report where it was saved.

    ``data`` is handed to ``pd.DataFrame`` unchanged; the row index is not
    written to the file.

    Note:
        This re-defines ``create_csv`` from an earlier cell with a different
        default filename; whichever definition ran last is the active one.
    """
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data has been saved to '{filename}'")

# Entry point: runs only when this code executes as the main module.
# Asks for a smartphone search term, scrapes Flipkart and writes a CSV.
if __name__ == "__main__":
    search_query = input("Enter the smartphone you want to search for on Flipkart: ")
    smartphone_data = scrape_flipkart_smartphones(search_query)

    # The scraper returns [] on failure; an empty list skips the CSV export.
    if smartphone_data:
        create_csv(smartphone_data)


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_coordinates(city_name):
    """Look up the coordinates of ``city_name`` on a Google Maps search page.

    The page is searched for a ``<meta itemprop="geo">`` element whose
    ``content`` attribute is expected to look like ``"<lat>,<lon>"``.
    NOTE(review): whether Google Maps actually serves such an element in its
    static HTML is not verified here.

    Args:
        city_name: Name of the city to search for.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, or ``(None, None)`` when
        the page cannot be fetched or no well-formed coordinates are found.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import quote_plus

    # quote_plus turns spaces into '+' and also escapes characters such as
    # '/', '?' and '#' that would otherwise change the URL's structure.
    base_url = f"https://www.google.com/maps/search/{quote_plus(city_name)}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(base_url, headers=headers, timeout=10)
    except requests.RequestException:
        print("Failed to fetch data from Google Maps")
        return None, None

    if response.status_code != 200:
        print("Failed to fetch data from Google Maps")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    coords_tag = soup.find('meta', {'itemprop': 'geo'})
    content = coords_tag.get('content') if coords_tag else None
    if not content:
        return None, None

    parts = [part.strip() for part in content.split(',')]
    # Require both halves so malformed content cannot raise IndexError.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return None, None

    return parts[0], parts[1]

# Entry point: runs only when this code executes as the main module.
# Asks for a city name and prints the coordinates scraped for it.
if __name__ == "__main__":
    city = input("Enter the name of the city: ")
    lat, long = scrape_coordinates(city)

    # scrape_coordinates returns (None, None) when nothing usable was found.
    if lat and long:
        print(f"Latitude: {lat}, Longitude: {long}")
    else:
        print("Coordinates not found.")


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_gaming_laptops():
    """Scrape Digit.in's "best gaming laptops" page into a list of dicts.

    Each heading element with class ``TopNumbeHeading sticky-footer`` yields
    one dict. 'Name' is the heading text; the following ``div.specs`` element,
    when present, is split on '|' into ``key: value`` pairs that become
    further entries. Spec fragments without a ':' are ignored.

    Returns:
        A list of dicts, or an empty list (after printing a message) when the
        page cannot be fetched.
    """
    url = 'https://www.digit.in/top-products/best-gaming-laptops-40.html'
    heading_class = 'TopNumbeHeading sticky-footer'

    try:
        response = requests.get(url, timeout=10)  # do not hang on a stalled connection
    except requests.RequestException:
        print("Failed to fetch data from Digit.in")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Digit.in")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    laptops_data = []

    for laptop in soup.find_all('div', class_=heading_class):
        details = {}

        # ``laptop`` already IS a heading element. Looking inside it for
        # another element of the same class normally finds nothing, so fall
        # back to the heading's own text instead of dereferencing None.
        name_tag = laptop.find('div', class_=heading_class) or laptop
        details['Name'] = name_tag.text.strip()

        specs_tag = laptop.find_next('div', class_='specs')
        if specs_tag is not None:
            for spec in specs_tag.text.strip().split('|'):
                # partition splits on the FIRST ':' only, so values that
                # themselves contain ':' stay intact.
                key, separator, value = spec.partition(':')
                if not separator or not key.strip():
                    continue
                details[key.strip()] = value.strip()

        laptops_data.append(details)

    return laptops_data

# Entry point: runs only when this code executes as the main module.
# Prints one dict per scraped laptop, or a notice when nothing was scraped.
if __name__ == "__main__":
    gaming_laptops_data = scrape_gaming_laptops()

    # The scraper returns [] on failure, which takes the "No data found." branch.
    if gaming_laptops_data:
        for laptop in gaming_laptops_data:
            print(laptop)
    else:
        print("No data found.")


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_forbes_billionaires():
    """Scrape the Forbes billionaires page into a list of dicts.

    Every ``div.personName`` element yields one dict with the keys 'Rank',
    'Name', 'Net worth', 'Age', 'Citizenship', 'Source' and 'Industry'.
    The rank is read from the closest preceding element of class ``rank``;
    the remaining fields are looked up inside the name element's grandparent.
    Any field whose element is missing is reported as '-'.

    Returns:
        A list of dicts, or an empty list (after printing a message) when the
        page cannot be fetched.
    """
    url = 'https://www.forbes.com/billionaires/'

    # (output key, CSS class) pairs looked up inside each row container.
    field_classes = (
        ('Net worth', 'netWorth'),
        ('Age', 'age'),
        ('Citizenship', 'countryOfCitizenship'),
        ('Source', 'source-column'),
        ('Industry', 'category'),
    )

    def text_or_dash(node):
        """Return the stripped text of ``node``, or '-' when it is missing."""
        return node.text.strip() if node is not None else '-'

    # Fetch the webpage content
    try:
        response = requests.get(url, timeout=10)  # do not hang on a stalled connection
    except requests.RequestException:
        print("Failed to fetch data from Forbes.com")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Forbes.com")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    billionaires_data = []

    for row in soup.find_all('div', class_='personName'):
        details = {
            'Rank': text_or_dash(row.find_previous(class_='rank')),
            'Name': row.text.strip(),
        }

        # Guard each hop: a detached element has no parent to climb to.
        parent = row.parent
        container = parent.parent if parent is not None else None

        for label, css_class in field_classes:
            node = container.find(class_=css_class) if container is not None else None
            details[label] = text_or_dash(node)

        billionaires_data.append(details)

    return billionaires_data

# Entry point: runs only when this code executes as the main module.
# Prints one dict per scraped billionaire, or a notice when nothing was scraped.
if __name__ == "__main__":
    billionaires_data = scrape_forbes_billionaires()

    # The scraper returns [] on failure, which takes the "No data found." branch.
    if billionaires_data:
        for billionaire in billionaires_data:
            print(billionaire)
    else:
        print("No data found.")


In [None]:
pip install google-api-python-client
from googleapiclient.discovery import build

API_KEY = 'YOUR_API_KEY'

def get_video_comments(video_id, max_results=500):
    """Fetch up to ``max_results`` top-level comments of a YouTube video.

    The YouTube Data API returns at most 100 comment threads per
    ``commentThreads.list`` call, so the comments are collected page by page
    via ``nextPageToken`` until ``max_results`` is reached or the video has no
    further comments.

    Args:
        video_id: ID of the YouTube video.
        max_results: Maximum number of comments to return.

    Returns:
        A list of dicts with the keys 'Comment' (display text), 'Upvotes'
        (like count) and 'Time' (publication timestamp), newest first as
        requested through ``order='time'``.
    """
    page_size_limit = 100  # largest maxResults value the API accepts

    youtube = build('youtube', 'v3', developerKey=API_KEY)

    comments_data = []
    page_token = None

    while len(comments_data) < max_results:
        remaining = max_results - len(comments_data)
        request_args = {
            'part': 'snippet',
            'videoId': video_id,
            'order': 'time',  # Order by time
            'maxResults': min(page_size_limit, remaining),
        }
        if page_token:
            request_args['pageToken'] = page_token

        # Retrieve one page of comments for the video
        response = youtube.commentThreads().list(**request_args).execute()

        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            comments_data.append({
                'Comment': snippet['textDisplay'],
                'Upvotes': snippet['likeCount'],
                'Time': snippet['publishedAt'],
            })

        page_token = response.get('nextPageToken')
        if not page_token:
            break  # no further pages

    # Defensive trim in case a page held more items than were requested.
    return comments_data[:max_results]

# Entry point: runs only when this code executes as the main module.
# Asks for a video ID and prints each fetched comment with its metadata.
if __name__ == "__main__":
    video_id = input("Enter YouTube video ID: ")
    comments = get_video_comments(video_id)

    if comments:
        # Number the comments starting from 1 for display.
        for index, comment in enumerate(comments, start=1):
            print(f"Comment {index}:")
            print(f"Text: {comment['Comment']}")
            print(f"Upvotes: {comment['Upvotes']}")
            print(f"Time: {comment['Time']}")
            print("-" * 50)  # visual separator between comments
    else:
        print("No comments found or invalid video ID.")


In [None]:
import requests
from bs4 import BeautifulSoup

# Scrape the Hostelworld listing page for London into ``data``: one dict per
# ``div.hostel`` element. A field whose element is missing is reported as '-'.
url = "https://www.hostelworld.com/find-a-hostel/london"


def _hostel_field(hostel, tag_name, css_class, default='-'):
    """Return the stripped text of the first matching child, or ``default``."""
    node = hostel.find(tag_name, {'class': css_class})
    return node.text.strip() if node is not None else default


# A timeout keeps a stalled connection from hanging the notebook forever.
response = requests.get(url, timeout=10)

if response.status_code != 200:
    # Parsing continues, but make the likely cause of an empty result visible.
    print(f"Failed to fetch data from Hostelworld (HTTP {response.status_code})")

soup = BeautifulSoup(response.content, 'html.parser')

hostels = soup.find_all('div', {'class': 'hostel'})

data = []

for hostel in hostels:
    name = _hostel_field(hostel, 'h2', 'hostel-name')

    distance = _hostel_field(hostel, 'p', 'distance')

    ratings = _hostel_field(hostel, 'div', 'ratings')
    reviews = _hostel_field(hostel, 'div', 'reviews')

    private_price = _hostel_field(hostel, 'span', 'private-price')
    dorm_price = _hostel_field(hostel, 'span', 'dorm-price')

    facilities = _hostel_field(hostel, 'div', 'facilities')
    description = _hostel_field(hostel, 'div', 'description')

    data.append({
        'name': name,
        'distance': distance,
        'ratings': ratings,
        'reviews': reviews,
        'private_price': private_price,
        'dorm_price': dorm_price,
        'facilities': facilities,
        'description': description
    })

print(data)
