In [None]:
! pip install requests
import requests

In [None]:
import requests

url = 'https://youtube.com/watch?v=9bZkp7q19f0'  # Example URL, replace with the desired one
# Always pass a timeout: without one, requests may wait on a stalled server indefinitely.
response = requests.get(url, timeout=10)

# Messages for the status codes this demo distinguishes; any other code is reported generically.
status_messages = {200: "successful", 404: "Page not found"}
print(status_messages.get(response.status_code, "Something went wrong"))

In [None]:
# Uses the `response` produced by the previous request cell: raises an HTTPError
# when that response carries a 4xx/5xx status and does nothing otherwise.
response.raise_for_status()  # Throws error for bad requests

In [None]:
url = "https://api.github.com/users/octocat"
# Timeout so a stalled connection cannot hang the cell.
response = requests.get(url, timeout=10)
# Stop here with a clear HTTPError on a 4xx/5xx reply: an error payload is not
# guaranteed to contain the keys read below, which would surface as a KeyError.
response.raise_for_status()

data = response.json()
print(data)
print(data['name'])        # The Octocat
print(data['public_repos'])  # e.g., 8

In [None]:
url = "https://pokeapi.co/api/v2"  # Example URL for a Pokémon API
# Timeout so a stalled connection cannot hang the cell.
# NOTE(review): `response` is reassigned later in this cell before it is read.
response = requests.get(url, timeout=10)

def pokemon_info(name):
    """Look up a single Pokémon on PokeAPI.

    Args:
        name: Pokémon name to look up. Surrounding whitespace is ignored and
            the value is lower-cased before the request is made.

    Returns:
        The decoded JSON body when the API answers with status 200, otherwise
        None. None is also returned, without sending a request, when the name
        is None or blank.
    """
    from urllib.parse import quote  # local import: only this function needs it

    if name is None:
        return None

    # NOTE(review): lower-casing assumes PokeAPI names are lower-case — confirm.
    slug = str(name).strip().lower()
    if not slug:
        # A blank name would request ".../pokemon/" rather than one specific record.
        return None

    # Percent-encode so spaces or slashes in the input cannot change the request path.
    encoded = quote(slug, safe="")
    url = f"https://pokeapi.co/api/v2/pokemon/{encoded}"
    # Timeout so a stalled connection cannot hang the caller.
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        return response.json()
    return None

pokemon_name = "raichu"  # Example Pokémon name, replace with the desired one
# Timeout so a stalled connection cannot hang the cell.
response = requests.get(f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}", timeout=10)

if response.status_code == 200:
    data = response.json()
    # (label, JSON key) pairs, printed in this order.
    for label, key in (("Name:", "name"), ("Height:", "height"), ("Weight:", "weight"), ("Id:", "id")):
        print(label, data[key])
    print("Pokemon data extracted successfully!")
else:
    print(f"Error: Pokémon '{pokemon_name}' not found (status code {response.status_code})")
    print(f'failed to extract Pokémon data for {pokemon_name}')

In [None]:
url = 'https://in.pinterest.com/pin/44050902599444305/'
# Timeout so a stalled connection cannot hang the cell.
response = requests.get(url, timeout=10)

# Only write the file when the server actually returned image bytes; otherwise an
# error page or an HTML document would be saved under a .png name.
content_type = response.headers.get('Content-Type', '')
if response.ok and content_type.startswith('image/'):
    with open('image.png', 'wb') as f:
        f.write(response.content)
else:
    print(f"Not saving image.png: status {response.status_code}, content type {content_type!r}")


In [None]:
# Show the metadata of the most recent response: headers first, then the cookie jar.
for response_part in (response.headers, response.cookies):
    print(response_part)

In [None]:
from requests.auth import HTTPBasicAuth

# Timeout so a stalled connection cannot hang the cell.
response = requests.get('https://httpbin.org/basic-auth/user/pass',
                        auth=HTTPBasicAuth('user', 'pass'),
                        timeout=10)
print(response.status_code)
# Decode the body only on success: an error reply is not guaranteed to be JSON.
if response.ok:
    print(response.json())
else:
    print("Authentication failed")

 1. requests.get()
Used to get (fetch) data from a URL.

In [None]:
import requests

# Timeout so a stalled connection cannot hang the cell.
response = requests.get('https://api.github.com', timeout=10)

print(response.status_code)   # HTTP status code, like 200 (OK)
print(response.text)          # Response content as a string


2. requests.post()
Used to send data (like login info, forms) to a server.

In [None]:
data = {"name": "pratham", "age": 20}
# `json=` sends the dict as a JSON body; the timeout keeps a stalled connection from hanging the cell.
response = requests.post('https://httpbin.org/post', json=data, timeout=10)
print(data)
print()
print(f'The connection is successful {response.status_code}' if response.status_code == 200 else 'failed')
print(response.json())

 3. requests.put()
Used to update existing data on the server.

In [None]:
data = {'name': 'Prathamesh Talele'}

# `data=` sends the dict form-encoded; the timeout keeps a stalled connection from hanging the cell.
response = requests.put('https://httpbin.org/put', data=data, timeout=10)

print(response.status_code)
print(response.text)

4. requests.delete()
Used to delete data on the server.

In [None]:
# Variable renamed from the misspelt `respone` so this cell matches every other cell,
# and so `response` really refers to the most recent request afterwards.
# Timeout so a stalled connection cannot hang the cell.
response = requests.delete('https://httpbin.org/delete', timeout=10)

print(f'The connection is successful {response.status_code}' if response.status_code == 200 else "failed")
print(response.text)

 5. requests.head()
Sends a request just for the headers (not the content). Useful to check if a page exists.

In [None]:
# HEAD fetches only the headers; the timeout keeps a stalled connection from hanging the cell.
response = requests.head('https://httpbin.org/get', timeout=10)
print(response.headers)


{'Date': 'Mon, 07 Jul 2025 06:38:55 GMT', 'Content-Type': 'application/json', 'Content-Length': '307', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}


6. requests.options()
Used to find out which HTTP methods (GET, POST, etc.) a server allows for a given URL.

In [31]:
# Timeout so a stalled connection cannot hang the cell.
response = requests.options('https://httpbin.org', timeout=10)
# Use .get(): a server is not required to send an Allow header, and indexing
# a missing header would raise KeyError.
print(response.headers.get('Allow', 'No Allow header in response'))  # Example header, replace with the desired one

GET, OPTIONS, HEAD


In [32]:
params = {'search': 'python'}

# `params=` is encoded into the query string; the timeout keeps a stalled connection from hanging the cell.
response = requests.get('https://httpbin.org/get', params=params, timeout=10)

print(response.url)  # See the full URL


https://httpbin.org/get?search=python


In [34]:
url = "https://httpbin.org/post"
headers = {"User-Agent": "myapp/1.0"}
data = {"username": "pratham"}

# Form-encoded POST with a custom User-Agent; the timeout keeps a stalled
# connection from hanging the cell.
response = requests.post(url, headers=headers, data=data, timeout=10)

if response.status_code == 200:
    print("Success!")
    print(response.text)
else:
    print("Failed")


Success!
{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "username": "pratham"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "16", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "myapp/1.0", 
    "X-Amzn-Trace-Id": "Root=1-686b6ca8-311303dd2764fb3742ad3cdd"
  }, 
  "json": null, 
  "origin": "103.87.31.207", 
  "url": "https://httpbin.org/post"
}



In [39]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from urllib.parse import quote_plus

# ---------- CONFIGURABLE PART ----------
# Search term, site root, page limit and output path used by the scraper below.
SEARCH_QUERY = "laptop"
BASE_URL = "https://www.amazon.in"
MAX_PAGES = 5  # Number of pages to scrape
OUTPUT_FILE = "amazon_laptops_advanced.csv"

# One header set per (platform token, Accept-Language) pair; scrape_page picks one at random.
HEADERS_LIST = [
    {"User-Agent": f"Mozilla/5.0 ({platform})", "Accept-Language": languages}
    for platform, languages in (
        ("Windows NT 10.0; Win64; x64", "en-US,en;q=0.9"),
        ("Macintosh; Intel Mac OS X 10_15_7", "en-US,en;q=0.8"),
        ("X11; Linux x86_64", "en-GB,en;q=0.7"),
    )
]

# ---------- FUNCTION TO SCRAPE ONE PAGE ----------
def scrape_page(url):
    """Fetch one search-results page and extract one record per result card.

    Args:
        url: Fully-formed search URL to request.

    Returns:
        A list of dicts with the keys "Title", "Price (INR)", "Rating",
        "Review Count" and "Product URL". A field whose element is missing
        from a card is None. An empty list is returned when the request
        raises a requests exception or the response status is not 200.
    """
    from urllib.parse import urljoin  # local import: only this function needs it

    def text_of(node):
        """Return the node's stripped text, or None when nothing was matched."""
        return node.text.strip() if node is not None else None

    print(f"Scraping: {url}")
    headers = random.choice(HEADERS_LIST)
    try:
        # Timeout so a stalled connection cannot hang the whole run.
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        # Report and return no rows so the caller keeps what it has already collected.
        print(f"Failed to fetch page: {url}, Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch page: {url}, Status Code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "lxml")
    items = soup.select("div.s-main-slot div[data-component-type='s-search-result']")

    product_list = []

    for item in items:
        heading = item.h2
        anchor = heading.a if heading is not None else None
        # .get() avoids a KeyError on an anchor that has no href attribute.
        href = anchor.get("href") if anchor is not None else None

        price = text_of(item.select_one("span.a-price-whole"))
        if price is not None:
            # NOTE(review): the whole-part text may end with the decimal separator
            # from a nested element — confirm against the live markup.
            price = price.replace(",", "").rstrip(".")

        product_list.append({
            "Title": text_of(heading),
            "Price (INR)": price,
            "Rating": text_of(item.select_one("span.a-icon-alt")),
            "Review Count": text_of(item.select_one("span.a-size-base")),
            # urljoin handles both site-relative and already-absolute hrefs.
            "Product URL": urljoin(BASE_URL, href) if href else None,
        })

    return product_list

# ---------- MAIN SCRAPER LOOP ----------
def scrape_amazon(query, max_pages=1):
    """Collect product records for one search query across consecutive result pages.

    Pages are requested in order starting at 1. Scraping stops early at the
    first page that yields no products; otherwise it pauses for a random
    2-5 seconds after each page.

    Args:
        query: Search term; URL-encoded with quote_plus before use.
        max_pages: Highest page number to request (default 1).

    Returns:
        A single list holding the records of every page scraped.
    """
    collected = []

    for page_number in range(1, max_pages + 1):
        page_url = f"{BASE_URL}/s?k={quote_plus(query)}&page={page_number}"
        page_products = scrape_page(page_url)

        if not page_products:
            print("No products found or blocked. Stopping.")
            break

        collected.extend(page_products)

        # Random pause between pages to avoid being blocked.
        time.sleep(random.uniform(2, 5))

    return collected

# ---------- SAVE RESULTS ----------
def save_to_csv(products, filename):
    """Write the product records to a CSV file (no index column) and print a summary.

    Args:
        products: Sequence of record dicts; passed to pandas.DataFrame.
        filename: Destination path of the CSV file.
    """
    pd.DataFrame(products).to_csv(filename, index=False)
    print(f"Saved {len(products)} products to {filename}")

# ---------- RUNNING THE SCRAPER ----------
if __name__ == "__main__":
    # Driver: announce the query, scrape up to MAX_PAGES result pages for
    # SEARCH_QUERY, then write everything collected to OUTPUT_FILE.
    print(f"Scraping Amazon.in for '{SEARCH_QUERY}'...\n")
    # NOTE(review): `data` is also used for plain dicts in earlier cells; here it holds
    # whatever scrape_amazon returns.
    data = scrape_amazon(SEARCH_QUERY, MAX_PAGES)
    save_to_csv(data, OUTPUT_FILE)


Scraping Amazon.in for 'laptop'...

Scraping: https://www.amazon.in/s?k=laptop&page=1
Scraping: https://www.amazon.in/s?k=laptop&page=2
Scraping: https://www.amazon.in/s?k=laptop&page=3
Scraping: https://www.amazon.in/s?k=laptop&page=4
Scraping: https://www.amazon.in/s?k=laptop&page=5
Saved 98 products to amazon_laptops_advanced.csv


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from urllib.parse import quote_plus

# ---------- CONFIGURABLE PART ----------
# Search terms (one per category), site root, per-category page limit and output path.
SEARCH_QUERIES = ["laptop", "smartphone", "headphones", "books"]
BASE_URL = "https://www.amazon.in"
MAX_PAGES = 3  # Number of pages to scrape per category
OUTPUT_FILE = "amazon_products.csv"

# One header set per (platform token, Accept-Language) pair; scrape_page picks one at random.
HEADERS_LIST = [
    {"User-Agent": f"Mozilla/5.0 ({platform})", "Accept-Language": languages}
    for platform, languages in (
        ("Windows NT 10.0; Win64; x64", "en-US,en;q=0.9"),
        ("Macintosh; Intel Mac OS X 10_15_7", "en-US,en;q=0.8"),
        ("X11; Linux x86_64", "en-GB,en;q=0.7"),
    )
]

# ---------- FUNCTION TO SCRAPE ONE PAGE ----------
def scrape_page(url, category):
    """Fetch one search-results page and extract one record per result card.

    Args:
        url: Fully-formed search URL to request.
        category: Label stored in each record's "Category" field.

    Returns:
        A list of dicts with the keys "Category", "Title", "Brand",
        "Price (INR)", "Rating", "Review Count", "Product Badge",
        "Product URL", "Image URL", "Delivery Info" and "Discount". A field
        whose element is missing from a card is None. An empty list is
        returned when the request raises a requests exception or the response
        status is not 200.
    """
    from urllib.parse import urljoin  # local import: only this function needs it

    def text_of(node):
        """Return the node's stripped text, or None when nothing was matched."""
        return node.text.strip() if node is not None else None

    print(f"Scraping: {url}")
    headers = random.choice(HEADERS_LIST)
    try:
        # Timeout so a stalled connection cannot hang the whole run.
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        # Report and return no rows so the caller keeps what it has already collected.
        print(f"Failed to fetch page: {url}, Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch page: {url}, Status Code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "lxml")
    items = soup.select("div.s-main-slot div[data-component-type='s-search-result']")

    product_list = []

    for item in items:
        heading = item.h2
        anchor = heading.a if heading is not None else None
        # .get() avoids a KeyError on an anchor that has no href attribute.
        href = anchor.get("href") if anchor is not None else None

        # Keep the price when only the whole part is present; previously a missing
        # fraction element discarded the price entirely.
        whole = text_of(item.select_one("span.a-price-whole"))
        fraction = text_of(item.select_one("span.a-price-fraction"))
        price = None
        if whole:
            # NOTE(review): the whole-part text may end with the decimal separator
            # from a nested element — confirm against the live markup.
            price = whole.replace(",", "").rstrip(".")
            if fraction:
                price = f"{price}.{fraction}"

        image = item.select_one("img.s-image")

        product_list.append({
            "Category": category,
            "Title": text_of(heading),
            "Brand": text_of(item.select_one("span.a-size-base-plus.a-color-base")),
            "Price (INR)": price,
            "Rating": text_of(item.select_one("span.a-icon-alt")),
            "Review Count": text_of(item.select_one("span.a-size-base")),
            "Product Badge": text_of(item.select_one("span.s-label-popover-default")),
            # urljoin handles both site-relative and already-absolute hrefs.
            "Product URL": urljoin(BASE_URL, href) if href else None,
            "Image URL": image.get("src") if image is not None else None,
            "Delivery Info": text_of(item.select_one("span.a-color-base.a-text-bold")),
            "Discount": text_of(
                item.select_one("span.a-letter-space + span.a-size-base.a-color-secondary")
            ),
        })

    return product_list

# ---------- MAIN SCRAPER LOOP ----------
def scrape_amazon(queries, max_pages=1):
    """Collect product records for several search queries, one category per query.

    For each query, pages are requested in order starting at 1. A category
    stops early at the first page that yields no products; otherwise there is
    a random 2-5 second pause after each page.

    Args:
        queries: Iterable of search terms; each is URL-encoded with quote_plus
            and also passed to scrape_page as the category label.
        max_pages: Highest page number to request per query (default 1).

    Returns:
        A single list holding the records of every page scraped.
    """
    collected = []

    for term in queries:
        print(f"\nScraping category: {term}")

        for page_number in range(1, max_pages + 1):
            page_url = f"{BASE_URL}/s?k={quote_plus(term)}&page={page_number}"
            page_products = scrape_page(page_url, term)

            if not page_products:
                print("No products found or blocked. Stopping.")
                break

            collected.extend(page_products)
            time.sleep(random.uniform(2, 5))

    return collected

# ---------- SAVE RESULTS ----------
def save_to_csv(products, filename):
    """Persist the scraped records as CSV without an index column, then print a summary.

    Args:
        products: Sequence of record dicts; passed to pandas.DataFrame.
        filename: Destination path of the CSV file.
    """
    table = pd.DataFrame(products)
    table.to_csv(path_or_buf=filename, index=False)
    print(f"Saved {len(products)} products to {filename}")

# ---------- RUNNING THE SCRAPER ----------
if __name__ == "__main__":
    # Driver: scrape up to MAX_PAGES result pages for every entry in SEARCH_QUERIES,
    # then write everything collected to OUTPUT_FILE.
    # NOTE(review): the output recorded under this cell reports a different file name
    # ("amazon_products_multi_category.csv") from OUTPUT_FILE, so it looks stale —
    # re-run the cell to refresh it.
    print("Scraping Amazon.in for multiple categories...\n")
    data = scrape_amazon(SEARCH_QUERIES, MAX_PAGES)
    save_to_csv(data, OUTPUT_FILE)

Scraping Amazon.in for multiple categories...


Scraping category: laptop
Scraping: https://www.amazon.in/s?k=laptop&page=1
Scraping: https://www.amazon.in/s?k=laptop&page=2
Scraping: https://www.amazon.in/s?k=laptop&page=3

Scraping category: smartphone
Scraping: https://www.amazon.in/s?k=smartphone&page=1
Scraping: https://www.amazon.in/s?k=smartphone&page=2
Scraping: https://www.amazon.in/s?k=smartphone&page=3

Scraping category: headphones
Scraping: https://www.amazon.in/s?k=headphones&page=1
Scraping: https://www.amazon.in/s?k=headphones&page=2
Scraping: https://www.amazon.in/s?k=headphones&page=3

Scraping category: books
Scraping: https://www.amazon.in/s?k=books&page=1
Scraping: https://www.amazon.in/s?k=books&page=2
Scraping: https://www.amazon.in/s?k=books&page=3
Saved 234 products to amazon_products_multi_category.csv
