In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

print("Libraries imported!")

Libraries imported!


In [2]:
# The URL of the product page you want to scrape.
# I'll use an example. Replace this with a real Amazon/Flipkart product URL.
URL = "https://www.amazon.in/Daikin-Inverter-Copper-Filter-MTKL35UV16/dp/B0BK28T4BZ/ref=sr_1_1_sspa?_encoding=UTF8&content-id=amzn1.sym.58c90a12-100b-4a2f-8e15-7c06f1abe2be&dib=eyJ2IjoiMSJ9.LpujZ4uISPUK8sa_6yNGVTLp2_seTR9samDUOPD7O27De5gR4wiXFNOAqexYtHRw8BdOCXbWsVVka54tE7wmzp0520p0LoTi57Xz1ZJK4iJqZyvF_DFiZoxTbLXluGeZbwBd-bRA70uiMd1aiZA1bMScj_EYRNMaRxI9X7U8eIXmYDJcuMWS9TZXf7LF06t5w8TV8MjG9Gx61gCSsen4zLmn6Uqtjjh9DpJ_MtQyX5vWyMjR35BjFcnb1laMqpqL8ij8onewzvqpv708KrDnLsA3KTUKs64RehFc1aQKJJE.NYIux8ldIlgBzvvXbUae9OYOeLvxAqeS-ydyKdC5mlc&dib_tag=se&pd_rd_r=d18d4de4-e171-46b6-b9f7-81df7001286f&pd_rd_w=atRXp&pd_rd_wg=m4RXC&qid=1761529367&refinements=p_85%3A10440599031&rps=1&s=kitchen&sr=1-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGZfYnJvd3Nl&th=1" 

# CRITICAL: This header makes your request look like it's from a real browser.
# Without it, Amazon will block you.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
}

# Now, let's try to "get" the webpage
webpage = requests.get(URL, headers=HEADERS)

# Print the status code. 200 means success!
# 503 or 403 means you were blocked.
print(webpage.status_code)

200


In [3]:
# Create the "soup" object
# webpage.content is the raw HTML we downloaded
soup = BeautifulSoup(webpage.content, "html.parser")

# Let's see the title of the page to make sure it worked
print(soup.title.string)

Daikin 1 Ton 3 Star Inverter Split AC (Copper, PM 2.5 Filter, 2024 Model, MTKL35UV16, White) : Amazon.in: Home & Kitchen


In [10]:
# In Cell 4
reviews = soup.find_all('div', {'class': 'a-row'})

# --- OR whatever your class was ---
# reviews = soup.find_all('div', {'class': 'review-container'}) 

# Now check the count again
print(f"Found {len(reviews)} reviews on this page.")

Found 247 reviews on this page.


In [11]:
# Create empty lists to hold our data
all_ratings = []
all_reviews = []

# Loop through each review container
for review in reviews:
    
    # 1. Get the rating
    # We use a "try...except" block because sometimes a review is missing a rating
    try:
        # Find the star rating element
        rating_element = review.find('i', {'data-hook': 'review-star-rating'})
        # Get the text from its "span" child
        rating_text = rating_element.find('span', {'class': 'a-icon-alt'}).get_text(strip=True)
        # The text is "4.0 out of 5 stars", so we split it and take the first part
        rating = float(rating_text.split(' ')[0])
    except Exception as e:
        rating = None # If it fails, just store "None"
        
    # 2. Get the review text
    try:
        # Find the review text element
        review_text_element = review.find('span', {'data-hook': 'review-body'})
        # Get the actual text
        review_text = review_text_element.get_text(strip=True)
    except Exception as e:
        review_text = None # If it fails, store "None"
        
    # 3. Add our findings to our lists
    all_ratings.append(rating)
    all_reviews.append(review_text)

# Let's check our work
print(f"Scraped {len(all_ratings)} ratings.")
print(f"Scraped {len(all_reviews)} review texts.")

Scraped 247 ratings.
Scraped 247 review texts.


In [12]:
# Create a pandas DataFrame (a table)
df = pd.DataFrame({
    'rating': all_ratings,
    'review_text': all_reviews
})

# Clean up: Drop any rows where we failed to scrape either the rating or the text
df = df.dropna()

# Display the first 5 rows of our table
print(df.head())

# Save the table to a CSV file in your project folder
df.to_csv('reviews.csv', index=False)

print("\nSuccessfully saved data to reviews.csv!")

     rating                                        review_text
174     5.0  Very good ac, its new and its cooling is excel...
178     5.0  Very good ac, its new and its cooling is excel...
180     5.0  Very good ac, its new and its cooling is excel...
189     1.0  Delivered on time and installation was done sa...
197     4.0  Even though I purchased the product online the...

Successfully saved data to reviews.csv!
