In [2]:
import pandas as pd

# Location of the raw CSV export; a raw string keeps the Windows backslashes literal
file_path = r'C:\Users\megha\Desktop\data_analysis\amazon.csv'

# Read the whole file into a DataFrame
data = pd.read_csv(file_path)

# Preview the leading rows to see which columns exist and how values are formatted
print("First few rows of the dataset:")
print(data.head())

# Column names, non-null counts and dtypes.
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() -- doing so appends a stray "None" line to the output.
print("\nSummary information of the dataset:")
data.info()

# Remove currency symbols and non-numeric characters, then convert prices to numeric values
def _to_number(column, symbols=()):
    """Return ``column`` as floats, keeping the first number found in each value.

    Args:
        column: pandas Series holding the raw values (e.g. '₹1,099', '64%').
        symbols: literal substrings to delete before the number is extracted.

    Returns:
        A float Series aligned with ``column``; values that contain no digits
        become NaN.
    """
    # Cast to str first so the .str accessor also works when the column is
    # already numeric (for example when this cell is re-run after a conversion).
    text = column.astype(str)
    for symbol in symbols:
        # regex=False: always treat the symbol literally instead of relying on
        # the pandas default for the `regex` argument.
        text = text.str.replace(symbol, '', regex=False)
    # The raw string avoids the invalid '\d' escape warning, and the escaped dot
    # matches only a real decimal point (a bare '.' would match any character,
    # letting values such as '12-15' through to a failing float conversion).
    # expand=False yields a Series rather than a one-column DataFrame.
    return text.str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)


# Which literal symbols to strip from each column before extracting the number
_SYMBOLS_BY_COLUMN = {
    'discounted_price': ('₹', ','),
    'actual_price': ('₹', ','),
    'discount_percentage': ('%',),
    'rating': (),
    'rating_count': (',',),
}

for _name, _symbols in _SYMBOLS_BY_COLUMN.items():
    data[_name] = _to_number(data[_name], _symbols)

# Per-column count of null entries before any rows are removed
print("\nMissing values in the dataset:")
print(data.isna().sum())

# Discard every row that has a null in at least one column
data = data.dropna(axis=0, how='any')

# Per-column null counts again, to confirm the cleaned frame has none left
print("\nMissing values after cleaning:")
print(data.isna().sum())

# Example analysis: the five rows with the highest rating
top_rated_products = data.sort_values('rating', ascending=False).iloc[:5]
print("\nTop 5 products by rating:")
print(
    top_rated_products.loc[
        :,
        [
            'product_name',
            'rating',
            'rating_count',
            'discounted_price',
            'actual_price',
            'discount_percentage',
        ],
    ]
)

# Example analysis: the five rows with the largest discount percentage
top_discounted_products = data.sort_values('discount_percentage', ascending=False).iloc[:5]
print("\nTop 5 products by discount percentage:")
print(
    top_discounted_products.loc[
        :,
        [
            'product_name',
            'discount_percentage',
            'rating',
            'rating_count',
            'discounted_price',
            'actual_price',
        ],
    ]
)


First few rows of the dataset:
   product_id                                       product_name  \
0  B07JW9H4J1  Wayona Nylon Braided USB to Lightning Fast Cha...   
1  B098NS6PVG  Ambrane Unbreakable 60W / 3A Fast Charging 1.5...   
2  B096MSW6CT  Sounce Fast Phone Charging Cable & Data Sync U...   
3  B08HDJ86NZ  boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...   
4  B08CF3B7N1  Portronics Konnect L 1.2M Fast Charging 3A 8 P...   

                                            category discounted_price  \
0  Computers&Accessories|Accessories&Peripherals|...             ₹399   
1  Computers&Accessories|Accessories&Peripherals|...             ₹199   
2  Computers&Accessories|Accessories&Peripherals|...             ₹199   
3  Computers&Accessories|Accessories&Peripherals|...             ₹329   
4  Computers&Accessories|Accessories&Peripherals|...             ₹154   

  actual_price discount_percentage rating rating_count  \
0       ₹1,099                 64%    4.2       24,269   
1    