### Phase 2: Exploratory Data Analysis (EDA)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the laptop listings; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv('amazon_laptops.csv')
# Bare last expression so the notebook renders the frame as a table.
df

Unnamed: 0,Title,Price,Rating,Reviews,Link
0,"Samsung Galaxy Book4 (Gray, 16GB RAM, 512GB SS...",53990.0,4.1,120,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...
1,"Samsung Galaxy Book4 (Gray, 16GB RAM, 512GB SS...",62990.0,3.8,21,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...
2,"HP Victus, AMD Ryzen 9-8945HS, 8GB RTX 4060, 1...",99990.0,3.8,343,https://www.amazon.in/HP-9-8945HS-Upgrade-300n...
3,"ASUS TUF Gaming A15, AMD Ryzen 7 7435HS Gaming...",64990.0,4.2,608,https://www.amazon.in/ASUS-15-6-inch-GeForce-G...
4,"ASUS TUF A16 (2025), AMD Ryzen 7 7445HS, Gamin...",89990.0,4.0,57,https://www.amazon.in/ASUS-7445HS-4050-6GB-Win...
...,...,...,...,...,...
421,"ASUS TUF Gaming A14, AMD Ryzen 7 8845HS Gaming...",114990.0,3.2,5,https://www.amazon.in/ASUS-Processor-GeForce-W...
422,"MSI Thin 15, Intel 13th Gen. Core i5-13420H, 4...",59990.0,4.1,69,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...
423,"MSI Thin 15, Intel 12th Gen. i7-12650H, 40CM F...",74953.0,4.0,11,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...
424,"ASUS Gaming V16 (2025) 14th Gen,Intel Core 5 2...",77790.0,4.0,47,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...


In [3]:
# Column dtypes and non-null counts (printed to stdout).
df.info()
# Summary statistics; displayed as the cell's result.
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 426 entries, 0 to 425
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Title    426 non-null    object 
 1   Price    405 non-null    float64
 2   Rating   350 non-null    float64
 3   Reviews  350 non-null    object 
 4   Link     426 non-null    object 
dtypes: float64(2), object(3)
memory usage: 16.8+ KB


Unnamed: 0,Price,Rating
count,405.0,350.0
mean,110715.02963,3.996286
std,83724.499657,0.57733
min,22995.0,1.0
25%,67990.0,3.8
50%,77790.0,4.0
75%,115020.0,4.2
max,617990.0,5.0


In [4]:
# Missing-value count per column of the raw frame.
df.isna().sum()

Title       0
Price      21
Rating     76
Reviews    76
Link        0
dtype: int64

In [7]:
# Build the working frame in one chained expression:
#   1. drop rows that have no Price,
#   2. fill the remaining gaps in Rating and Reviews with 0.
# Both steps return new objects, so `df` itself is left unmodified.
# NOTE(review): 0 acts as a "no rating / no reviews" sentinel here and will
# pull down any average computed over Rating later - confirm this is intended.
df_clean = (
    df
    .dropna(subset=['Price'])
    .fillna({'Rating': 0, 'Reviews': 0})
)

In [9]:
# Sanity check: every column of the cleaned frame should report 0 missing values.
df_clean.isna().sum()

Title      0
Price      0
Rating     0
Reviews    0
Link       0
dtype: int64

In [6]:
# (rows, columns) of the original frame `df`, not of `df_clean`.
df.shape

(426, 5)

In [10]:
def clean_reviews(value):
    """Convert a raw review-count value into a plain integer.

    Thousands separators are removed ("1,234" -> 1234) and a "K" suffix,
    in either case, is expanded ("1.2K" -> 1200).

    Parameters
    ----------
    value : object
        Raw cell value. It is stringified before parsing, so numbers and
        strings are both accepted.

    Returns
    -------
    int
        The parsed count, or 0 when the value is missing or cannot be
        interpreted as a number.
    """
    if pd.isna(value):
        return 0
    text = str(value).replace(",", "").upper()
    try:
        if "K" in text:
            # Parsing happens inside the try block so that non-numeric text
            # that merely contains a "k" (e.g. "unknown") falls back to 0
            # instead of raising. round() rather than truncation keeps
            # binary floating-point error from dropping a unit.
            return int(round(float(text.replace("K", "")) * 1000))
        return int(float(text))
    except (ValueError, OverflowError):
        # ValueError: text is not a number (or is NaN);
        # OverflowError: text parsed to an infinity.
        return 0

# Normalise every raw review count to an integer, element by element.
df_clean['Reviews'] = df_clean['Reviews'].map(clean_reviews)

In [11]:
# Price: stringify, strip thousands separators and re-parse; anything that
# fails to parse becomes 0 before the cast to int.
df_clean['Price'] = (
    pd.to_numeric(
        df_clean['Price'].astype(str).str.replace(",", ""),
        errors='coerce',
    )
    .fillna(0)
    .astype(int)
)

# Rating: force numeric, using 0.0 for anything that fails to parse.
df_clean['Rating'] = (
    pd.to_numeric(df_clean['Rating'], errors='coerce')
    .fillna(0.0)
)

# Confirm the resulting column types.
print(df_clean.dtypes)

Title       object
Price        int64
Rating     float64
Reviews      int64
Link        object
dtype: object


In [12]:
# Brand = first whitespace-delimited token of the title, upper-cased.
# The vectorised .str accessor replaces the per-row lambda and yields NaN for
# a blank title, where x.split()[0] would raise IndexError.
# NOTE(review): this assumes every title starts with the brand name - verify
# against the distinct Brand values before grouping on this column.
df_clean['Brand'] = df_clean['Title'].str.split().str[0].str.upper()
df_clean

Unnamed: 0,Title,Price,Rating,Reviews,Link,Brand
0,"Samsung Galaxy Book4 (Gray, 16GB RAM, 512GB SS...",53990,4.1,120,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,SAMSUNG
1,"Samsung Galaxy Book4 (Gray, 16GB RAM, 512GB SS...",62990,3.8,21,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,SAMSUNG
2,"HP Victus, AMD Ryzen 9-8945HS, 8GB RTX 4060, 1...",99990,3.8,343,https://www.amazon.in/HP-9-8945HS-Upgrade-300n...,HP
3,"ASUS TUF Gaming A15, AMD Ryzen 7 7435HS Gaming...",64990,4.2,608,https://www.amazon.in/ASUS-15-6-inch-GeForce-G...,ASUS
4,"ASUS TUF A16 (2025), AMD Ryzen 7 7445HS, Gamin...",89990,4.0,57,https://www.amazon.in/ASUS-7445HS-4050-6GB-Win...,ASUS
...,...,...,...,...,...,...
421,"ASUS TUF Gaming A14, AMD Ryzen 7 8845HS Gaming...",114990,3.2,5,https://www.amazon.in/ASUS-Processor-GeForce-W...,ASUS
422,"MSI Thin 15, Intel 13th Gen. Core i5-13420H, 4...",59990,4.1,69,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,MSI
423,"MSI Thin 15, Intel 12th Gen. i7-12650H, 40CM F...",74953,4.0,11,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,MSI
424,"ASUS Gaming V16 (2025) 14th Gen,Intel Core 5 2...",77790,4.0,47,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,ASUS


In [17]:
# Persist the cleaned frame; index=False keeps the row index out of the file.
df_clean.to_csv("amazon_laptops_cleaned.csv", index=False)