In [1]:
import pandas as pd
import numpy as np

In [2]:
filepath = 'laptops.csv'
df = pd.read_csv(filepath)

In [3]:
df

Unnamed: 0,title,price,rating
0,"HP 15s, AMD Ryzen 3 5300U, 15.6 inch(39.6cm) F...",39990,
1,"Dell Vostro 3420 Laptop,12th Gen Intel Core i3...",41990,
2,"Honor MagicBook 14, AMD Ryzen 5 5500U 14-inch ...",44490,4.4 out of 5 stars
3,HP 255 G8 Laptop with AMD Athlon Silver 3050U ...,26999,
4,Lenovo IdeaPad Slim 3 Intel Core i3-1115G4 11t...,33639,
...,...,...,...
343,(Renewed) DELL Latitude 5490 Core i5 8th Gen L...,26615,
344,"ASUS Vivobook S 14 Flip (2023), AMD Ryzen 5 75...",68990,
345,HP Victus Gaming Laptop AMD Ryzen 5 5600H 15.6...,58490,4.5 out of 5 stars
346,Xiaomi NoteBook Pro 120 12th Gen Intel i5-1245...,64999,


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 348 entries, 0 to 347
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   348 non-null    object
 1   price   340 non-null    object
 2   rating  30 non-null     object
dtypes: object(3)
memory usage: 8.3+ KB


In [5]:
print(df.columns.tolist())

['title', 'price', 'rating']


In [6]:
def clean_price_column(price):
    if isinstance(price,str):             #check if price is string
        price = price.replace(',', '')    # removes comma
        price = price.strip()             #removes white space
        if price.isnumeric():
            price = int(price)
            return price
        if len(price) == 0:
            return np.nan
    return price

In [7]:
clean_price_column('239807.890')

'239807.890'

In [8]:
df['price'].apply(clean_price_column)

0      39990.0
1      41990.0
2      44490.0
3      26999.0
4      33639.0
        ...   
343    26615.0
344    68990.0
345    58490.0
346    64999.0
347    56990.0
Name: price, Length: 348, dtype: float64

In [9]:
df['cleaned price'] = df['price'].apply(clean_price_column)

In [10]:
df

Unnamed: 0,title,price,rating,cleaned price
0,"HP 15s, AMD Ryzen 3 5300U, 15.6 inch(39.6cm) F...",39990,,39990.0
1,"Dell Vostro 3420 Laptop,12th Gen Intel Core i3...",41990,,41990.0
2,"Honor MagicBook 14, AMD Ryzen 5 5500U 14-inch ...",44490,4.4 out of 5 stars,44490.0
3,HP 255 G8 Laptop with AMD Athlon Silver 3050U ...,26999,,26999.0
4,Lenovo IdeaPad Slim 3 Intel Core i3-1115G4 11t...,33639,,33639.0
...,...,...,...,...
343,(Renewed) DELL Latitude 5490 Core i5 8th Gen L...,26615,,26615.0
344,"ASUS Vivobook S 14 Flip (2023), AMD Ryzen 5 75...",68990,,68990.0
345,HP Victus Gaming Laptop AMD Ryzen 5 5600H 15.6...,58490,4.5 out of 5 stars,58490.0
346,Xiaomi NoteBook Pro 120 12th Gen Intel i5-1245...,64999,,64999.0


In [11]:
def cleaned_rating_column(rating):
    if isinstance(rating,str):
        rating = rating.split()[0]
        return float(rating)
    return rating

In [12]:
df['cleaned_rating'] = df['rating'].apply(cleaned_rating_column)
df

Unnamed: 0,title,price,rating,cleaned price,cleaned_rating
0,"HP 15s, AMD Ryzen 3 5300U, 15.6 inch(39.6cm) F...",39990,,39990.0,
1,"Dell Vostro 3420 Laptop,12th Gen Intel Core i3...",41990,,41990.0,
2,"Honor MagicBook 14, AMD Ryzen 5 5500U 14-inch ...",44490,4.4 out of 5 stars,44490.0,4.4
3,HP 255 G8 Laptop with AMD Athlon Silver 3050U ...,26999,,26999.0,
4,Lenovo IdeaPad Slim 3 Intel Core i3-1115G4 11t...,33639,,33639.0,
...,...,...,...,...,...
343,(Renewed) DELL Latitude 5490 Core i5 8th Gen L...,26615,,26615.0,
344,"ASUS Vivobook S 14 Flip (2023), AMD Ryzen 5 75...",68990,,68990.0,
345,HP Victus Gaming Laptop AMD Ryzen 5 5600H 15.6...,58490,4.5 out of 5 stars,58490.0,4.5
346,Xiaomi NoteBook Pro 120 12th Gen Intel i5-1245...,64999,,64999.0,


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 348 entries, 0 to 347
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   title           348 non-null    object 
 1   price           340 non-null    object 
 2   rating          30 non-null     object 
 3   cleaned price   340 non-null    float64
 4   cleaned_rating  30 non-null     float64
dtypes: float64(2), object(3)
memory usage: 13.7+ KB


In [14]:
df.sort_values(by='cleaned_rating',ascending=False, inplace=True)
df

Unnamed: 0,title,price,rating,cleaned price,cleaned_rating
298,"Apple 2021 MacBook Pro (16-inch/41.05 cm, M1 M...",319990,4.7 out of 5 stars,319990.0,4.7
111,"Apple 2020 MacBook Air Laptop M1 chip, 13.3-in...",84990,4.7 out of 5 stars,84990.0,4.7
273,"ASUS Vivobook 15, Intel Core i3-1220P 12th Gen...",42990,4.6 out of 5 stars,42990.0,4.6
249,"ASUS Vivobook 15, Intel Core i3-1220P 12th Gen...",42990,4.6 out of 5 stars,42990.0,4.6
237,"Lenovo ThinkBook 15 G3 Ryzen 3 15.6"" FHD Thin ...",35990,4.6 out of 5 stars,35990.0,4.6
...,...,...,...,...,...
342,"Dell Inspiron 3525 Laptop, AMD Ryzen R3-5425U,...",39990,,39990.0,
343,(Renewed) DELL Latitude 5490 Core i5 8th Gen L...,26615,,26615.0,
344,"ASUS Vivobook S 14 Flip (2023), AMD Ryzen 5 75...",68990,,68990.0,
346,Xiaomi NoteBook Pro 120 12th Gen Intel i5-1245...,64999,,64999.0,


In [15]:
df.drop(columns=['price'], inplace=True)
df

Unnamed: 0,title,rating,cleaned price,cleaned_rating
298,"Apple 2021 MacBook Pro (16-inch/41.05 cm, M1 M...",4.7 out of 5 stars,319990.0,4.7
111,"Apple 2020 MacBook Air Laptop M1 chip, 13.3-in...",4.7 out of 5 stars,84990.0,4.7
273,"ASUS Vivobook 15, Intel Core i3-1220P 12th Gen...",4.6 out of 5 stars,42990.0,4.6
249,"ASUS Vivobook 15, Intel Core i3-1220P 12th Gen...",4.6 out of 5 stars,42990.0,4.6
237,"Lenovo ThinkBook 15 G3 Ryzen 3 15.6"" FHD Thin ...",4.6 out of 5 stars,35990.0,4.6
...,...,...,...,...
342,"Dell Inspiron 3525 Laptop, AMD Ryzen R3-5425U,...",,39990.0,
343,(Renewed) DELL Latitude 5490 Core i5 8th Gen L...,,26615.0,
344,"ASUS Vivobook S 14 Flip (2023), AMD Ryzen 5 75...",,68990.0,
346,Xiaomi NoteBook Pro 120 12th Gen Intel i5-1245...,,64999.0,
