In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Read the Notebookcheck dataset and preview its first two rows
csv_path = 'Notebookcheck_last_3_months_data.csv'
data = pd.read_csv(csv_path)
data.head(n=2)

Unnamed: 0,Product_name,Processor,Graphics adapter,Memory,Display,Storage,Size,Battery,Camera,Weight,Price,URL_link
0,Lenovo Yoga 6 13 82ND0009US (Yoga 6 13 Series),"AMD Ryzen 5 5500U 6 x 2.1 - 4 GHz, 21 W PL2 / ...","AMD Radeon RX Vega 7 - 512 MB, Memory: 1333 MH...","8192 MB , 1600 MHz, 22-22-22-52, Dual-Channel...","13.30 inch 16:9, 1920 x 1080 pixel 166 PPI, 10...","WDC PC SN530 SDBPMPZ-265G, 256 GB",height x width x depth (in mm): 18.2 x 308 x 2...,60 Wh Lithium-Polymer,Webcam: 720pPrimary Camera: 0.9 MPix,"1.331 kg ( = 46.95 oz / 2.93 pounds), Power Su...",800 USD,https://www.notebookcheck.net/Lenovo-Yoga-6-13...
1,Asus ZenBook Flip 13 UX363EA-HP069T (ZenBook F...,"Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz, 51 W P...","Intel Iris Xe Graphics G7 96EUs, Memory: 1300 ...","16384 MB , LPDDR4-4266, soldered","13.30 inch 16:9, 1920 x 1080 pixel 166 PPI, mu...","WDC PC SN730 SDBPNTY-1T00, 1024 GB , 950 GB free",height x width x depth (in mm): 13 x 305 x 211...,"67 Wh, 4220 mAh Lithium-Ion",Webcam: HDPrimary Camera: 0.9 MPixSecondary Ca...,"1.188 kg ( = 41.91 oz / 2.62 pounds), Power Su...","1,799 EUR",https://www.notebookcheck.net/Asus-ZenBook-Fli...


In [3]:
# Helper functions that clean the raw text of each column
def clean_processor(text):
    """Split a raw processor description into its name and power limits.

    The description is split on commas. The first field is returned as the
    processor name. When at least three fields are present, the wattage
    (``<digits> W``) found in the second field is reported as PL2 and the
    one found in the third field as PL1.

    Parameters:
        text: raw processor description (a string).

    Returns:
        A list ``[processor, pl2, pl1]``. The power limits are digit strings,
        or ``'NA'`` when the field is absent or holds no wattage. A
        non-string input (e.g. NaN for an empty cell) yields
        ``['NA', 'NA', 'NA']``.
    """
    if not isinstance(text, str):
        return ['NA', 'NA', 'NA']

    proc_details = text.split(',')
    processor = proc_details[0]
    pl2 = pl1 = 'NA'

    if len(proc_details) >= 3:
        # A field without a wattage makes re.search return None; fall back to
        # 'NA' instead of raising AttributeError on .group().
        pl2_match = re.search(r'([0-9]+)\sW', proc_details[1])
        pl1_match = re.search(r'([0-9]+)\sW', proc_details[2])
        if pl2_match:
            pl2 = pl2_match.group(1)
        if pl1_match:
            pl1 = pl1_match.group(1)

    return [processor, pl2, pl1]

def clean_memory(text):
    """Extract the installed memory in whole gigabytes.

    Only the first comma-separated field is inspected; it is expected to
    contain the size as ``<digits> MB``.

    Parameters:
        text: raw memory description (a string).

    Returns:
        The memory size in GB as an int (MB floor-divided by 1024), or
        ``'NA'`` when the input is not a string or no ``MB`` figure is found.
    """
    if not isinstance(text, str):
        return 'NA'

    mem_details = text.split(',')
    match = re.search(r'([0-9]+)\sMB', mem_details[0])
    if match is None:
        return 'NA'

    # 1 GB is 1024 MB; dividing by 1000 reported 65536 MB as 65 GB.
    return int(match.group(1)) // 1024

def clean_display(text):
    """Return ``[diagonal, aspect_ratio]`` taken from a display description.

    Only the part of ``text`` before the first comma is searched, for the
    pattern ``<number> inch <ratio>``. Both values are returned as strings.
    """
    leading_field = text.split(',')[0]
    found = re.search(r'([0-9\.]+)\sinch\s([0-9:]+)', leading_field)
    return list(found.groups())

def clean_storage(text):
    """Extract the storage capacity in GB as a digit string.

    The capacity is the first ``<digits> GB`` figure found after the first
    comma. When the text has no comma, the whole text is searched instead of
    raising IndexError.

    Parameters:
        text: raw storage description (a string).

    Returns:
        The capacity as a string of digits, or ``'NA'`` when the input is not
        a string or no ``GB`` figure is found.
    """
    if not isinstance(text, str):
        return 'NA'

    head, comma, tail = text.partition(',')
    # With a comma present, skip the first field, as the positional lookup
    # on the split fields did before.
    searched = tail if comma else head
    match = re.search(r'([0-9]+)\sGB', searched)
    return match.group(1) if match else 'NA'

def clean_size(text):
    """Return the three dimensions given in inches as a tuple of strings.

    The text after the first colon is searched for the
    ``= <a> x <b> x <c> in`` group; the three captured numbers are returned
    in that order.
    """
    after_label = text.split(':')[1]
    inches = re.search(r'\=\s([0-9.]+)\sx\s([0-9.]+)\sx\s([0-9.]+)\sin', after_label)
    return inches.groups()

def clean_battery(text):
    """Return the first ``<number> Wh`` figure in ``text`` as a string."""
    capacity = re.search(r'([0-9.]+)\sWh', text)
    return capacity.group(1)

def clean_camera(text):
    """Return the first ``<number> MP`` figure in ``text`` as a string.

    ``'NA'`` is returned when ``text`` is not a ``str`` or holds no such
    figure.
    """
    if type(text) is str:
        found = re.search(r'([0-9.]+)\sMP', text)
        if found:
            return found.group(1)
    return 'NA'

def clean_weight(text):
    """Extract the weight in kilograms as a float.

    A ``<number> kg`` figure is preferred; otherwise a ``<digits> g`` figure
    is converted to kilograms.

    Parameters:
        text: raw weight description (a string).

    Returns:
        The weight in kg as a float. NaN is returned when the input is not a
        string or neither unit is found.
    """
    if not isinstance(text, str):
        return float('nan')

    kilograms = re.search(r'([0-9.]+)\skg', text)
    if kilograms:
        # Return a float here too: the kg branch used to return a string
        # while the gram branch returned a float, so the resulting column
        # mixed types and could not be sorted numerically.
        return float(kilograms.group(1))

    grams = re.search(r'([0-9]+)\sg', text)
    if grams:
        return int(grams.group(1)) / 1000

    return float('nan')
    
def clean_price(text, usd_rate=75, eur_rate=87):
    """Convert a raw price such as ``'1,799 EUR'`` to Indian rupees.

    The last whitespace-separated token is the currency; everything before
    it is the amount. In the amount, ``,`` or ``.`` followed by exactly three
    digits is treated as a thousands separator, and a trailing ``,`` or ``.``
    followed by one or two digits as the decimal part.

    Parameters:
        text: raw price text (a string).
        usd_rate: rupees per US dollar (default 75).
        eur_rate: rupees per euro (default 87).

    Returns:
        The converted price rounded to an int, or ``'NA'`` when the input is
        not a string, the amount cannot be parsed, or the currency is
        neither USD nor one starting with ``eur``.
    """
    if not isinstance(text, str):
        return 'NA'

    tokens = text.split()
    if len(tokens) < 2:
        return 'NA'
    *amount_tokens, currency = tokens
    amount = ''.join(amount_tokens)

    parsed = re.fullmatch(
        r'([0-9]{1,3}(?:[,.][0-9]{3})+|[0-9]+)(?:[,.]([0-9]{1,2}))?', amount)
    if parsed is None:
        return 'NA'

    # Strip only thousands separators; removing every ',' and '.' would
    # turn 999.99 into 99999.
    whole = re.sub(r'[,.]', '', parsed.group(1))
    fraction = parsed.group(2) or '0'
    value = float(whole + '.' + fraction)

    currency = currency.lower()
    if currency == 'usd':
        rate = usd_rate
    elif currency.startswith('eur'):
        rate = eur_rate
    else:
        # Unknown currency: report it as missing rather than hitting an
        # unbound result variable.
        return 'NA'

    return int(round(rate * value))

In [4]:
def _expand(column, cleaner, names):
    """Run ``cleaner`` on every value of ``column`` and return its multi-part
    results as a DataFrame with one column per entry in ``names``."""
    rows = [cleaner(value) for value in column]
    return pd.DataFrame(rows, index=column.index, columns=names)

# Scalar-returning cleaners are applied directly and multi-value cleaners go
# through _expand: building a pd.Series for every row is slow and wraps
# scalar results in a needless one-column DataFrame.

# clean_processor
processor_cols = ['Processor','PL2(in W)','PL1(in W)']
data[processor_cols] = _expand(data['Processor'], clean_processor, processor_cols)

# Brand_name (first space-separated word of the product name)
data['Brand_name'] = data['Product_name'].astype(str).str.split(' ').str[0]

# clean_memory
data['Memory(in GB)'] = data['Memory'].apply(clean_memory)

# clean_display
display_cols = ['Display(in inches)','Aspect_ratio']
data[display_cols] = _expand(data['Display'], clean_display, display_cols)

# Graphics adapter (text before the first comma)
data['Graphics adapter'] = data['Graphics adapter'].astype(str).str.split(',').str[0]

# clean_storage
data['Storage(in GB)'] = data['Storage'].apply(clean_storage)

# clean_size
size_cols = ['Height(in inches)','Width(in inches)','Depth(in inches)']
data[size_cols] = _expand(data['Size'], clean_size, size_cols)

# clean_battery
data['Battery(in Wh)'] = data['Battery'].apply(clean_battery)

# clean_camera
data['Camera(in MP)'] = data['Camera'].apply(clean_camera)

# clean_weight
data['Weight(in kg)'] = data['Weight'].apply(clean_weight)

# clean_price
data['Price(in INR)'] = data['Price'].apply(clean_price)

In [5]:
# drop cleaned columns
# Use the `columns=` keyword: passing the axis positionally (drop(labels, 1))
# was deprecated in pandas 1.3 and raises TypeError on pandas 2.x.
data = data.drop(columns=['Memory','Display','Storage','Size','Battery','Camera','Weight','Price'])

In [6]:
# Put the columns in presentation order: identity, performance, then physical specs, price and link
column_order = [
    'Brand_name', 'Product_name', 'Processor', 'Graphics adapter',
    'PL1(in W)', 'PL2(in W)', 'Memory(in GB)', 'Storage(in GB)',
    'Display(in inches)', 'Aspect_ratio', 'Battery(in Wh)', 'Weight(in kg)',
    'Camera(in MP)', 'Height(in inches)', 'Width(in inches)', 'Depth(in inches)',
    'Price(in INR)', 'URL_link',
]
data = data.loc[:, column_order]

In [7]:
# Preview the first three rows of the cleaned, reordered frame
data.head(n=3)

Unnamed: 0,Brand_name,Product_name,Processor,Graphics adapter,PL1(in W),PL2(in W),Memory(in GB),Storage(in GB),Display(in inches),Aspect_ratio,Battery(in Wh),Weight(in kg),Camera(in MP),Height(in inches),Width(in inches),Depth(in inches),Price(in INR),URL_link
0,Lenovo,Lenovo Yoga 6 13 82ND0009US (Yoga 6 13 Series),AMD Ryzen 5 5500U 6 x 2.1 - 4 GHz,AMD Radeon RX Vega 7 - 512 MB,18,21,8,256,13.3,16:9,60,1.331,0.9,0.72,12.13,8.13,60000,https://www.notebookcheck.net/Lenovo-Yoga-6-13...
1,Asus,Asus ZenBook Flip 13 UX363EA-HP069T (ZenBook F...,Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs,15,51,16,1024,13.3,16:9,67,1.188,0.9,0.51,12.01,8.31,156513,https://www.notebookcheck.net/Asus-ZenBook-Fli...
2,Medion,Medion Erazer Beast X20 (Erazer Series),Intel Core i7-10870H 8 x 2.2 - 5 GHz,NVIDIA GeForce RTX 3070 Laptop GPU - 8192 MB,60,60,32,2048,17.3,16:9,91,2.246,0.9,0.91,15.55,10.31,200013,https://www.notebookcheck.net/Medion-Erazer-Be...


In [9]:
# Sorting dataframe with some features
sort_keys = ['Weight(in kg)', 'Price(in INR)', 'Storage(in GB)',
             'Memory(in GB)', 'Battery(in Wh)', 'PL1(in W)']

# The cleaning step leaves these columns holding strings and 'NA'
# placeholders, which would sort lexicographically ('512' after '1024') or
# fail on mixed types. Convert them to numbers first; unparseable values
# become NaN.
numeric_keys = {key: pd.to_numeric(data[key], errors='coerce') for key in sort_keys}

# Lightest and cheapest first; within ties, larger storage, memory, battery
# and PL1 come first.
data = data.assign(**numeric_keys).sort_values(
    sort_keys, ascending=[True, True, False, False, False, False])

In [11]:
# First 10 rows of the sorted frame (the "top 10 efficient laptops" under the ordering above)
data.head(n=10)

Unnamed: 0,Brand_name,Product_name,Processor,Graphics adapter,PL1(in W),PL2(in W),Memory(in GB),Storage(in GB),Display(in inches),Aspect_ratio,Battery(in Wh),Weight(in kg),Camera(in MP),Height(in inches),Width(in inches),Depth(in inches),Price(in INR),URL_link
27,Dell,"Dell Latitude 13 7320 Detachable, i7-1180G7 (L...",Intel Core i7-1180G7 4 x 2.2 - 4.6 GHz,Intel Iris Xe Graphics G7 96EUs,23,40,16,256,13.0,3:2,40,0.782,8.0,0.33,11.35,8.19,157500.0,https://www.notebookcheck.net/Dell-Latitude-13...
40,LG,LG Gram 14Z90P-G.AA79G (Gram Series),Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs - 1024 MB,18,52,16,1024,14.0,16:10,72,0.967,0.9,0.66,12.34,8.47,139113.0,https://www.notebookcheck.net/LG-Gram-14Z90P-i...
46,Huawei,Huawei MateBook X Silver Frost (2020) (MateBoo...,Intel Core i5-10210U 4 x 1.6 - 4.2 GHz,Intel UHD Graphics 620 - 1024 MB,7,30,16,512,13.0,3:2,42,1.009,0.9,0.54,11.18,8.11,130413.0,https://www.notebookcheck.net/Huawei-MateBook-...
24,Schenker,Schenker Vision 14,Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs,30,60,16,1024,14.0,16:10,53,1.028,0.9,0.61,12.16,8.46,143550.0,https://www.notebookcheck.net/Schenker-Vision-...
73,Lenovo,Lenovo ThinkPad X12 20UVS03G00,Intel Core i7-1160G7 4 x 2.1 - 4.4 GHz,Intel Iris Xe Graphics G7 96EUs,12,40,16,512,12.3,3:2,42,1.107,,0.57,11.15,8.01,141810.0,https://www.notebookcheck.net/Lenovo-ThinkPad-...
55,Lenovo,Lenovo ThinkPad X1 Carbon G9-20XWCTO1WW (Think...,Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs,28,36,32,1024,14.0,16:10,57,1.156,,0.59,12.4,8.72,219240.0,https://www.notebookcheck.net/Lenovo-ThinkPad-...
58,Asus,Asus ZenBook 13 UM325S (ZenBook 13 UM325 Series),AMD Ryzen 7 5800U 8 x 1.9 - 4.4 GHz,AMD Radeon RX Vega 8 (Ryzen 5000),25,30,16,1024,13.3,16:9,67,1.158,0.9,0.55,11.97,7.99,,https://www.notebookcheck.net/Asus-ZenBook-13-...
31,HP,HP Elite Dragonfly Max (Elite Dragonfly Series),Intel Core i7-1185G7 4 x 3 - 4.8 GHz,Intel Iris Xe Graphics G7 96EUs,18,35,16,512,13.3,16:9,56,1.164,5.0,0.63,11.98,7.78,183750.0,https://www.notebookcheck.net/HP-EliteBook-Dra...
50,Lenovo,Lenovo ThinkPad X1 Carbon G9-20XXS00100 (Think...,Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs,28,36,32,1024,14.0,16:10,57,1.175,0.9,0.59,12.38,8.72,146073.0,https://www.notebookcheck.net/Lenovo-ThinkPad-...
1,Asus,Asus ZenBook Flip 13 UX363EA-HP069T (ZenBook F...,Intel Core i7-1165G7 4 x 2.8 - 4.7 GHz,Intel Iris Xe Graphics G7 96EUs,15,51,16,1024,13.3,16:9,67,1.188,0.9,0.51,12.01,8.31,156513.0,https://www.notebookcheck.net/Asus-ZenBook-Fli...
