In [173]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics

import tensorflow as tf
import keras
from keras import layers

In [174]:
df = pd.read_csv("laptop_cleaned2.csv")
original_df = df.copy()

df.head()

Unnamed: 0.1,Unnamed: 0,Name,Brand,Price,Rating,Processor_brand,Processor_name,Processor_variant,Processor_gen,Core_per_processor,...,Graphics_name,Graphics_brand,Graphics_GB,Graphics_integreted,Display_size_inches,Horizontal_pixel,Vertical_pixel,ppi,Touch_screen,Operating_system
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,HP,50399,4.3,AMD,AMD Ryzen 5,5600H,5.0,6.0,...,AMD Radeon RX 6500M,AMD,4.0,False,15.6,1920,1080,141.21,True,Windows 11 OS
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,Lenovo,26690,4.45,AMD,AMD Ryzen 3,7320U,7.0,4.0,...,AMD Radeon Graphics,AMD,,False,15.6,1920,1080,141.21,False,Windows 11 OS
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,HP,37012,4.65,Intel,Intel Core i3,1215U,12.0,6.0,...,Intel UHD Graphics,Intel,,False,15.6,1920,1080,141.21,False,Windows 11 OS
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,Samsung,69990,4.75,Intel,Intel Core i5,1240P,12.0,12.0,...,Intel Iris Xe Graphics,Intel,,False,13.3,1080,1920,165.63,False,Windows 11 OS
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,Tecno,23990,4.25,Intel,Intel Core i3,1115G4,11.0,2.0,...,Intel UHD Graphics,Intel,,False,15.6,1920,1080,141.21,False,Windows 11 OS


In [175]:
df.isnull().sum()

Unnamed: 0                  0
Name                        0
Brand                       0
Price                       0
Rating                      0
Processor_brand             0
Processor_name              0
Processor_variant          24
Processor_gen             128
Core_per_processor         10
Total_processor           447
Execution_units           447
Low_Power_Cores             0
Energy_Efficient_Units      0
Threads                    46
RAM_GB                      0
RAM_type                   22
Storage_capacity_GB         0
Storage_type                0
Graphics_name               2
Graphics_brand              2
Graphics_GB               652
Graphics_integreted         2
Display_size_inches         0
Horizontal_pixel            0
Vertical_pixel              0
ppi                         0
Touch_screen                0
Operating_system            0
dtype: int64

In [176]:
df.shape

(1020, 29)

In [177]:
df.duplicated().sum()

np.int64(0)

I will impute missing values for core_per_processor, threads, ram_type, graphics_name, graphics_brand and graphics_integrated and will drop total_processor, Execution_units and graphics_gb due to almost half of the rows being empty for the first two and more than 60 percent missing for the last.

#### checking and simplifying Laptop Brand

In [178]:
df["Brand"].nunique()

31

In [179]:
df["Brand"].value_counts()

Brand
Lenovo       217
HP           213
Asus         157
Dell         116
MSI           97
Acer          69
Samsung       32
Apple         20
Infinix       20
Chuwi          8
Zebronics      7
Microsoft      7
LG             7
Honor          6
Xiaomi         6
Gigabyte       6
Avita          6
Ultimus        5
Wings          3
Primebook      3
Fujitsu        3
Huawei         2
AXL            2
Tecno          1
Jio            1
iBall          1
Walker         1
Colorful       1
ASUS           1
Ninkear        1
Razer          1
Name: count, dtype: int64

because I have so many brands with less than 10 entries, I will combine them into "other"

In [180]:
brand_counts = df["Brand"].value_counts()
rare_brands = brand_counts[brand_counts < 10].index  # brands with <10 rows
df["Brand"] = df["Brand"].replace(rare_brands, "Other")

In [181]:
df["Brand"].unique()

array(['HP', 'Lenovo', 'Samsung', 'Other', 'Dell', 'Asus', 'Apple',
       'Acer', 'MSI', 'Infinix'], dtype=object)

In [182]:
df["Brand"].value_counts()

Brand
Lenovo     217
HP         213
Asus       157
Dell       116
MSI         97
Other       79
Acer        69
Samsung     32
Apple       20
Infinix     20
Name: count, dtype: int64

In [183]:
df["Brand"].isna().sum()

np.int64(0)

In [184]:
df = pd.get_dummies(df, columns=["Brand"])

In [185]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Processor_brand,Processor_name,Processor_variant,Processor_gen,Core_per_processor,Total_processor,...,Brand_Acer,Brand_Apple,Brand_Asus,Brand_Dell,Brand_HP,Brand_Infinix,Brand_Lenovo,Brand_MSI,Brand_Other,Brand_Samsung
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,50399,4.3,AMD,AMD Ryzen 5,5600H,5.0,6.0,,...,False,False,False,False,True,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,26690,4.45,AMD,AMD Ryzen 3,7320U,7.0,4.0,,...,False,False,False,False,False,False,True,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,37012,4.65,Intel,Intel Core i3,1215U,12.0,6.0,2.0,...,False,False,False,False,True,False,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,69990,4.75,Intel,Intel Core i5,1240P,12.0,12.0,4.0,...,False,False,False,False,False,False,False,False,False,True
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,23990,4.25,Intel,Intel Core i3,1115G4,11.0,2.0,,...,False,False,False,False,False,False,False,False,True,False


#### Changing price unit to EUR

The price is in Indian Rupees and I am changing it to Euros 

In [186]:
inr_to_eur = 0.01
df["Price"] = df["Price"] * inr_to_eur

In [187]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Processor_brand,Processor_name,Processor_variant,Processor_gen,Core_per_processor,Total_processor,...,Brand_Acer,Brand_Apple,Brand_Asus,Brand_Dell,Brand_HP,Brand_Infinix,Brand_Lenovo,Brand_MSI,Brand_Other,Brand_Samsung
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,AMD,AMD Ryzen 5,5600H,5.0,6.0,,...,False,False,False,False,True,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,AMD,AMD Ryzen 3,7320U,7.0,4.0,,...,False,False,False,False,False,False,True,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,Intel,Intel Core i3,1215U,12.0,6.0,2.0,...,False,False,False,False,True,False,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,Intel,Intel Core i5,1240P,12.0,12.0,4.0,...,False,False,False,False,False,False,False,False,False,True
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,Intel,Intel Core i3,1115G4,11.0,2.0,,...,False,False,False,False,False,False,False,False,True,False


#### Checking Processor brands

In [188]:
df["Processor_brand"].nunique()

7

In [189]:
df["Processor_brand"].value_counts()

Processor_brand
Intel        742
AMD          250
Apple         18
MediaTek       7
Qualcomm       1
Microsoft      1
HiSilicon      1
Name: count, dtype: int64

In [190]:
processor_brand_counts = df["Processor_brand"].value_counts()
rare_processor_brands = processor_brand_counts[processor_brand_counts < 10].index  # brands with <10 rows
df["Processor_brand"] = df["Processor_brand"].replace(rare_processor_brands, "Other")

In [191]:
df = pd.get_dummies(df, columns=["Processor_brand"])

In [192]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Processor_name,Processor_variant,Processor_gen,Core_per_processor,Total_processor,Execution_units,...,Brand_HP,Brand_Infinix,Brand_Lenovo,Brand_MSI,Brand_Other,Brand_Samsung,Processor_brand_AMD,Processor_brand_Apple,Processor_brand_Intel,Processor_brand_Other
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,AMD Ryzen 5,5600H,5.0,6.0,,,...,True,False,False,False,False,False,True,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,AMD Ryzen 3,7320U,7.0,4.0,,,...,False,False,True,False,False,False,True,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,Intel Core i3,1215U,12.0,6.0,2.0,4.0,...,True,False,False,False,False,False,False,False,True,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,Intel Core i5,1240P,12.0,12.0,4.0,8.0,...,False,False,False,False,False,True,False,False,True,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,Intel Core i3,1115G4,11.0,2.0,,,...,False,False,False,False,True,False,False,False,True,False


I combined brands with less than 10 entries into "others"

#### Checking and simplifying "Processor_name" and "Processor_variant"

In [193]:
df["Processor_name"].nunique()

36

In [194]:
df["Processor_name"].value_counts()

Processor_name
Intel Core i5                   322
Intel Core i7                   159
AMD Ryzen 5                     119
Intel Core i3                   114
AMD Ryzen 7                      87
Intel Core i9                    49
Intel Core Ultra                 44
Intel Celeron                    36
AMD Ryzen 3                      31
Apple M3                          7
AMD Ryzen 9                       7
Intel Core 5                      4
AMD Athlon                        4
Apple M2                          3
Apple M3 Pro                      3
Intel Core 7                      3
MediaTek Kompanio                 3
MediaTek                          3
Apple M2 Apple M2 Chip            2
Apple M3 Max                      2
Intel Pentium Silver              2
Intel                             2
Apple M1                          1
eration Intel Core                1
Intel Atom Quad                   1
Intel Core 3                      1
Qualcomm X Elite                  1
AMD Athlon Si

In [195]:
df["Processor_variant"].nunique()

124

In [196]:
df["Processor_variant"].value_counts()

Processor_variant
1235U      60
1215U      50
12450H     49
1335U      38
1115G4     33
           ..
13900HK     1
7845HX      1
N4120       1
1260P       1
5500u       1
Name: count, Length: 124, dtype: int64

I couldn't figure out how to convert the processor name, variant and generation into numerical values without making it too complex since Intel, AMD and Apple don't have comparable generations and variants, so I decided to drop them since there are also 24 and 128 values missing respectively. The remaining names I decided to One-hot encode, with ones with less than 10 values being combined into "others"

In [197]:
proc_counts = df["Processor_name"].value_counts()
rare_procs = proc_counts[proc_counts < 10].index
df["Processor_name"] = df["Processor_name"].replace(rare_procs, "Other")
df = pd.get_dummies(df, columns=["Processor_name"])

In [198]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Processor_variant,Processor_gen,Core_per_processor,Total_processor,Execution_units,Low_Power_Cores,...,Processor_name_AMD Ryzen 3,Processor_name_AMD Ryzen 5,Processor_name_AMD Ryzen 7,Processor_name_Intel Celeron,Processor_name_Intel Core Ultra,Processor_name_Intel Core i3,Processor_name_Intel Core i5,Processor_name_Intel Core i7,Processor_name_Intel Core i9,Processor_name_Other
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,5600H,5.0,6.0,,,0.0,...,False,True,False,False,False,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,7320U,7.0,4.0,,,0.0,...,True,False,False,False,False,False,False,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,1215U,12.0,6.0,2.0,4.0,0.0,...,False,False,False,False,False,True,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,1240P,12.0,12.0,4.0,8.0,0.0,...,False,False,False,False,False,False,True,False,False,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,1115G4,11.0,2.0,,,0.0,...,False,False,False,False,False,True,False,False,False,False


#### Dropping variant and generation

In [199]:
df = df.drop(columns=["Processor_variant", "Processor_gen"])

df = df.reset_index(drop=True)

In [200]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Core_per_processor,Total_processor,Execution_units,Low_Power_Cores,Energy_Efficient_Units,Threads,...,Processor_name_AMD Ryzen 3,Processor_name_AMD Ryzen 5,Processor_name_AMD Ryzen 7,Processor_name_Intel Celeron,Processor_name_Intel Core Ultra,Processor_name_Intel Core i3,Processor_name_Intel Core i5,Processor_name_Intel Core i7,Processor_name_Intel Core i9,Processor_name_Other
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,6.0,,,0.0,0,12.0,...,False,True,False,False,False,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,4.0,,,0.0,0,8.0,...,True,False,False,False,False,False,False,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,6.0,2.0,4.0,0.0,0,8.0,...,False,False,False,False,False,True,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,12.0,4.0,8.0,0.0,0,16.0,...,False,False,False,False,False,False,True,False,False,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,2.0,,,0.0,0,4.0,...,False,False,False,False,False,True,False,False,False,False


#### Dropping "Core_per_processor"

I am dropping the 10 entries where I have missing values, because I have enough entries left

In [201]:
df = df.drop(columns=["Core_per_processor"])

#### Dropping Total_processor and Execution_units

as i said before I am dropping them because they have almost half missing in each column

In [202]:
df = df.drop(columns=["Total_processor", "Execution_units"])

#### Dropping missing Threads values

In [203]:
df = df.dropna(subset=["Threads"])

df = df.reset_index(drop=True)

#### Dropping missing RAM-type values

In [204]:
df = df.dropna(subset=["RAM_type"])

df = df.reset_index(drop=True)

In [221]:
df["RAM_type"].value_counts()

RAM_type
DDR4       509
DDR5       178
LPDDR5     173
LPDDR4X     49
LPDDR5X     37
LPDDR4      11
LPDDRX4      2
DDR3         1
DDR6         1
LPDDR3       1
PDDR5X       1
Name: count, dtype: int64

In [222]:
ram_type_counts = df["RAM_type"].value_counts()
rare_ram_Types = ram_type_counts[ram_type_counts < 12].index
df["RAM_type"] = df["RAM_type"].replace(rare_procs, "Other")
df = pd.get_dummies(df, columns=["RAM_type"])

In [223]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Low_Power_Cores,Energy_Efficient_Units,Threads,RAM_GB,Storage_capacity_GB,Graphics_name,...,RAM_type_DDR4,RAM_type_DDR5,RAM_type_DDR6,RAM_type_LPDDR3,RAM_type_LPDDR4,RAM_type_LPDDR4X,RAM_type_LPDDR5,RAM_type_LPDDR5X,RAM_type_LPDDRX4,RAM_type_PDDR5X
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,0.0,0,12.0,8,512,AMD Radeon RX 6500M,...,True,False,False,False,False,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,0.0,0,8.0,8,512,AMD Radeon Graphics,...,False,False,False,False,False,False,True,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,0.0,0,8.0,8,512,Intel UHD Graphics,...,True,False,False,False,False,False,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,0.0,0,16.0,16,512,Intel Iris Xe Graphics,...,False,False,False,False,False,False,True,False,False,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,0.0,0,4.0,8,512,Intel UHD Graphics,...,False,False,False,False,True,False,False,False,False,False


#### Transforming Storage_type

In [205]:
df["Storage_type"].unique()

array([' SSD', 'Hard Disk & SSD', ' Hard Disk'], dtype=object)

In [206]:
df = pd.get_dummies(df, columns=["Storage_type"])

In [207]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Low_Power_Cores,Energy_Efficient_Units,Threads,RAM_GB,RAM_type,Storage_capacity_GB,...,Processor_name_Intel Celeron,Processor_name_Intel Core Ultra,Processor_name_Intel Core i3,Processor_name_Intel Core i5,Processor_name_Intel Core i7,Processor_name_Intel Core i9,Processor_name_Other,Storage_type_ Hard Disk,Storage_type_ SSD,Storage_type_Hard Disk & SSD
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,0.0,0,12.0,8,DDR4,512,...,False,False,False,False,False,False,False,False,True,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,0.0,0,8.0,8,LPDDR5,512,...,False,False,False,False,False,False,False,False,True,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,0.0,0,8.0,8,DDR4,512,...,False,False,True,False,False,False,False,False,True,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,0.0,0,16.0,16,LPDDR5,512,...,False,False,False,True,False,False,False,False,True,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,0.0,0,4.0,8,LPDDR4,512,...,False,False,True,False,False,False,False,False,True,False


#### Dropping missing Graphics_name values

In [208]:
df = df.dropna(subset=["Graphics_name"])

df = df.reset_index(drop=True)

In [209]:
df["Graphics_name"].value_counts()

Graphics_name
Intel Iris Xe Graphics      98
Intel UHD Graphics          79
NVIDIA GeForce RTX 3050     74
Intel Integrated Iris Xe    66
Intel Integrated UHD        62
                            ..
Iris Xe Graphics             1
Intel UHD                    1
Nvidia RTX 3050              1
AMD Radeon 7 Graphics        1
NVIDIA GEFORCE RTX 4050      1
Name: count, Length: 120, dtype: int64

In [210]:
import re

def simplify_gpu(name):
    name = str(name).lower()
    if "rtx" in name: return "NVIDIA RTX"
    if "gtx" in name: return "NVIDIA GTX"
    if "nvidia" in name: return "NVIDIA Other"
    if "rx" in name: return "AMD RX"
    if "vega" in name: return "AMD Vega"
    if "radeon" in name: return "AMD Radeon Other"
    if "iris" in name: return "Intel Iris"
    if "uhd" in name: return "Intel UHD"
    if "arc" in name: return "Intel Arc"
    if "intel" in name: return "Intel Integrated"
    if "apple" in name: return "Apple GPU"
    return "Other"

df["GPU_simple"] = df["Graphics_name"].apply(simplify_gpu)


In [211]:
df["GPU_simple"].value_counts()

GPU_simple
NVIDIA RTX          309
Intel Iris          218
Intel UHD           188
AMD Radeon Other    120
Intel Integrated     37
NVIDIA GTX           27
Intel Arc            25
AMD RX               13
Other                13
AMD Vega              7
NVIDIA Other          6
Name: count, dtype: int64

In [212]:
df = pd.get_dummies(df, columns=["GPU_simple"])


In [213]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Low_Power_Cores,Energy_Efficient_Units,Threads,RAM_GB,RAM_type,Storage_capacity_GB,...,GPU_simple_AMD Radeon Other,GPU_simple_AMD Vega,GPU_simple_Intel Arc,GPU_simple_Intel Integrated,GPU_simple_Intel Iris,GPU_simple_Intel UHD,GPU_simple_NVIDIA GTX,GPU_simple_NVIDIA Other,GPU_simple_NVIDIA RTX,GPU_simple_Other
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,0.0,0,12.0,8,DDR4,512,...,False,False,False,False,False,False,False,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,0.0,0,8.0,8,LPDDR5,512,...,True,False,False,False,False,False,False,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,0.0,0,8.0,8,DDR4,512,...,False,False,False,False,False,True,False,False,False,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,0.0,0,16.0,16,LPDDR5,512,...,False,False,False,False,True,False,False,False,False,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,0.0,0,4.0,8,LPDDR4,512,...,False,False,False,False,False,True,False,False,False,False


#### Dropping missing Graphics_brand values

In [214]:
df = df.dropna(subset=["Graphics_brand"])

df = df.reset_index(drop=True)

In [215]:
df["Graphics_brand"].value_counts()

Graphics_brand
Intel     468
NVIDIA    343
AMD       151
Adreno      1
Name: count, dtype: int64

In [216]:
df = pd.get_dummies(df, columns=["Graphics_brand"])

In [217]:
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Price,Rating,Low_Power_Cores,Energy_Efficient_Units,Threads,RAM_GB,RAM_type,Storage_capacity_GB,...,GPU_simple_Intel Iris,GPU_simple_Intel UHD,GPU_simple_NVIDIA GTX,GPU_simple_NVIDIA Other,GPU_simple_NVIDIA RTX,GPU_simple_Other,Graphics_brand_AMD,Graphics_brand_Adreno,Graphics_brand_Intel,Graphics_brand_NVIDIA
0,0,HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...,503.99,4.3,0.0,0,12.0,8,DDR4,512,...,False,False,False,False,False,False,True,False,False,False
1,1,Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...,266.9,4.45,0.0,0,8.0,8,LPDDR5,512,...,False,False,False,False,False,False,True,False,False,False
2,2,HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...,370.12,4.65,0.0,0,8.0,8,DDR4,512,...,False,True,False,False,False,False,False,False,True,False
3,3,Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...,699.9,4.75,0.0,0,16.0,16,LPDDR5,512,...,True,False,False,False,False,False,False,False,True,False
4,4,Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...,239.9,4.25,0.0,0,4.0,8,LPDDR4,512,...,False,True,False,False,False,False,False,False,True,False


#### Dropping Graphics_GB

Here are 652 values missing so I am dropping the whole column

In [218]:
df = df.drop(columns=["Graphics_GB"])

#### Dropping missing Graphics_integreted values

I just want to mention that there is a typo in the dataset, it should be integrated with an "a" not with an "e"

In [219]:
df = df.dropna(subset=["Graphics_integreted"])

df = df.reset_index(drop=True)

In [224]:
df["Graphics_integreted"].value_counts()

Graphics_integreted
False    722
True     241
Name: count, dtype: int64