In [None]:
#importing relevant dependencies for data cleaning
import pandas as pd
import re

In [None]:
# Location of the raw laptop price CSV, relative to this notebook
path = "../data/raw/laptop_price.csv"
# The file is read with the latin1 encoding rather than the UTF-8 default
raw_data = pd.read_csv(path, encoding="latin1")
# Work on a copy so raw_data itself is never modified by the cleaning steps
df = raw_data.copy()

#### UNDERSTANDING THE DATASET

In [None]:
# Dimensions of the dataset as (rows, columns)
df.shape

(1303, 13)

In [None]:
# Column names, non-null counts and dtypes of the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Memory            1303 non-null   object 
 9   Gpu               1303 non-null   object 
 10  OpSys             1303 non-null   object 
 11  Weight            1303 non-null   object 
 12  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(10)
memory usage: 132.5+ KB


In [None]:
# Preview the first 5 rows of the dataset
df.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [None]:
# Summary statistics for every column (numeric and non-numeric alike),
# transposed so that each dataset column becomes one row of the summary
df.describe(include='all').T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
laptop_ID,1303.0,,,,660.155794,381.172104,1.0,331.5,659.0,990.5,1320.0
Company,1303.0,19.0,Dell,297.0,,,,,,,
Product,1303.0,618.0,XPS 13,30.0,,,,,,,
TypeName,1303.0,6.0,Notebook,727.0,,,,,,,
Inches,1303.0,,,,15.017191,1.426304,10.1,14.0,15.6,15.6,18.4
ScreenResolution,1303.0,40.0,Full HD 1920x1080,507.0,,,,,,,
Cpu,1303.0,118.0,Intel Core i5 7200U 2.5GHz,190.0,,,,,,,
Ram,1303.0,9.0,8GB,619.0,,,,,,,
Memory,1303.0,39.0,256GB SSD,412.0,,,,,,,
Gpu,1303.0,110.0,Intel HD Graphics 620,281.0,,,,,,,


#### CLEANING THE DATA

In [None]:
# Report how many missing values each column contains
print("\nMissing values per column:", df.isna().sum(), sep="\n")


Missing values per column:
laptop_ID           0
Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price_euros         0
dtype: int64


In [None]:
# Check duplicates.
# laptop_ID appears to be a row identifier (it is dropped right after this
# check), so leaving it in the comparison would stop otherwise identical
# laptops from ever being reported as duplicates. Compare on every other
# column instead; the list comprehension also works if laptop_ID is already gone.
feature_cols = [c for c in df.columns if c != 'laptop_ID']
print("\nNumber of duplicate rows:", df.duplicated(subset=feature_cols).sum())


Number of duplicate rows: 0


In [None]:
# Column labels currently present in the frame
df.columns

Index(['laptop_ID', 'Company', 'Product', 'TypeName', 'Inches',
       'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight',
       'Price_euros'],
      dtype='object')

In [None]:
# Drop the identifier column. errors='ignore' keeps this cell safe to re-run
# once laptop_ID has already been removed (a plain drop would raise KeyError).
df = df.drop(columns=['laptop_ID'], errors='ignore')

In [None]:
# Pull the first run of digits out of the Ram text and store it as an integer,
# e.g. '8GB' -> 8
df['Ram (GB)'] = (
    df['Ram']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(int)
)
df[['Ram', 'Ram (GB)']].head()

Unnamed: 0,Ram,Ram (GB)
0,8GB,8
1,8GB,8
2,8GB,8
3,16GB,16
4,8GB,8


In [None]:
# Pull the first run of digits/dots out of the Weight text and store it as a
# float, e.g. '2.5kg' -> 2.5
df['Weight (kg)'] = (
    df['Weight']
    .astype(str)
    .str.extract(r'([\d\.]+)', expand=False)
    .astype(float)
)
df[['Weight', 'Weight (kg)']].head()

Unnamed: 0,Weight,Weight (kg)
0,1.37kg,1.37
1,1.34kg,1.34
2,1.86kg,1.86
3,1.83kg,1.83
4,1.37kg,1.37


In [None]:
# Inspect the first 20 distinct Memory strings to see which formats occur
df['Memory'].unique()[:20]

array(['128GB SSD', '128GB Flash Storage', '256GB SSD', '512GB SSD',
       '500GB HDD', '256GB Flash Storage', '1TB HDD',
       '32GB Flash Storage', '128GB SSD +  1TB HDD',
       '256GB SSD +  256GB SSD', '64GB Flash Storage',
       '256GB SSD +  1TB HDD', '256GB SSD +  2TB HDD', '32GB SSD',
       '2TB HDD', '64GB SSD', '1.0TB Hybrid', '512GB SSD +  1TB HDD',
       '1TB SSD', '256GB SSD +  500GB HDD'], dtype=object)

In [None]:
# CLEANING ['Memory'] COLUMN

def parse_memory_cell(s):
    """Break one Memory description into per-type capacities in GB.

    The text is split on "+" or ","; for each piece the first number is
    read as its capacity (multiplied by 1024 when the piece mentions "TB")
    and added to the bucket of the first storage keyword found in it,
    checked in the order HDD, SSD, Hybrid, Flash. Pieces without a number
    or without a known keyword contribute nothing.

    Parameters
    ----------
    s : value of the Memory column, e.g. "128GB SSD +  1TB HDD".

    Returns
    -------
    dict with float values for the keys "HDD (GB)", "SSD (GB)",
    "Hybrid (GB)" and "Flash Storage (GB)"; all 0.0 when ``s`` is missing.
    """
    totals = dict.fromkeys(
        ("HDD (GB)", "SSD (GB)", "Hybrid (GB)", "Flash Storage (GB)"), 0.0
    )
    if pd.isna(s):
        return totals

    # (keyword, output key) pairs; the first keyword found in a piece wins
    storage_kinds = (
        (r"HDD", "HDD (GB)"),
        (r"SSD", "SSD (GB)"),
        (r"Hybrid", "Hybrid (GB)"),
        (r"Flash", "Flash Storage (GB)"),
    )

    for chunk in re.split(r"\s*\+|,\s*", str(s)):
        chunk = chunk.strip()
        # empty pieces and pieces without any number are skipped
        number = re.search(r"(\d+(?:\.\d+)?)", chunk) if chunk else None
        if number is None:
            continue

        capacity = float(number.group(1))
        # terabyte sizes are expressed in GB
        if re.search(r"TB", chunk, flags=re.I):
            capacity *= 1024

        for keyword, column in storage_kinds:
            if re.search(keyword, chunk, flags=re.I):
                totals[column] += capacity
                break
    return totals


# Parse every Memory string into a dict of capacities, then build the
# per-type columns with a single DataFrame construction. This gives the same
# frame as .apply(pd.Series) without creating one pd.Series object per row.
mem_parsed = pd.DataFrame(
    df["Memory"].map(parse_memory_cell).tolist(), index=df.index
)

# merge into df
for col in mem_parsed.columns:
    df[col] = mem_parsed[col]


# total storage = sum over the four storage types
df["Total_Storage (GB)"] = df[
    ["HDD (GB)", "SSD (GB)", "Hybrid (GB)", "Flash Storage (GB)"]
].sum(axis=1)

# preview
df[
    [
        "Memory",
        "HDD (GB)",
        "SSD (GB)",
        "Hybrid (GB)",
        "Flash Storage (GB)",
        "Total_Storage (GB)",
    ]
].head()

Unnamed: 0,Memory,HDD (GB),SSD (GB),Hybrid (GB),Flash Storage (GB),Total_Storage (GB)
0,128GB SSD,0.0,128.0,0.0,0.0,128.0
1,128GB Flash Storage,0.0,0.0,0.0,128.0,128.0
2,256GB SSD,0.0,256.0,0.0,0.0,256.0
3,512GB SSD,0.0,512.0,0.0,0.0,512.0
4,256GB SSD,0.0,256.0,0.0,0.0,256.0


In [None]:
# CLEANING ['OpSys'] COLUMN

def clean_opsys(x):
    """Map a raw operating-system label onto a coarse OS family.

    The value is converted to a lower-cased string and tested against the
    rules below in order; the label of the first rule with a matching
    substring is returned. A value that is exactly "no" (ignoring
    surrounding whitespace) also counts as 'No OS'. Anything that matches
    nothing is reported as 'Other'.
    """
    lowered = str(x).lower()

    # (substrings, label) rules, evaluated top to bottom
    rules = (
        (('windows',), 'Windows'),
        (('mac',), 'MacOS'),
        (('linux', 'ubuntu'), 'Linux'),
        (('chrome',), 'Chrome OS'),
        (('android',), 'Android'),
        (('no os',), 'No OS'),
    )
    for needles, label in rules:
        if any(needle in lowered for needle in needles):
            return label

    return 'No OS' if lowered.strip() == 'no' else 'Other'

# Collapse the raw OS labels into families, then tabulate raw vs. cleaned
# labels (20 most frequent pairs) to verify the mapping
df['OpSys_Clean'] = df['OpSys'].map(clean_opsys)
df[['OpSys', 'OpSys_Clean']].value_counts().head(20)


OpSys         OpSys_Clean
Windows 10    Windows        1072
No OS         No OS            66
Linux         Linux            62
Windows 7     Windows          45
Chrome OS     Chrome OS        27
macOS         MacOS            13
Mac OS X      MacOS             8
Windows 10 S  Windows           8
Android       Android           2
Name: count, dtype: int64

In [None]:
# CLEANING ['ScreenResolution'] COLUMN

# Screen_Width  : first digit run directly followed by 'x'
# Screen_Height : first digit run directly preceded by 'x'
# Touchscreen   : 1 when the text mentions "Touchscreen" (any case), else 0
df['Screen_Width'] = (
    df['ScreenResolution'].astype(str).str.extract(r'(\d+)x', expand=False).astype(int)
)
df['Screen_Height'] = (
    df['ScreenResolution'].astype(str).str.extract(r'x(\d+)', expand=False).astype(int)
)
df['Touchscreen'] = (
    df['ScreenResolution']
    .str.contains('Touchscreen', case=False, na=False)
    .astype(int)
)
df[['ScreenResolution', 'Screen_Width', 'Screen_Height', 'Touchscreen']].head()


Unnamed: 0,ScreenResolution,Screen_Width,Screen_Height,Touchscreen
0,IPS Panel Retina Display 2560x1600,2560,1600,0
1,1440x900,1440,900,0
2,Full HD 1920x1080,1920,1080,0
3,IPS Panel Retina Display 2880x1800,2880,1800,0
4,IPS Panel Retina Display 2560x1600,2560,1600,0


In [None]:
# CLEANING ['Cpu'] and ['Gpu'] COLUMNS

# CPU clock speed: the number written directly in front of "GHz"
df['Cpu_Speed_GHz'] = (
    df['Cpu'].astype(str).str.extract(r'([\d\.]+)GHz', expand=False).astype(float)
)

# CPU brand: first whitespace-separated token of the CPU text
df['Cpu_Brand'] = df['Cpu'].astype(str).str.split().str.get(0)

# CPU model name: CPU text with the clock speed removed and the leading brand
# stripped; the brand alternatives are built from the brands found in the data
brands = df['Cpu_Brand'].unique().tolist()
pattern = r'^(' + '|'.join(re.escape(brand) for brand in brands) + r')\s+'
df['Cpu_Name'] = (
    df['Cpu']
    .astype(str)
    .str.replace(r'[\d\.]+GHz', '', regex=True)
    .str.strip()
    .str.replace(pattern, '', regex=True)
)

# GPU brand is the first token, GPU name the remaining tokens joined by a space
df['Gpu_Brand'] = df['Gpu'].astype(str).str.split().str.get(0)
df['Gpu_Name'] = df['Gpu'].astype(str).str.split().str[1:].str.join(' ')
df[['Cpu', 'Cpu_Brand', 'Cpu_Name', 'Cpu_Speed_GHz', 'Gpu', 'Gpu_Brand', 'Gpu_Name']].head()

Unnamed: 0,Cpu,Cpu_Brand,Cpu_Name,Cpu_Speed_GHz,Gpu,Gpu_Brand,Gpu_Name
0,Intel Core i5 2.3GHz,Intel,Core i5,2.3,Intel Iris Plus Graphics 640,Intel,Iris Plus Graphics 640
1,Intel Core i5 1.8GHz,Intel,Core i5,1.8,Intel HD Graphics 6000,Intel,HD Graphics 6000
2,Intel Core i5 7200U 2.5GHz,Intel,Core i5 7200U,2.5,Intel HD Graphics 620,Intel,HD Graphics 620
3,Intel Core i7 2.7GHz,Intel,Core i7,2.7,AMD Radeon Pro 455,AMD,Radeon Pro 455
4,Intel Core i5 3.1GHz,Intel,Core i5,3.1,Intel Iris Plus Graphics 650,Intel,Iris Plus Graphics 650


In [None]:
# Check the derived numeric columns for missing values, to make sure no
# accidental NaN was introduced during conversion
df[['Ram (GB)','Weight (kg)','HDD (GB)','SSD (GB)','Total_Storage (GB)','Cpu_Speed_GHz']].isnull().sum()

Ram (GB)              0
Weight (kg)           0
HDD (GB)              0
SSD (GB)              0
Total_Storage (GB)    0
Cpu_Speed_GHz         0
dtype: int64

In [None]:
def outlier_bounds(s, k=1.5):
    """Return the (lower, upper) IQR-based outlier limits of ``s``.

    With Q1 and Q3 the 25% and 75% quantiles of ``s`` and IQR = Q3 - Q1,
    the limits are Q1 - k * IQR and Q3 + k * IQR.

    Parameters
    ----------
    s : object exposing ``quantile`` (called here on pandas Series).
    k : multiplier applied to the IQR; 1.5 by default.
    """
    first_quartile, third_quartile = s.quantile(0.25), s.quantile(0.75)
    margin = k * (third_quartile - first_quartile)
    return first_quartile - margin, third_quartile + margin

num_cols = ['Price_euros','Weight (kg)','Inches','Ram (GB)','Total_Storage (GB)','Cpu_Speed_GHz']


def _summarise_outliers(column):
    """Rounded IQR limits of ``column`` and the number of values outside them."""
    lower, upper = outlier_bounds(column.dropna())
    outside = (column < lower) | (column > upper)
    return {
        'low': round(lower, 2),
        'high': round(upper, 2),
        'count': int(outside.sum()),
    }


# one summary entry per numeric column, in the order given by num_cols
outlier_summary = {name: _summarise_outliers(df[name]) for name in num_cols}

outlier_summary

{'Price_euros': {'low': -734.32, 'high': 2821.2, 'count': 29},
 'Weight (kg)': {'low': 0.3, 'high': 3.5, 'count': 46},
 'Inches': {'low': 11.6, 'high': 18.0, 'count': 39},
 'Ram (GB)': {'low': -2.0, 'high': 14.0, 'count': 221},
 'Total_Storage (GB)': {'low': -896.0, 'high': 2176.0, 'count': 13},
 'Cpu_Speed_GHz': {'low': 0.95, 'high': 3.75, 'count': 4}}

In [None]:
# Laptops whose price lies outside the IQR limits, cheapest first (at most 30)
low, high = outlier_bounds(df['Price_euros'])
df.loc[
    (df['Price_euros'] < low) | (df['Price_euros'] > high),
    ['Company', 'Product', 'Price_euros'],
].sort_values('Price_euros').head(30)

Unnamed: 0,Company,Product,Price_euros
190,Lenovo,Thinkpad Yoga,2824.0
17,Apple,MacBook Pro,2858.0
758,Dell,Alienware 17,2868.99
297,Dell,Precision 7720,2884.86
1103,HP,ZBook 17,2899.0
778,Razer,Blade Pro,2899.0
1017,Lenovo,ThinkPad P70,2968.0
517,Asus,ROG Zephyrus,2968.0
563,Lenovo,Thinkpad P71,2999.0
247,Asus,Rog G701VIK-BA060T,2999.0


In [None]:
# Sanity check: list any rows with a negative RAM size
df[df['Ram (GB)'] < 0]

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,...,Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name


In [None]:
# Rows lighter than 0.3 kg, the lower IQR limit reported for Weight (kg)
df[df['Weight (kg)'] < 0.3]

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,...,Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name


In [None]:

# Rows with a CPU clock speed below 1.0 GHz
df[df['Cpu_Speed_GHz'] < 1.0]

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,...,Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name
697,Asus,Chromebook Flip,2 in 1 Convertible,12.5,Full HD / Touchscreen 1920x1080,Intel Core M M3-6Y30 0.9GHz,4GB,64GB Flash Storage,Intel HD Graphics 515,Chrome OS,...,64.0,Chrome OS,1920,1080,1,0.9,Intel,Core M M3-6Y30,Intel,HD Graphics 515
1261,Asus,ZenBook UX305CA-UBM1,Ultrabook,13.3,IPS Panel Full HD 1920x1080,Intel Core M 6Y30 0.9GHz,8GB,512GB SSD,Intel HD Graphics 515,Windows 10,...,512.0,Windows,1920,1080,0,0.9,Intel,Core M 6Y30,Intel,HD Graphics 515
1275,Asus,ZenBook UX305CA-UBM1,Ultrabook,13.3,IPS Panel Full HD 1920x1080,Intel Core M 6Y30 0.9GHz,8GB,512GB SSD,Intel HD Graphics 515,Windows 10,...,512.0,Windows,1920,1080,0,0.9,Intel,Core M 6Y30,Intel,HD Graphics 515
1289,Asus,ZenBook UX305CA-UBM1,Ultrabook,13.3,IPS Panel Full HD 1920x1080,Intel Core M 6Y30 0.9GHz,8GB,512GB SSD,Intel HD Graphics 515,Windows 10,...,512.0,Windows,1920,1080,0,0.9,Intel,Core M 6Y30,Intel,HD Graphics 515


In [None]:

# Sanity check: list any rows with a negative total storage
df[df['Total_Storage (GB)'] < 0]

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,...,Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name


In [None]:
# Preview the frame, which now holds the original and the derived columns
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,...,Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,...,128.0,MacOS,2560,1600,0,2.3,Intel,Core i5,Intel,Iris Plus Graphics 640
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,...,128.0,MacOS,1440,900,0,1.8,Intel,Core i5,Intel,HD Graphics 6000
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,...,256.0,No OS,1920,1080,0,2.5,Intel,Core i5 7200U,Intel,HD Graphics 620
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,...,512.0,MacOS,2880,1800,0,2.7,Intel,Core i7,AMD,Radeon Pro 455
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,...,256.0,MacOS,2560,1600,0,3.1,Intel,Core i5,Intel,Iris Plus Graphics 650


In [None]:
# DROPPING REDUNDANT COLUMNS

# Raw text columns that have been replaced by parsed features, plus Product
cols_to_drop = [
    'Product',
    'ScreenResolution',
    'Cpu',
    'Ram',
    'Memory',
    'Gpu',
    'OpSys',
    'Weight',
]

# df itself is left untouched; the reduced frame is stored under a new name
df_cleaned = df.drop(cols_to_drop, axis=1)

print("Shape before:", df.shape)
print("Shape after:", df_cleaned.shape)
df_cleaned.head()

Shape before: (1303, 28)
Shape after: (1303, 20)


Unnamed: 0,Company,TypeName,Inches,Price_euros,Ram (GB),Weight (kg),HDD (GB),SSD (GB),Hybrid (GB),Flash Storage (GB),Total_Storage (GB),OpSys_Clean,Screen_Width,Screen_Height,Touchscreen,Cpu_Speed_GHz,Cpu_Brand,Cpu_Name,Gpu_Brand,Gpu_Name
0,Apple,Ultrabook,13.3,1339.69,8,1.37,0.0,128.0,0.0,0.0,128.0,MacOS,2560,1600,0,2.3,Intel,Core i5,Intel,Iris Plus Graphics 640
1,Apple,Ultrabook,13.3,898.94,8,1.34,0.0,0.0,0.0,128.0,128.0,MacOS,1440,900,0,1.8,Intel,Core i5,Intel,HD Graphics 6000
2,HP,Notebook,15.6,575.0,8,1.86,0.0,256.0,0.0,0.0,256.0,No OS,1920,1080,0,2.5,Intel,Core i5 7200U,Intel,HD Graphics 620
3,Apple,Ultrabook,15.4,2537.45,16,1.83,0.0,512.0,0.0,0.0,512.0,MacOS,2880,1800,0,2.7,Intel,Core i7,AMD,Radeon Pro 455
4,Apple,Ultrabook,13.3,1803.6,8,1.37,0.0,256.0,0.0,0.0,256.0,MacOS,2560,1600,0,3.1,Intel,Core i5,Intel,Iris Plus Graphics 650


In [None]:
categorical_cols = [
    'Company', 'TypeName', 'OpSys_Clean',
    'Cpu_Brand', 'Cpu_Name', 'Gpu_Brand', 'Gpu_Name',
]

# For each categorical column: number of distinct values, the first 10 of
# them, and a separator line
for col in categorical_cols:
    print(
        f"{col}: {df[col].nunique()} unique values",
        df[col].unique()[:10],
        "----",
        sep="\n",
    )

Company: 19 unique values
['Apple' 'HP' 'Acer' 'Asus' 'Dell' 'Lenovo' 'Chuwi' 'MSI' 'Microsoft'
 'Toshiba']
----
TypeName: 6 unique values
['Ultrabook' 'Notebook' 'Netbook' 'Gaming' '2 in 1 Convertible'
 'Workstation']
----
OpSys_Clean: 6 unique values
['MacOS' 'No OS' 'Windows' 'Linux' 'Android' 'Chrome OS']
----
Cpu_Brand: 3 unique values
['Intel' 'AMD' 'Samsung']
----
Cpu_Name: 93 unique values
['Core i5' 'Core i5 7200U' 'Core i7' 'A9-Series 9420' 'Core i7 8550U'
 'Core i5 8250U' 'Core i3 6006U' 'Core M m3' 'Core i7 7500U'
 'Core i3 7100U']
----
Gpu_Brand: 4 unique values
['Intel' 'AMD' 'Nvidia' 'ARM']
----
Gpu_Name: 106 unique values
['Iris Plus Graphics 640' 'HD Graphics 6000' 'HD Graphics 620'
 'Radeon Pro 455' 'Iris Plus Graphics 650' 'Radeon R5' 'Iris Pro Graphics'
 'GeForce MX150' 'UHD Graphics 620' 'HD Graphics 520']
----


In [None]:
# Save cleaned data as CSV; index=False leaves the row index out of the file
df_cleaned.to_csv("../data/processed/laptops_clean.csv", index=False)

#### ENCODING THE CLEANED DATA

In [None]:
# Create a copy of the cleaned data. Start from df_cleaned, not df, so the
# redundant raw text columns removed earlier (Product, ScreenResolution, Cpu,
# Ram, Memory, Gpu, OpSys, Weight) are not carried into the encoded dataset.
df_encoded = df_cleaned.copy()

In [None]:
# One-Hot Encoder for the low-cardinality categorical features
low_cardinality_features = [
    "Company",
    "TypeName",
    "OpSys_Clean",
    "Cpu_Brand",
    "Gpu_Brand",
]
# drop_first=True is passed so that the first level of each feature gets no
# indicator column of its own
df_encoded = pd.get_dummies(
    df_encoded, columns=low_cardinality_features, drop_first=True
)

In [None]:
# Target Encoder for the high-cardinality categorical features:
# every CPU / GPU model name is replaced by the mean Price_euros of the rows
# sharing that name.
# NOTE(review): the means are computed over every row of df, each row's own
# price included. If these features feed a model evaluated on a hold-out
# split, the means should be fitted on the training rows only. TODO confirm
# how the encoded file is used downstream.
cpu_mean_price = df["Price_euros"].groupby(df["Cpu_Name"]).mean()
df_encoded["Cpu_Name_TE"] = df["Cpu_Name"].map(cpu_mean_price)

gpu_mean_price = df["Price_euros"].groupby(df["Gpu_Name"]).mean()
df_encoded["Gpu_Name_TE"] = df["Gpu_Name"].map(gpu_mean_price)

In [None]:
# Drop original high-cardinality columns (now represented by the *_TE columns).
# Reassigning instead of inplace=True, together with errors="ignore", keeps
# this cell safe to re-run after the columns have already been removed.
df_encoded = df_encoded.drop(columns=["Cpu_Name", "Gpu_Name"], errors="ignore")

df_encoded.head()

Unnamed: 0,Product,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,...,OpSys_Clean_MacOS,OpSys_Clean_No OS,OpSys_Clean_Windows,Cpu_Brand_Intel,Cpu_Brand_Samsung,Gpu_Brand_ARM,Gpu_Brand_Intel,Gpu_Brand_Nvidia,Cpu_Name_TE,Gpu_Name_TE
0,MacBook Pro,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,...,True,False,False,True,False,False,True,False,1391.948333,1764.01125
1,Macbook Air,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,...,True,False,False,True,False,False,True,False,1391.948333,1022.728
2,250 G6,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,...,False,True,False,True,False,False,True,False,919.318083,1141.089823
3,MacBook Pro,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,...,True,False,False,True,False,False,False,False,2493.8475,2537.45
4,MacBook Pro,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,...,True,False,False,True,False,False,True,False,1391.948333,1921.8


In [None]:
# Save encoded data as CSV; index=False leaves the row index out of the file
df_encoded.to_csv("../data/encoded/laptops_encoded.csv", index=False)