In [3]:
import pandas as pd

In [4]:
df = pd.read_csv('specs_simplified_all.csv')
df

Unnamed: 0,Procesador,RAM,Tipo RAM,Almacenamiento,Graficos,Pantalla,Resolucion,Sistema Operativo,Bateria,Precio
0,Intel Core i5-1135G7,8.0,DDR4,512.0,Intel Iris Xe Graphics,15.6,1920x1080,No,70.0,54900
1,AMD Ryzen 7-8845HS,32.0,DDR5,1.0,AMD Radeon 780M,17.3,2560x1440,Windows 11 Home,83.0,186241
2,Intel Core i7-14650HX,32.0,DDR5,1.0,Intel® UHD Graphics,16.1,1920x1080,FreeDOS,70.0,147111
3,Intel Core i7-12650H,16.0,DDR4,512.0,Intel® UHD Graphics,15.6,1920x1080,FreeDOS,70.0,96046
4,AMD Ryzen 7-8845HS,32.0,DDR5,1.0,AMD Radeon 780M,17.3,1920x1080,FreeDOS,83.0,147111
...,...,...,...,...,...,...,...,...,...,...
242,Intel Core i7-1255U,8.0,DDR4,512.0,Intel Iris Xe Graphics,15.6,1920x1080,No,38.0,78299
243,Intel Core Ultra 9-185H,32.0,DDR5,2.0,Intel Arc Graphics,14.0,2880x1800,Windows 11 Home,75.0,259900
244,AMD Ryzen 5-7520U,8.0,DDR5,512.0,AMD Radeon 610M,15.6,1920x1080,Windows 11 Home,42.0,59900
245,AMD Ryzen AI 9 HX-370,32.0,DDR5,1.0,AMD Radeon 890M,13.3,2880x1800,Windows 11 Home,73.0,219900


In [5]:
df['Precio'] = pd.to_numeric(df['Precio'].str.replace(',', '').str.replace('.', ''), errors='coerce') / 100
df['Precio']

0       549.00
1      1862.41
2      1471.11
3       960.46
4      1471.11
        ...   
242     782.99
243    2599.00
244     599.00
245    2199.00
246    1219.99
Name: Precio, Length: 247, dtype: float64

In [6]:
df['Sistema Operativo'] = df['Sistema Operativo'].replace('No', 'FreeDOS')
df['Sistema Operativo']

0              FreeDOS
1      Windows 11 Home
2              FreeDOS
3              FreeDOS
4              FreeDOS
            ...       
242            FreeDOS
243    Windows 11 Home
244    Windows 11 Home
245    Windows 11 Home
246     Windows 11 Pro
Name: Sistema Operativo, Length: 247, dtype: object

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Fill missing values with the mean of the respective columns
imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df.select_dtypes(include=[float])))
df_imputed.columns = df.select_dtypes(include=[float]).columns

# Normalize the data
scaler = StandardScaler()
df_normalized = pd.DataFrame(scaler.fit_transform(df_imputed), columns=df_imputed.columns)

# Combine normalized data with non-numeric columns
df_transformed = pd.concat([df_normalized, df.select_dtypes(exclude=[float]).reset_index(drop=True)], axis=1)
df_transformed

Unnamed: 0,RAM,Almacenamiento,Pantalla,Bateria,Precio,Procesador,Tipo RAM,Graficos,Resolucion,Sistema Operativo
0,-1.088144,0.860127,0.267989,-0.048629,-0.906083,Intel Core i5-1135G7,DDR4,Intel Iris Xe Graphics,1920x1080,FreeDOS
1,1.407287,-1.202271,1.799043,-0.026625,0.714939,AMD Ryzen 7-8845HS,DDR5,AMD Radeon 780M,2560x1440,Windows 11 Home
2,1.407287,-1.202271,0.718299,-0.048629,0.231993,Intel Core i7-14650HX,DDR5,Intel® UHD Graphics,1920x1080,FreeDOS
3,-0.256333,0.860127,0.267989,-0.048629,-0.398255,Intel Core i7-12650H,DDR4,Intel® UHD Graphics,1920x1080,FreeDOS
4,1.407287,-1.202271,1.799043,-0.026625,0.231993,AMD Ryzen 7-8845HS,DDR5,AMD Radeon 780M,1920x1080,FreeDOS
...,...,...,...,...,...,...,...,...,...,...
242,-1.088144,0.860127,0.267989,-0.102794,-0.617290,Intel Core i7-1255U,DDR4,Intel Iris Xe Graphics,1920x1080,FreeDOS
243,1.407287,-1.198235,-1.173003,-0.040166,1.624044,Intel Core Ultra 9-185H,DDR5,Intel Arc Graphics,2880x1800,Windows 11 Home
244,-1.088144,0.860127,0.267989,-0.096023,-0.844372,AMD Ryzen 5-7520U,DDR5,AMD Radeon 610M,1920x1080,Windows 11 Home
245,1.407287,-1.202271,-1.803437,-0.043551,1.130361,AMD Ryzen AI 9 HX-370,DDR5,AMD Radeon 890M,2880x1800,Windows 11 Home


In [11]:
df_transformed = pd.get_dummies(df_transformed, columns=['Procesador', 'Tipo RAM', 'Graficos', 'Resolucion', 'Sistema Operativo'])
df_transformed

Unnamed: 0,RAM,Almacenamiento,Pantalla,Bateria,Precio,Procesador_AMD 3000-3020E,Procesador_AMD Ryzen 3-7320U,Procesador_AMD Ryzen 3-7330U,Procesador_AMD Ryzen 5 PRO-7530U,Procesador_AMD Ryzen 5-5500U,...,Sistema Operativo_Windows 10 Home,Sistema Operativo_Windows 10 Home en modo S,Sistema Operativo_Windows 10 Pro,Sistema Operativo_Windows 11,Sistema Operativo_Windows 11 Home,Sistema Operativo_Windows 11 Home in S mode,Sistema Operativo_Windows 11 Pro,Sistema Operativo_macOS Monterey,Sistema Operativo_macOS Sequoia,Sistema Operativo_macOS Sonoma
0,-1.088144,0.860127,0.267989,-0.048629,-0.906083,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,1.407287,-1.202271,1.799043,-0.026625,0.714939,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
2,1.407287,-1.202271,0.718299,-0.048629,0.231993,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,-0.256333,0.860127,0.267989,-0.048629,-0.398255,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,1.407287,-1.202271,1.799043,-0.026625,0.231993,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,-1.088144,0.860127,0.267989,-0.102794,-0.617290,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
243,1.407287,-1.198235,-1.173003,-0.040166,1.624044,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
244,-1.088144,0.860127,0.267989,-0.096023,-0.844372,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
245,1.407287,-1.202271,-1.803437,-0.043551,1.130361,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
