In [1]:
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

## Carregar os dados

In [2]:
laptop_price = pd.read_pickle("../datasets/processed/laptop_price.pkl")
laptop_price = laptop_price.drop(columns=['product', 'gpu', 'weight_kg', 'inches'])
laptop_price.head()

Unnamed: 0,company,typename,screen_resolution,cpu_manufacturer,cpu,gpu_manufacturer,ram_gb,type_storage,hdd_gb,ssd_gb,hybrid_gb,flash_storage_gb,opsys,price_euros
0,Apple,Ultrabook,2560x1600,Intel,Others Intel,Intel,8,ssd,0,128,0,0,macOS,1339.69
1,Apple,Ultrabook,1440x900,Intel,Others Intel,Intel,8,flash storage,0,0,0,128,macOS,898.94
2,HP,Notebook,1920x1080,Intel,Intel Core i5,Intel,8,ssd,0,256,0,0,Outros,575.0
3,Apple,Ultrabook,2880x1800,Intel,Others Intel,AMD,16,ssd,0,512,0,0,macOS,2537.45
4,Apple,Ultrabook,2560x1600,Intel,Others Intel,Intel,8,ssd,0,256,0,0,macOS,1803.6


## Dividir dados em treino e teste

In [3]:
y = laptop_price['price_euros']
X = laptop_price.drop(columns='price_euros')

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2) 

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

((833, 13), (833,), (209, 13), (209,), (261, 13), (261,))

## Normalizar dados

In [4]:
categorical_features = ['company', 'typename', 'screen_resolution', 'cpu_manufacturer', 'gpu_manufacturer']
one_hot_enconder = OneHotEncoder(handle_unknown="ignore", sparse=False)

preprocessor = ColumnTransformer(
    transformers=[
        ("ohe", one_hot_enconder, categorical_features)
    ]
)

models = {'RandomForestRegressor': RandomForestRegressor(n_estimators=200), 
          'KneighborsRegressor':KNeighborsRegressor(), 
          'SVC': SVR(), 
          'DecisionTreeRegressor': DecisionTreeRegressor()}


for name, current_model in models.items():
    model = Pipeline(
        steps=[
            ("preprocessor", preprocessor),
            ("classifier", current_model)
        ]
    )
    
    model.fit(x_train, y_train)
    score = model.score(x_val, y_val)

    print(f"{name}: {score}")

RandomForestRegressor: 0.5785655610755829
KneighborsRegressor: 0.5167587173420367
SVC: 0.0027147824854795166
DecisionTreeRegressor: 0.5780359481198679
