In [None]:
print("Create pipeline")

Create pipeline


In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
%cd /content/drive/MyDrive/pipe/mtuci_labs

/content/drive/MyDrive/pipe/mtuci_labs


In [42]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    StandardScaler,
    RobustScaler,
    PowerTransformer,
    OneHotEncoder
  )
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor


In [43]:
df = pd.read_csv('Laptop_price.csv')
y = df['Price'] # Price как ключевой
X = df.drop("Price", axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=12345)

In [44]:
num_columns = list(X_train.select_dtypes(exclude=['object']).columns)
cat_columns = list(X_train.select_dtypes(include=['object']).columns)

numerical = Pipeline(
    steps=[
        ('simple_imputer', SimpleImputer(strategy='constant')),
        ('power_transform', PowerTransformer()),
        ('Scaler', StandardScaler())
    ]
)

categorial = Pipeline(
    steps=[
        ('simple_imputer', SimpleImputer(strategy='most_frequent')),
        ('OneHotEncoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
    ]
)

ct = ColumnTransformer([
    ('numerical', numerical, num_columns),
    ('categorial', categorial, cat_columns)
])

In [45]:
pipe = Pipeline(steps=[
    ('ct', ct),
    ('XGBRegressor', XGBRegressor())
])

In [46]:
param_grid = {
    'XGBRegressor__n_estimators': [25],
    'XGBRegressor__learning_rate': [0,1],
    'XGBRegressor__max_depth': [6],
    'XGBRegressor__gamma': [2],
    'XGBRegressor__min_child_weight': [1],
    'ct__numerical': [StandardScaler(), RobustScaler()]
}

In [50]:
clf = GridSearchCV(pipe, param_grid, verbose=3)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

model = clf.best_estimator_
clf.best_params_

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END XGBRegressor__gamma=2, XGBRegressor__learning_rate=0, XGBRegressor__max_depth=6, XGBRegressor__min_child_weight=1, XGBRegressor__n_estimators=25, ct__numerical=StandardScaler();, score=-0.013 total time=   0.1s
[CV 2/5] END XGBRegressor__gamma=2, XGBRegressor__learning_rate=0, XGBRegressor__max_depth=6, XGBRegressor__min_child_weight=1, XGBRegressor__n_estimators=25, ct__numerical=StandardScaler();, score=-0.006 total time=   0.1s
[CV 3/5] END XGBRegressor__gamma=2, XGBRegressor__learning_rate=0, XGBRegressor__max_depth=6, XGBRegressor__min_child_weight=1, XGBRegressor__n_estimators=25, ct__numerical=StandardScaler();, score=-0.002 total time=   0.1s
[CV 4/5] END XGBRegressor__gamma=2, XGBRegressor__learning_rate=0, XGBRegressor__max_depth=6, XGBRegressor__min_child_weight=1, XGBRegressor__n_estimators=25, ct__numerical=StandardScaler();, score=-0.016 total time=   0.1s
[CV 5/5] END XGBRegressor__gamma=2, XGBRegre

{'XGBRegressor__gamma': 2,
 'XGBRegressor__learning_rate': 1,
 'XGBRegressor__max_depth': 6,
 'XGBRegressor__min_child_weight': 1,
 'XGBRegressor__n_estimators': 25,
 'ct__numerical': StandardScaler()}