In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PowerTransformer

In [2]:
# Mount Google Drive into the runtime's filesystem at /content/drive/ so that
# files stored on Drive can be opened by ordinary paths in later cells.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — this cell will fail in a plain Jupyter environment.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
# Change the kernel's working directory to /content/drive/MyDrive/.
# NOTE(review): the data-loading cell uses an absolute path, so this looks
# redundant for the cells visible here — confirm nothing later relies on
# relative paths before removing it.
%cd /content/drive/MyDrive/

/content/drive/MyDrive


In [4]:
# Read the laptop price dataset from the mounted Drive into `df` and
# preview its first five rows as the cell output.
file_path = '/content/drive/MyDrive/Laptop_price.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,Asus,3.830296,16,512,11.185147,2.641094,17395.093065
1,Acer,2.912833,4,1000,11.311372,3.260012,31607.605919
2,Lenovo,3.241627,4,256,11.853023,2.029061,9291.023542
3,Acer,3.806248,16,512,12.28036,4.573865,17436.728334
4,Acer,3.268097,32,1000,14.990877,4.193472,32917.990718


In [5]:
# Separate the feature matrix (every column except 'Price') from the
# regression target ('Price').
X = df.drop(columns=['Price'])
y = df['Price']

In [6]:
# Column groups consumed by the preprocessing transformers.
categorical_features = ['Brand']
numeric_features = [
    'Processor_Speed',
    'RAM_Size',
    'Storage_Capacity',
    'Screen_Size',
    'Weight',
]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Report the split sizes and the feature groups, one labelled line each.
# The labels are user-facing output and are kept exactly as in the original.
for label, value in [
    ("Размер обучающего набора X_train:", X_train.shape),
    ("Размер тестового набора X_test:", X_test.shape),
    ("Размер обучающего набора y_train:", y_train.shape),
    ("Размер тестового набора y_test:", y_test.shape),
    ("Категориальные признаки:", categorical_features),
    ("Количественные признаки:", numeric_features),
]:
    print(label, value)

Размер обучающего набора X_train: (800, 6)
Размер тестового набора X_test: (200, 6)
Размер обучающего набора y_train: (800,)
Размер тестового набора y_test: (200,)
Категориальные признаки: ['Brand']
Количественные признаки: ['Processor_Speed', 'RAM_Size', 'Storage_Capacity', 'Screen_Size', 'Weight']


In [8]:
# Numeric columns: fill missing values with the column mean, then
# standardise to zero mean / unit variance.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode.
# handle_unknown='ignore' encodes a category that was not present when the
# encoder was fitted as an all-zero row instead of raising. Without it, a
# brand absent from one CV training fold (or from the training split) makes
# transform() fail on the validation/test rows.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer. Columns not listed
# in either group are dropped (ColumnTransformer's default remainder='drop').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Full model: preprocessing followed by a gradient-boosted regressor.
# random_state is fixed so that fits are reproducible; gradient boosting
# draws random feature subsets whenever max_features is below the number of
# features, so an unseeded model gives different results on every run.
# The step keeps the name 'clf' (although it is a regressor) because the
# hyper-parameter grid addresses it through the 'clf__' prefix.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('clf', GradientBoostingRegressor(random_state=42)),
])

In [10]:
# Hyper-parameter grid for the 'clf' step of the pipeline:
# 3 * 3 * 3 * 3 = 81 candidate combinations.
# NOTE(review): for max_features an int is an absolute feature count, so
# `1` means "consider a single feature per split". If "all features" was
# intended, use 1.0 or None instead — confirm with the author.
param_grid = {
    'clf__n_estimators': [150, 250, 350],
    'clf__learning_rate': [0.03, 0.3, 1],
    'clf__max_depth': [4, 6, 8],
    'clf__max_features': [1, 'sqrt', 'log2']
}

# Exhaustive search with 6-fold cross-validation (81 candidates x 6 folds =
# 486 fits, plus the final refit). n_jobs=-1 spreads the independent fits
# over all available cores; it does not change which candidate wins.
# With no `scoring` argument, candidates are ranked by the estimator's
# default score, which is R^2 for regressors.
clf = GridSearchCV(pipeline, param_grid, cv=6, n_jobs=-1)
clf.fit(X_train, y_train)

# best_estimator_ is the pipeline refitted on all of X_train with the
# winning parameter combination.
best_model = clf.best_estimator_
best_params = clf.best_params_
print("Лучшие параметры модели:", best_params)

Лучшие параметры модели: {'clf__learning_rate': 0.03, 'clf__max_depth': 4, 'clf__max_features': 'sqrt', 'clf__n_estimators': 350}
