In [34]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import warnings


In [35]:
warnings.filterwarnings("ignore")

In [36]:
df = sns.load_dataset('diamonds')

In [37]:
df.drop(['depth', 'table', 'x', 'y', 'z'], axis=1, inplace=True)
df = pd.get_dummies(df, drop_first=True)

df['carat'] = np.log(1+df['carat'])
df['price'] = np.log(1+df['price'])


In [38]:
X = df.drop(columns="price")
y = df["price"]

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


In [40]:

# Создание модели SGDRegressor
model = SGDRegressor()

# Определение сетки параметров
param_grid = {
    'loss': ['squared_loss', 'epsilon_insensitive'],
    'penalty': ['elasticnet'],
    'alpha': np.logspace(-3, 3, 10),
    'l1_ratio': np.linspace(0, 1, 10),
    'learning_rate': ['constant'],
    'eta0': np.logspace(-4, -1, 4)
}

# Подбор оптимальных параметров с помощью GridSearchCV
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Вывод наилучших параметров
print("Наилучшие параметры:", grid_search.best_params_)

Наилучшие параметры: {'alpha': 0.001, 'eta0': 0.001, 'l1_ratio': 0.1111111111111111, 'learning_rate': 'constant', 'loss': 'epsilon_insensitive', 'penalty': 'elasticnet'}


In [43]:
sgd = SGDRegressor(alpha=0.001, 
                   eta0=0.01, 
                   l1_ratio=0, 
                   learning_rate='constant', 
                   loss='epsilon_insensitive', 
                   penalty='elasticnet',
                   random_state=42)

sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)

print('MSE {:.3f}'.format(metrics.mean_squared_error(y_test, y_pred)))

MSE 0.045


In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

parameters = {
    "loss": ["squared_error", "epsilon_insensitive"],
    "penalty": ["elasticnet"],
    "alpha": np.logspace(-3, 3, 10),
    "l1_ratio": np.linspace(0, 1, 10),
    "learning_rate": ["constant"],
    "eta0": np.logspace(-4, -1, 4)
}

sgd = SGDRegressor(random_state=42)
sgd_cv = GridSearchCV(estimator=sgd, param_grid=parameters, n_jobs=-1)
sgd_cv.fit(X_train, y_train)

print(sgd_cv.best_params_)

sgd = SGDRegressor(**sgd_cv.best_params_, random_state = 42)

sgd.fit(X_train, y_train)
sgd.score(X_train, y_train) # r2
ls = sgd.predict(X_test)

round(metrics.mean_squared_error(y_test, ls), 3)

{'alpha': 0.001, 'eta0': 0.001, 'l1_ratio': 0.0, 'learning_rate': 'constant', 'loss': 'epsilon_insensitive', 'penalty': 'elasticnet'}


0.044