Градиентный бустинг

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score


# Load the travel-insurance dataset; the 'Claim' column is used as the target.
data = pd.read_csv('/datasets/travel_insurance.csv')

# Separate the target from the features, then hold out 25% of the rows for
# validation. The fixed random_state keeps the split repeatable.
features = data.drop(columns='Claim')
target = data['Claim']
features_train, features_valid, target_train, target_valid = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=12345,
)

# Columns handed to CatBoost as categorical features.
cat_features = [
    'Agency',
    'Agency Type',
    'Distribution Channel',
    'Product Name',
    'Destination',
    'Gender',
]

# Gradient-boosting classifier: 50 iterations, log-loss objective.
model = CatBoostClassifier(iterations=50, loss_function='Logloss')

# verbose=10 is passed through to CatBoost's training log settings.
model.fit(
    features_train,
    target_train,
    cat_features=cat_features,
    verbose=10,
)

# Keep the second column (index 1) of the predicted probabilities --
# presumably the probability of the positive 'Claim' class -- and score it
# with ROC AUC on the validation split.
probabilities_valid = model.predict_proba(features_valid)
probabilities_one_valid = probabilities_valid[:, 1]
auc_roc = roc_auc_score(target_valid, probabilities_one_valid)
print(auc_roc)

Градиентный спуск

In [None]:
import pandas as pd
from sklearn.linear_model import Ridge, Lasso, SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Suppress all warnings so the experiment output below stays uncluttered.
import warnings
warnings.filterwarnings(action='ignore')

# Load the data.
data = pd.read_csv('tennis.csv')

# Split off the target column, then hold out 20% of the rows as a test set.
# The fixed random_state keeps the split repeatable.
y = data['Выигрыш на подаче']
X = data.drop(columns='Выигрыш на подаче')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Tune the regularisation strength for Ridge: fit one model per alpha and
# print its mean squared error on the test split.
for alpha in (0, 0.1, 1, 5):
    model = Ridge(alpha=alpha, solver='lsqr').fit(X_train, y_train)
    print(f'Коэффициент регуляризации {alpha}')
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print(test_mse)

# Find out how many solver steps are needed to reach the solution: cap the
# 'lsqr' solver at 1..7 iterations (no regularisation) and print the test MSE
# for each cap.
for n_steps in range(1, 8):
    model = Ridge(alpha=0, solver='lsqr', max_iter=n_steps).fit(X_train, y_train)
    print(f'Максимальное число шагов {n_steps}')
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print(test_mse)

# Tune the regularisation strength for Lasso: fit one model per alpha and
# print its mean squared error on the test split.
# NOTE(review): the scikit-learn docs advise against alpha=0 with Lasso for
# numerical reasons; any warning about it is hidden by the global filter.
for alpha in (0, 0.1, 1, 5):
    model = Lasso(alpha=alpha).fit(X_train, y_train)
    print(f'Коэффициент регуляризации {alpha}')
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print(test_mse)

# Tune SGDRegressor: for each penalty type and regularisation strength, fit a
# model and print its mean squared error on the test split. The whole grid is
# run twice -- first with early_stopping=False, then with early_stopping=True --
# in the same order as two separate passes would produce.
#
# random_state is fixed because SGDRegressor shuffles the training data between
# epochs and, with early_stopping=True, sets aside a random validation subset.
# Without a seed the printed MSE values change from run to run, so the
# configurations could not be compared reliably. The seed matches the one used
# for train_test_split above.
for early_stopping in (False, True):
    for penalty in ('l1', 'l2'):
        for alpha in (0, 0.1, 1, 5):
            model = SGDRegressor(
                penalty=penalty,
                alpha=alpha,
                max_iter=100,
                tol=0.0001,
                n_iter_no_change=10,
                early_stopping=early_stopping,
                random_state=42,
            )
            model.fit(X_train, y_train)
            print(f'Тип регуляризации {penalty}, сила регуляризации {alpha}')
            print(mean_squared_error(y_test, model.predict(X_test)))