In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import warnings

warnings.filterwarnings('ignore')

import lightgbm as lgb
import xgboost as xgb

import optuna
import ray
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    roc_auc_score,
    balanced_accuracy_score,
)
from imblearn.over_sampling import RandomOverSampler, SMOTE, SMOTENC
from sklearn.utils import compute_class_weight, class_weight
from sklearn.manifold import Isomap
from typing import Tuple
from scipy.special import expit
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.manifold import Isomap

In [2]:
train = pd.read_csv('train.csv')
train['EJ'].replace(['A', 'B'], [1, 0], inplace=True)

ej = np.array(train['EJ']).reshape(-1, 1)

sample_submission = pd.read_csv('sample_submission.csv')

y = train['Class']

In [3]:
scaler = StandardScaler()

x_numerical_columns = train.drop(columns=['Id', 'Class', 'EJ']).columns.tolist()
x_categorical_columns = ['EJ']
x_cols = x_numerical_columns + x_categorical_columns

scaler.fit(train[x_numerical_columns])

X = scaler.transform(train[x_numerical_columns])
X = np.concatenate((X, ej), axis=1)

In [4]:
from sklearn.impute import KNNImputer

knn = KNNImputer()
knn.fit(X)

X = knn.fit_transform(X)

X = pd.DataFrame(X, columns=x_cols)
X['EJ'] = X['EJ'].astype('int')

In [5]:
outlier_df = X[X>10].dropna(how='all').dropna(how='all', axis=1)

outlier_index = outlier_df.loc[(y==0)].index.tolist()

X = X.drop(index=outlier_index).reset_index(drop=True)
y = y.drop(index=outlier_index).reset_index(drop=True)

X['EJ'] = X['EJ'].astype('category')

In [6]:
def balancedlogloss_lgb(
    predt: np.ndarray, dtrain: lgb.Dataset
) -> Tuple[np.ndarray, np.ndarray]:
    y = dtrain.get_label()
    n0 = len(y[y == 0])
    n1 = len(y[y == 1])

    p = expit(predt)
    p[p == 0] = 1e-15

    grad = 1 / 2 * ((1 - y) / (1 - p) - y / p)
    hess = 1 / 2 * ((1 - y) / ((1 - p) ** 2) + y / (p**2))
    return grad, hess

def balancedlogloss_xgb(
    predt: np.ndarray, dtrain: xgb.DMatrix
) -> Tuple[np.ndarray, np.ndarray]:
    y = dtrain.get_label()
    n0 = len(y[y == 0])
    n1 = len(y[y == 1])

    p = expit(predt)
    p[p == 0] = 1e-15

    grad = 1 / 2 * ((1 - y) / (1 - p) - y / p)
    hess = 1 / 2 * ((1 - y) / ((1 - p) ** 2) + y / (p**2))
    return grad, hess


def balancedlogloss_eval_lgb(
    predt: np.ndarray, dtrain: lgb.Dataset
) -> Tuple[np.ndarray, np.ndarray]:
    y = dtrain.get_label()
    n0 = len(y[y == 0])
    n1 = len(y[y == 1])
    p = expit(predt)

    p[p == 0] = 1e-15

    return (
        'balanced_logloss',
        (-1/ n0 * (sum((1 - y) * np.log(1 - p))) - 1 / n1 * (sum(y * np.log(p)))) / 2,
        False
    )

def balancedlogloss_eval_xgb(
    predt: np.ndarray, dtrain: lgb.Dataset
) -> Tuple[np.ndarray, np.ndarray]:
    y = dtrain.get_label()
    n0 = len(y[y == 0])
    n1 = len(y[y == 1])
    p = expit(predt)

    p[p == 0] = 1e-15

    return (
        'balanced_logloss',
        (-1 / n0 * (sum((1 - y) * np.log(1 - p))) - 1 / n1 * (sum(y * np.log(p)))) / 2,
    )

def score(p, y):

    p[p == 0] = 1e-15

    n0 = len(y[y == 0])
    n1 = len(y[y == 1])

    return ((-1/ n0 * (sum((1 - y) * np.log(1 - p))) - 1 / n1 * (sum(y * np.log(p)))) / 2)

In [7]:
def get_trials_df(trials_dataframe):
    col_index = [1] + [i for i in range(5, trials_dataframe.shape[1]-1)]

    trials_dataframe = trials_dataframe.iloc[:, col_index]
    trials_dataframe = trials_dataframe.groupby(trials_dataframe.columns.tolist()[1:]).mean()

    trials_dataframe = trials_dataframe.sort_values(by=['value'], ascending=True)

    return trials_dataframe

In [8]:
""" def xgb_objective(trial):

    xgb_params = {
        'learning_rate': 0.1,
        'min_child_weight': trial.suggest_categorical('min_child_weight', [i for i in range(8, 15)]),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.3, 1, step=0.05),
        'reg_alpha': trial.suggest_float('reg_alpha', 3.5, 4.5, step=0.1),
        'max_depth': trial.suggest_categorical('max_depth', [8, 10, 12]),
        'max_delta_step': 4,
        'subsample': trial.suggest_float('subsample', 0.2, 1, step=0.1),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.08, 0.1, 0.12, 0.18, 0.2]),
        'disable_default_eval_metric': True, 
        'seed': 5,
    }

    kf = StratifiedKFold(10, shuffle=True, random_state=30)
    cols = X.columns.tolist()

    xgb_scores = []
    
    for train_index, test_index in kf.split(X, y):
        X_train_val, X_test = X.loc[train_index], X.loc[test_index]
        y_train_val, y_test = y.loc[train_index], y.loc[test_index]

        X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.05, 
                                                          stratify=y_train_val, random_state=32)

        sampler = RandomOverSampler()
        X_train, y_train = sampler.fit_resample(X_train, y_train)

        n_components = 3
        isomap = Isomap(n_components=n_components)
        isomap.fit(X_train)

        x_isomap_train = isomap.transform(X_train)
        x_isomap_test = isomap.transform(X_test)
        x_isomap_val = isomap.transform(X_val)

        x_isomap_train = pd.DataFrame(x_isomap_train, columns=['isomap_' + str(i) for i in range(n_components)], index=X_train.index)
        x_isomap_test = pd.DataFrame(x_isomap_test, columns=['isomap_' + str(i) for i in range(n_components)], index=X_test.index)
        x_isomap_val = pd.DataFrame(x_isomap_val, columns=['isomap_' + str(i) for i in range(n_components)], index=X_val.index)

        X_train = pd.concat([X_train, x_isomap_train], axis=1)
        X_test = pd.concat([X_test, x_isomap_test], axis=1)
        X_val = pd.concat([X_val, x_isomap_val], axis=1)
        cols = X_train.columns.tolist()

        dtrain_xgb = xgb.DMatrix(X_train, y_train, feature_names=cols, enable_categorical=True)
        dtest_xgb = xgb.DMatrix(X_test, y_test, feature_names=cols, enable_categorical=True)
        dval_xgb = xgb.DMatrix(X_val, y_val, feature_names=cols, enable_categorical=True)

        xgb_model = xgb.train(params=xgb_params,
                            dtrain=dtrain_xgb,
                            verbose_eval=False,
                            obj=balancedlogloss_xgb,
                            evals=[(dtrain_xgb, 'train'), (dval_xgb, 'validation')],
                            feval=balancedlogloss_eval_xgb,
                            num_boost_round=300,
                            early_stopping_rounds=10,
                            )

        xgb_test_preds = expit(xgb_model.predict(dtest_xgb, output_margin=True))

        xgb_score = score(xgb_test_preds, y_test)
        xgb_scores = xgb_scores + [xgb_score]

    if np.isnan(np.mean(xgb_scores)):
        raise optuna.exceptions.TrialPruned()
    
    return np.mean(xgb_scores)

pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
xgb_study = optuna.create_study(direction='minimize', pruner=pruner)
xgb_study.optimize(xgb_objective, n_trials=50)

xgb_trials_dataframe = xgb_study.trials_dataframe()
get_trials_df(xgb_trials_dataframe)
 """

" def xgb_objective(trial):\n\n    xgb_params = {\n        'learning_rate': 0.1,\n        'min_child_weight': trial.suggest_categorical('min_child_weight', [i for i in range(8, 15)]),\n        'reg_lambda': trial.suggest_float('reg_lambda', 0.3, 1, step=0.05),\n        'reg_alpha': trial.suggest_float('reg_alpha', 3.5, 4.5, step=0.1),\n        'max_depth': trial.suggest_categorical('max_depth', [8, 10, 12]),\n        'max_delta_step': 4,\n        'subsample': trial.suggest_float('subsample', 0.2, 1, step=0.1),\n        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.08, 0.1, 0.12, 0.18, 0.2]),\n        'disable_default_eval_metric': True, \n        'seed': 5,\n    }\n\n    kf = StratifiedKFold(10, shuffle=True, random_state=30)\n    cols = X.columns.tolist()\n\n    xgb_scores = []\n    \n    for train_index, test_index in kf.split(X, y):\n        X_train_val, X_test = X.loc[train_index], X.loc[test_index]\n        y_train_val, y_test = y.loc[train_index], y.loc[tes

In [16]:
def lgb_objective(trial):

    lgb_params = {
        'learning_rate': 0.1,
        'min_child_weight': trial.suggest_categorical('min_child_weight', [1, 5, 10, 20]),
        'lambda_l2': trial.suggest_int('lambda_l2', 0, 40, step=10),
        'lambda_l1': trial.suggest_int('lambda_l1', 0, 40, step=10),
        'max_depth': trial.suggest_categorical('max_depth', [2, 5, 10, 20]),
        'max_delta_step': 4,
        'subsample': trial.suggest_float('subsample', 0.1, 1, step=0.1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 0.5, step=0.1),
        'random_seed': 5,
        'first_metric_only': True,
        'verbosity': -1,
    }

    kf = StratifiedKFold(10, shuffle=True, random_state=30)
    cols = X.columns.tolist()

    lgb_scores = []
    
    for train_index, test_index in kf.split(X, y):
        X_train_val, X_test = X.loc[train_index], X.loc[test_index]
        y_train_val, y_test = y.loc[train_index], y.loc[test_index]

        X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.05, 
                                                          stratify=y_train_val, random_state=32)

        sampler = RandomOverSampler()
        X_train, y_train = sampler.fit_resample(X_train, y_train)

        try:
            n_components = 3
            isomap = Isomap(n_components=n_components)
            isomap.fit(X_train)

            x_isomap_train = isomap.transform(X_train)
            x_isomap_test = isomap.transform(X_test)
            x_isomap_val = isomap.transform(X_val)

            x_isomap_train = pd.DataFrame(x_isomap_train, columns=['isomap_' + str(i) for i in range(n_components)], index=X_train.index)
            x_isomap_test = pd.DataFrame(x_isomap_test, columns=['isomap_' + str(i) for i in range(n_components)], index=X_test.index)
            x_isomap_val = pd.DataFrame(x_isomap_val, columns=['isomap_' + str(i) for i in range(n_components)], index=X_val.index)

            X_train = pd.concat([X_train, x_isomap_train], axis=1)
            X_test = pd.concat([X_test, x_isomap_test], axis=1)
            X_val = pd.concat([X_val, x_isomap_val], axis=1)
        
        except ValueError as e:
            print("An error occurred during Isomap fitting or transforming, skipping this fold. Error: ", str(e))


        cols = X_train.columns.tolist()

        dtrain_lgb = lgb.Dataset(X_train, y_train)
        dtest_lgb = lgb.Dataset(X_test, y_test)
        dval_lgb = lgb.Dataset(X_val, y_val)

        lgb_evals = {}
        lgb_model = lgb.train(params=lgb_params,
                            train_set=dtrain_lgb,
                            valid_sets=[dtrain_lgb, dval_lgb],
                            fobj=balancedlogloss_lgb,
                            feval=balancedlogloss_eval_lgb,
                            num_boost_round=100,
                            early_stopping_rounds=2,
                            verbose_eval=False,
                            evals_result=lgb_evals,
                            )

        lgb_test_preds = expit(lgb_model.predict(X_test, raw_score=True))

        lgb_score = score(lgb_test_preds, y_test)
        lgb_scores = lgb_scores + [lgb_score]
        
    if np.isnan(np.mean(lgb_scores)):
        raise optuna.exceptions.TrialPruned()

    return np.mean(lgb_scores)

pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
lgb_study = optuna.create_study(direction='minimize', pruner=pruner)
lgb_study.optimize(lgb_objective, n_trials=50)

lgb_trials_dataframe = lgb_study.trials_dataframe()
get_trials_df(lgb_trials_dataframe)


[I 2023-06-28 17:40:27,869] A new study created in memory with name: no-name-e01a8a75-6381-4dbd-8432-dba72093598a


[I 2023-06-28 17:40:41,888] Trial 0 finished with value: 0.49466552714659606 and parameters: {'min_child_weight': 20, 'lambda_l2': 20, 'lambda_l1': 20, 'max_depth': 2, 'subsample': 0.1, 'colsample_bytree': 0.1}. Best is trial 0 with value: 0.49466552714659606.
[I 2023-06-28 17:40:58,532] Trial 1 finished with value: 0.41596755737994046 and parameters: {'min_child_weight': 1, 'lambda_l2': 30, 'lambda_l1': 20, 'max_depth': 2, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.30000000000000004}. Best is trial 1 with value: 0.41596755737994046.
[I 2023-06-28 17:41:17,174] Trial 2 finished with value: 0.36224269200502685 and parameters: {'min_child_weight': 1, 'lambda_l2': 20, 'lambda_l1': 10, 'max_depth': 10, 'subsample': 0.2, 'colsample_bytree': 0.30000000000000004}. Best is trial 2 with value: 0.36224269200502685.
[I 2023-06-28 17:41:34,358] Trial 3 finished with value: 0.3303467169231117 and parameters: {'min_child_weight': 5, 'lambda_l2': 30, 'lambda_l1': 10, 'max_depth': 5, 'sub

In [10]:
''' lgb_params = {'learning_rate': 0.1,
              'lambda_l1': 30,
              'lambda_l2': 10,
              'verbosity': -1,
              'first_metric_only': True
              }


lgb_scores = []
lgb_train_df = pd.DataFrame()
lgb_eval_df = pd.DataFrame()

for train_index, test_index in kf.split(X, y):
    X_train_val, X_test = X.loc[train_index], X.loc[test_index]
    y_train_val, y_test = y.loc[train_index], y.loc[test_index]

    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.03, 
                                                        stratify=y_train_val, random_state=32)

    sampler = RandomOverSampler()
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    n_components = 10
    isomap = Isomap(n_components=n_components)
    isomap.fit(X_train)

    x_isomap_train = isomap.transform(X_train)
    x_isomap_test = isomap.transform(X_test)
    x_isomap_val = isomap.transform(X_val)

    x_isomap_train = pd.DataFrame(x_isomap_train, columns=['isomap_' + str(i) for i in range(n_components)], index=X_train.index)
    x_isomap_test = pd.DataFrame(x_isomap_test, columns=['isomap_' + str(i) for i in range(n_components)], index=X_test.index)
    x_isomap_val = pd.DataFrame(x_isomap_val, columns=['isomap_' + str(i) for i in range(n_components)], index=X_val.index)

    X_train = pd.concat([X_train, x_isomap_train], axis=1)
    X_test = pd.concat([X_test, x_isomap_test], axis=1)
    X_val = pd.concat([X_val, x_isomap_val], axis=1)
    cols = X_train.columns.tolist()

    dtrain_lgb = lgb.Dataset(X_train, y_train)
    dtest_lgb = lgb.Dataset(X_test, y_test)
    dval_lgb = lgb.Dataset(X_val, y_val)

    lgb_evals = {}
    lgb_model = lgb.train(params=lgb_params,
                        train_set=dtrain_lgb,
                        valid_sets=[dtrain_lgb, dval_lgb],
                        valid_names=['train', 'eval'],
                        fobj=balancedlogloss_lgb,
                        feval=balancedlogloss_eval_lgb,
                        num_boost_round=200,
                        early_stopping_rounds=5,
                        verbose_eval=False,
                        evals_result=lgb_evals,
                        )

    lgb_test_preds = expit(lgb_model.predict(X_test, raw_score=True))

    lgb_score = score(lgb_test_preds, y_test)
    lgb_scores = lgb_scores + [lgb_score]

    lgb_train_df = pd.concat([lgb_train_df, pd.DataFrame(lgb_evals['train'])], axis=1)
    lgb_eval_df = pd.concat([lgb_eval_df, pd.DataFrame(lgb_evals['eval'])], axis=1)

lgb_train_df = lgb_train_df.mean(axis=1)
lgb_eval_df = lgb_eval_df.mean(axis=1, skipna=False)

print(np.mean(lgb_scores))
# pd.concat([lgb_train_df, lgb_eval_df], axis=1) '''

" lgb_params = {'learning_rate': 0.1,\n              'lambda_l1': 30,\n              'lambda_l2': 10,\n              'verbosity': -1,\n              'first_metric_only': True\n              }\n\n\nlgb_scores = []\nlgb_train_df = pd.DataFrame()\nlgb_eval_df = pd.DataFrame()\n\nfor train_index, test_index in kf.split(X, y):\n    X_train_val, X_test = X.loc[train_index], X.loc[test_index]\n    y_train_val, y_test = y.loc[train_index], y.loc[test_index]\n\n    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.03, \n                                                        stratify=y_train_val, random_state=32)\n\n    sampler = RandomOverSampler()\n    X_train, y_train = sampler.fit_resample(X_train, y_train)\n\n    n_components = 10\n    isomap = Isomap(n_components=n_components)\n    isomap.fit(X_train)\n\n    x_isomap_train = isomap.transform(X_train)\n    x_isomap_test = isomap.transform(X_test)\n    x_isomap_val = isomap.transform(X_val)\n\n    x_iso

In [11]:
xgb_param = study.best_params
xgb_param['learning_rate'] = 0.1
xgb_param['max_delta_step'] = 4
xgb_param['seed'] = 5
xgb_param['disable_default_eval_metric'] = True

lgb_params = {'learning_rate': 0.1,
              'lambda_l1': 0.1,
              'verbosity': -1,
              'first_metric_only': True
              }


kf = StratifiedKFold(10, shuffle=True, random_state=30)
cols = X.columns.tolist()

df_xgb_train, df_xgb_test = pd.DataFrame(), pd.DataFrame()
df_lgb_train, df_lgb_test = pd.DataFrame(), pd.DataFrame()

xgb_scores = []
lgb_scores = []
scores = []

for train_index, test_index in kf.split(X, y):

    X_train_val, X_test = X.loc[train_index], X.loc[test_index]
    y_train_val, y_test = y.loc[train_index], y.loc[test_index]

    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, stratify=y_train_val, test_size=0.05, random_state=32)

    sampler = RandomOverSampler()
    X_train, y_train = sampler.fit_resample(X_train, y_train)

    n_components = 3
    isomap = Isomap(n_components=n_components)
    isomap.fit(X_train)

    x_isomap_train = isomap.transform(X_train)
    x_isomap_test = isomap.transform(X_test)
    x_isomap_val = isomap.transform(X_val)

    x_isomap_train = pd.DataFrame(x_isomap_train, columns=['isomap_' + str(i) for i in range(n_components)], index=X_train.index)
    x_isomap_test = pd.DataFrame(x_isomap_test, columns=['isomap_' + str(i) for i in range(n_components)], index=X_test.index)
    x_isomap_val = pd.DataFrame(x_isomap_val, columns=['isomap_' + str(i) for i in range(n_components)], index=X_val.index)

    X_train = pd.concat([X_train, x_isomap_train], axis=1)
    X_test = pd.concat([X_test, x_isomap_test], axis=1)
    X_val = pd.concat([X_val, x_isomap_val], axis=1)
    cols = X_train.columns.tolist()

    evals_xgb = {}
    dtrain_xgb = xgb.DMatrix(X_train, y_train, feature_names=cols, enable_categorical=True)
    dtest_xgb = xgb.DMatrix(X_test, y_test, feature_names=cols, enable_categorical=True)
    dval_xgb = xgb.DMatrix(X_val, y_val, feature_names=cols, enable_categorical=True)

    xgb_model = xgb.train(params=xgb_param,
                          dtrain=dtrain_xgb,
                          obj=balancedlogloss_xgb,
                          verbose_eval=False,
                          evals=[(dtrain_xgb, 'train'), (dval_xgb, 'val')],
                          feval=balancedlogloss_eval_xgb,
                          evals_result=evals_xgb,
                          early_stopping_rounds=10,
                          num_boost_round=300,
                          )
    
    df_xgb_train = pd.concat([df_xgb_train, pd.Series(evals_xgb['train']['balanced_logloss'])], axis=1)
    df_xgb_test = pd.concat([df_xgb_test, pd.Series(evals_xgb['val']['balanced_logloss'])], axis=1)

    xgb_train_preds = expit(xgb_model.predict(dtrain_xgb, output_margin=True))
    xgb_test_preds = expit(xgb_model.predict(dtest_xgb, output_margin=True))

    xgb_score = score(xgb_test_preds, y_test)
    xgb_scores = xgb_scores + [xgb_score]
    print(xgb_score)

    evals_lgb = {}
    dtrain_lgb = lgb.Dataset(X_train, y_train)
    dtest_lgb = lgb.Dataset(X_test, y_test)
    dval_lgb = lgb.Dataset(X_val, y_val)

    lgb_model = lgb.train(params=lgb_param,
                          train_set=dtrain_lgb,
                          valid_sets=[dtrain_lgb, dval_lgb],
                          fobj=balancedlogloss_lgb,
                          feval=balancedlogloss_eval_lgb,
                          evals_result=evals_lgb,
                          valid_names=['train', 'val'],
                          num_boost_round=500,
                          verbose_eval=False)

    df_lgb_train = pd.concat([df_lgb_train, pd.Series(evals_lgb['train']['balanced_logloss'])], axis=1)
    df_lgb_test = pd.concat([df_lgb_test, pd.Series(evals_lgb['val']['balanced_logloss'])], axis=1)

    lgb_train_preds = expit(lgb_model.predict(X_train, raw_score=True))
    lgb_test_preds = expit(lgb_model.predict(X_test, raw_score=True))

    lgb_score = score(lgb_test_preds, y_test)
    lgb_scores = lgb_scores + [lgb_score]
    print(lgb_score)

    stacked_preds_train = np.column_stack(((expit(xgb_train_preds)), (expit(lgb_train_preds))))
    stacked_preds_test = np.column_stack(((expit(xgb_test_preds)), (expit(lgb_test_preds))))

    meta_model = LogisticRegression(C=10, random_state=20)
    # meta_model = xgb.XGBClassifier()
    meta_model.fit(stacked_preds_train, y_train)
    ensemble_preds = meta_model.predict_proba(stacked_preds_test)[:, 1]

    ensemble_score = score(ensemble_preds, np.array(y_test))
    scores = scores + [ensemble_score]
    print('ensemble: ' + str(ensemble_score))

df_xgb = pd.DataFrame()
df_xgb['train'] = df_xgb_train.mean(axis=1)
df_xgb['val'] = df_xgb_test.mean(axis=1)

df_lgb = pd.DataFrame()
df_lgb['train'] = df_lgb_train.mean(axis=1)
df_lgb['val'] = df_lgb_test.mean(axis=1)

print('\n')
print('xgb: ' + str(np.mean(xgb_scores)))
print('lgb: ' + str(np.mean(lgb_scores)))
print('ensemble:' + str(np.mean(scores)))

NameError: name 'study' is not defined