In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [2]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
import seaborn as sns
from sklearn.model_selection import StratifiedKFold

In [3]:
def reduce_mem(df: pd.DataFrame):
    """Downcast the numeric columns of ``df`` to narrower dtypes and report the saving.

    Only columns whose dtype is a 16/32/64-bit signed integer, unsigned integer
    or float are considered; every other column is left untouched.

    * Integer columns are cast to the first of int8, int16, int32, int64 whose
      inclusive range contains the column's minimum and maximum. If none of
      them fits, the column keeps its dtype.
    * Float columns are cast to float32 when their minimum and maximum lie
      within the float32 range, and to float64 otherwise. The float32 cast can
      round values that need more precision.

    Args:
        df: Frame to shrink. Its columns are replaced in place.

    Returns:
        The same ``df`` object that was passed in.

    Side effects:
        Prints the frame's memory usage before and after, in units of
        1024**2 bytes.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64', "uint16", "uint32", "uint64"]
    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if "int" in str(col_type):
            # Substring match, so unsigned columns ("uint16", ...) land here too.
            for candidate in (np.int8, np.int16, np.int32, np.int64):
                limits = np.iinfo(candidate)
                # Both bounds are inclusive: a column whose maximum is exactly
                # the dtype's maximum (e.g. 127 for int8) still fits.
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # float16 is deliberately not a target. Casting through it rounds
            # values to half precision, and a follow-up float32 cast keeps the
            # rounding while giving back none of the memory.
            limits = np.finfo(np.float32)
            if c_min >= limits.min and c_max <= limits.max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2

    print(f"Start - end memory:- {start_mem:5.2f} - {end_mem:5.2f} Mb")
    return df

In [4]:
# Labeled training data. The unlabeled test set is read once, in the next cell,
# instead of being parsed twice.
train = pd.read_csv("./playground-series-s4e7/train.csv")

In [5]:
# Competition test set. Despite its name, `target` holds the rows to predict on
# (it is later passed to predict_proba), not the label column.
target = pd.read_csv("./playground-series-s4e7/test.csv")

In [6]:
# Column dtypes and memory footprint of the training frame as loaded.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11504798 entries, 0 to 11504797
Data columns (total 12 columns):
 #   Column                Dtype  
---  ------                -----  
 0   id                    int64  
 1   Gender                object 
 2   Age                   int64  
 3   Driving_License       int64  
 4   Region_Code           float64
 5   Previously_Insured    int64  
 6   Vehicle_Age           object 
 7   Vehicle_Damage        object 
 8   Annual_Premium        float64
 9   Policy_Sales_Channel  float64
 10  Vintage               int64  
 11  Response              int64  
dtypes: float64(3), int64(6), object(3)
memory usage: 1.0+ GB


In [7]:
# Remove the `id` column from the training frame (`columns=` already implies axis 1).
train = train.drop(columns="id")

In [8]:
# Remove the `id` column from the test frame (`columns=` already implies axis 1).
target = target.drop(columns="id")

In [9]:
# Names of the object-dtype columns in the training frame.
category_columns = list(train.select_dtypes(include="object").columns)


def label_encoding(df):
    """Integer-encode, in place, every column of ``df`` named in ``category_columns``.

    ``category_columns`` is the module-level list and is read when the function
    is called. Each column gets its own freshly fitted ``LabelEncoder``; the
    encoders are not kept and nothing is returned.
    """
    for name in category_columns:
        df[name] = LabelEncoder().fit_transform(df[name])


label_encoding(train)

In [10]:
# Recompute the object-dtype column list from the test frame; `label_encoding`
# (defined in the previous cell) reads this global when it is called, so there
# is no need to define the same function a second time.
category_columns = target.select_dtypes(include="object").columns.tolist()

# NOTE(review): the encoders are fitted on the test frame alone. The resulting
# codes agree with the training codes only if both frames contain the same set
# of category values per column — confirm.
label_encoding(target)

In [13]:
# Display the training frame (the cell's last expression is rendered).
train

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,21,1,35.0,0,0,1,65101.0,124.0,187,0
1,1,43,1,28.0,0,2,1,58911.0,26.0,288,1
2,0,25,1,14.0,1,1,0,38043.0,152.0,254,0
3,0,35,1,1.0,0,0,1,2630.0,156.0,76,0
4,0,36,1,15.0,1,0,0,31951.0,152.0,294,0
...,...,...,...,...,...,...,...,...,...,...,...
11504793,1,48,1,6.0,0,0,1,27412.0,26.0,218,0
11504794,0,26,1,36.0,0,1,1,29509.0,152.0,115,1
11504795,0,29,1,32.0,1,1,0,2630.0,152.0,189,0
11504796,0,51,1,28.0,0,0,1,48443.0,26.0,274,1


In [14]:
# Display the test frame (the cell's last expression is rendered).
target

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage
0,0,20,1,47.0,0,1,0,2630.0,160.0,228
1,1,47,1,28.0,0,0,1,37483.0,124.0,123
2,1,47,1,43.0,0,0,1,2630.0,26.0,271
3,0,22,1,47.0,1,1,0,24502.0,152.0,115
4,1,51,1,19.0,0,0,0,34115.0,124.0,148
...,...,...,...,...,...,...,...,...,...,...
7669861,1,57,1,28.0,0,0,1,51661.0,124.0,109
7669862,1,28,1,50.0,1,1,0,25651.0,152.0,184
7669863,1,47,1,33.0,1,0,0,2630.0,138.0,63
7669864,1,30,1,28.0,0,1,1,38866.0,124.0,119


In [11]:
# Downcast numeric columns of the training frame. reduce_mem casts the columns
# of the frame it receives and returns that same object.
train = reduce_mem(train)

Start - end memory:- 833.86 - 230.41 Mb


In [12]:
# Downcast numeric columns of the test frame. reduce_mem casts the columns of
# the frame it receives and returns that same object.
target = reduce_mem(target)

Start - end memory:- 497.39 - 146.29 Mb


In [17]:
# Display the test frame again (the cell's last expression is rendered).
target

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage
0,0,20,1,47.0,0,1,0,2630.0,160.0,228
1,1,47,1,28.0,0,0,1,37483.0,124.0,123
2,1,47,1,43.0,0,0,1,2630.0,26.0,271
3,0,22,1,47.0,1,1,0,24502.0,152.0,115
4,1,51,1,19.0,0,0,0,34115.0,124.0,148
...,...,...,...,...,...,...,...,...,...,...
7669861,1,57,1,28.0,0,0,1,51661.0,124.0,109
7669862,1,28,1,50.0,1,1,0,25651.0,152.0,184
7669863,1,47,1,33.0,1,0,0,2630.0,138.0,63
7669864,1,30,1,28.0,0,1,1,38866.0,124.0,119


In [13]:
# Number of training rows with Response == 1.
b = (train['Response'] == 1).sum()

In [14]:
# Number of training rows with Response == 0.
a = (train['Response'] == 0).sum()

In [15]:
# Share of training rows with Response == 1 (`b` is that row count).
b/len(train)

0.12299729208631043

In [16]:
# Share of training rows with Response == 0 (`a` is that row count).
a/len(train)

0.8770027079136896

In [17]:
# Missing-value count per column of the training frame.
train.isna().sum()

Gender                  0
Age                     0
Driving_License         0
Region_Code             0
Previously_Insured      0
Vehicle_Age             0
Vehicle_Damage          0
Annual_Premium          0
Policy_Sales_Channel    0
Vintage                 0
Response                0
dtype: int64

In [18]:
# Render floats with exactly three decimals in every pandas display from here on.
pd.set_option('display.float_format', '{:.3f}'.format)

In [22]:
# Summary statistics (count, mean, std, quartiles, min/max) per training column.
train.describe()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
count,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0,11504798.0
mean,0.541,38.384,0.998,26.419,0.463,0.522,0.503,30461.359,112.425,163.898,0.123
std,0.498,14.993,0.044,12.992,0.499,0.577,0.5,16454.744,54.036,79.98,0.328
min,0.0,20.0,0.0,0.0,0.0,0.0,0.0,2630.0,1.0,10.0,0.0
25%,0.0,24.0,1.0,15.0,0.0,0.0,0.0,25277.0,29.0,99.0,0.0
50%,1.0,36.0,1.0,28.0,0.0,0.0,1.0,31824.0,151.0,166.0,0.0
75%,1.0,49.0,1.0,35.0,1.0,1.0,1.0,39451.0,152.0,232.0,0.0
max,1.0,85.0,1.0,52.0,1.0,2.0,1.0,540165.0,163.0,299.0,1.0


In [23]:
# Display the training frame (the cell's last expression is rendered).
train

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,21,1,35.000,0,0,1,65101.000,124.000,187,0
1,1,43,1,28.000,0,2,1,58911.000,26.000,288,1
2,0,25,1,14.000,1,1,0,38043.000,152.000,254,0
3,0,35,1,1.000,0,0,1,2630.000,156.000,76,0
4,0,36,1,15.000,1,0,0,31951.000,152.000,294,0
...,...,...,...,...,...,...,...,...,...,...,...
11504793,1,48,1,6.000,0,0,1,27412.000,26.000,218,0
11504794,0,26,1,36.000,0,1,1,29509.000,152.000,115,1
11504795,0,29,1,32.000,1,1,0,2630.000,152.000,189,0
11504796,0,51,1,28.000,0,0,1,48443.000,26.000,274,1


In [24]:
# Display the test frame (the cell's last expression is rendered).
target

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage
0,0,20,1,47.000,0,1,0,2630.000,160.000,228
1,1,47,1,28.000,0,0,1,37483.000,124.000,123
2,1,47,1,43.000,0,0,1,2630.000,26.000,271
3,0,22,1,47.000,1,1,0,24502.000,152.000,115
4,1,51,1,19.000,0,0,0,34115.000,124.000,148
...,...,...,...,...,...,...,...,...,...,...
7669861,1,57,1,28.000,0,0,1,51661.000,124.000,109
7669862,1,28,1,50.000,1,1,0,25651.000,152.000,184
7669863,1,47,1,33.000,1,0,0,2630.000,138.000,63
7669864,1,30,1,28.000,0,1,1,38866.000,124.000,119


In [26]:
# Training rows of the negative class (Response == 0).
negative_mask = train['Response'] == 0
condition = train.loc[negative_mask]

In [27]:
# Per-column first and third quartiles of the Response == 0 rows, and the
# interquartile range between them (each is a Series indexed by column name).
Q1 = condition.quantile(0.25)
Q3 = condition.quantile(0.75)
IQR = Q3 - Q1

In [28]:
# Tukey fences per column: anything below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A row is dropped if ANY of its columns falls outside that column's fences.
# NOTE(review): the test runs over every column of `condition`, including
# encoded/binary ones; where Q1 == Q3 the IQR is 0 and every differing value
# counts as an outlier — confirm this is intended.
is_outlier = ((condition < lower_fence) | (condition > upper_fence)).any(axis=1)
no_outliers = condition[~is_outlier]

In [29]:
# Display the Response == 0 rows that survived the IQR filter.
no_outliers

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
2,0,25,1,14.000,1,1,0,38043.000,152.000,254,0
4,0,36,1,15.000,1,0,0,31951.000,152.000,294,0
5,0,31,1,47.000,1,1,0,28150.000,152.000,197,0
6,1,23,1,45.000,1,1,0,27128.000,152.000,190,0
8,0,26,1,28.000,1,1,0,31639.000,152.000,36,0
...,...,...,...,...,...,...,...,...,...,...,...
11504787,0,22,1,13.000,0,1,1,31484.000,152.000,103,0
11504789,1,27,1,8.000,1,1,0,53615.000,152.000,98,0
11504790,0,21,1,36.000,1,1,0,38097.000,152.000,275,0
11504792,1,34,1,28.000,1,0,0,29974.000,154.000,201,0


In [30]:
# Rows with Response != 0 are kept unfiltered and stacked on top of the
# filtered Response == 0 rows; original index labels are preserved.
nonzero_response_rows = train[train['Response'] != 0]
train_clean = pd.concat([nonzero_response_rows, no_outliers])

In [31]:
# Display the recombined training frame.
train_clean

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
1,1,43,1,28.000,0,2,1,58911.000,26.000,288,1
7,0,47,1,8.000,0,0,1,40659.000,26.000,262,1
28,1,40,1,35.000,0,0,1,2630.000,157.000,204,1
36,0,50,1,46.000,0,0,1,29248.000,124.000,273,1
44,1,40,1,8.000,0,0,1,36217.000,124.000,286,1
...,...,...,...,...,...,...,...,...,...,...,...
11504787,0,22,1,13.000,0,1,1,31484.000,152.000,103,0
11504789,1,27,1,8.000,1,1,0,53615.000,152.000,98,0
11504790,0,21,1,36.000,1,1,0,38097.000,152.000,275,0
11504792,1,34,1,28.000,1,0,0,29974.000,154.000,201,0


In [32]:
# Response == 0 rows of the recombined frame.
clean_negative_mask = train_clean['Response'] == 0
condition2 = train_clean.loc[clean_negative_mask]

In [33]:
# Display the Response == 0 rows of the recombined frame.
condition2

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
2,0,25,1,14.000,1,1,0,38043.000,152.000,254,0
4,0,36,1,15.000,1,0,0,31951.000,152.000,294,0
5,0,31,1,47.000,1,1,0,28150.000,152.000,197,0
6,1,23,1,45.000,1,1,0,27128.000,152.000,190,0
8,0,26,1,28.000,1,1,0,31639.000,152.000,36,0
...,...,...,...,...,...,...,...,...,...,...,...
11504787,0,22,1,13.000,0,1,1,31484.000,152.000,103,0
11504789,1,27,1,8.000,1,1,0,53615.000,152.000,98,0
11504790,0,21,1,36.000,1,1,0,38097.000,152.000,275,0
11504792,1,34,1,28.000,1,0,0,29974.000,154.000,201,0


In [34]:
# Work on a copy so `train_clean` itself stays untouched.
df = train_clean.copy()

# Label column and the remaining feature columns.
y = df['Response']
x = df.drop(columns='Response')

In [35]:
# Hold out 10% of the rows for validation, keeping the class ratio of `y` in
# both parts; the fixed seed makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.1,
    stratify=y,
    random_state=0,
)

In [36]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve
from lightgbm import LGBMClassifier

In [37]:
def get_clf_eval(y_test, y_pred=None, pred_proba=None):
    """Print binary-classification metrics for one set of predictions.

    Args:
        y_test: True labels.
        y_pred: Predicted hard labels. Required, despite the ``None`` default.
        pred_proba: Predicted scores handed to ``roc_auc_score``. Required,
            despite the ``None`` default.

    Raises:
        ValueError: If ``y_pred`` or ``pred_proba`` is ``None``.

    Prints accuracy, precision, recall, F1, the ROC AUC of ``pred_proba``
    ("AUC") and the ROC AUC of the hard labels ("AUC2"). Returns ``None``.
    """
    # Every metric below needs both inputs, so fail early with a clear message
    # instead of an error from inside scikit-learn.
    if y_pred is None or pred_proba is None:
        raise ValueError("get_clf_eval requires both y_pred and pred_proba")

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    F1 = f1_score(y_test, y_pred)
    AUC = roc_auc_score(y_test, pred_proba)
    # AUC2 scores the hard labels themselves; np.asarray lets plain lists
    # through as well as arrays and Series.
    AU2 = roc_auc_score(y_test, np.asarray(y_pred).astype('float32'))

    print('\n정확도: {:.4f}'.format(accuracy))   # accuracy
    print('정밀도: {:.4f}'.format(precision))    # precision
    print('재현율: {:.4f}'.format(recall))       # recall
    print('F1: {:.4f}'.format(F1))
    print('AUC: {:.4f}'.format(AUC))
    print('AUC2 : {:.4f}'.format(AU2))

In [38]:
# Display the training-split features.
x_train

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage
3708870,1,48,1,28.000,0,0,1,44719.000,124.000,238
1654968,0,25,1,33.000,1,1,0,27803.000,152.000,56
10001077,0,46,1,41.000,0,0,1,27302.000,124.000,100
9023312,1,27,1,30.000,0,1,1,32067.000,152.000,55
3865625,0,52,1,37.000,0,0,0,39432.000,124.000,269
...,...,...,...,...,...,...,...,...,...,...
2420826,0,47,1,28.000,1,0,0,44163.000,124.000,261
8360268,0,25,1,23.000,1,1,0,54966.000,152.000,83
11397977,1,36,1,28.000,0,0,1,36789.000,26.000,298
2343823,0,22,1,21.000,1,1,0,24338.000,152.000,116


In [31]:
# Display the training-split labels.
# NOTE(review): the saved output below reports a different length than the
# training-set size in the LightGBM fit log further down — it looks stale from
# an earlier run; re-run to confirm.
y_train

5853872     0
4394468     0
10202146    1
4141629     0
10688668    1
           ..
5010815     0
3679955     0
10562844    0
10500569    0
3678888     0
Name: Response, Length: 10354318, dtype: int8

In [58]:
# NOTE(review): `best` is not assigned in any cell visible in this notebook —
# presumably it came from a hyperopt `fmin` search cell that was removed. On a
# fresh kernel this line would raise NameError; confirm and restore the search.
print("Best parameters found: ", best)

Best parameters found:  {'colsample_bytree': 0.8950326994698486, 'learning_rate': 0.01608853715084591, 'min_child_samples': 20.0, 'n_estimators': 773.0, 'num_leaves': 128.0, 'subsample': 0.6257147453238197}


In [40]:
# LightGBM settings. The first six values are copied from the
# "Best parameters found" printout, with the count-like ones written as ints.
best_params = {
    # tuned values
    'n_estimators': 773,
    'num_leaves': 128,
    'learning_rate': 0.01608853715084591,
    'min_child_samples': 20,
    'subsample': 0.6257147453238197,
    'colsample_bytree': 0.8950326994698486,
    # fixed settings
    'n_jobs': -1,
    'is_unbalance': True,
    'boost_from_average': False,
    # NOTE(review): this verbosity level emits a debug line per trained tree.
    'verbose': 10,
}


In [41]:
# Build the classifier from `best_params` and fit it on the training split.
best_lgbm_clf = LGBMClassifier(**best_params)
best_lgbm_clf.fit(x_train, y_train)

[LightGBM] [Info] Number of positive: 1273553, number of negative: 5486407
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.212326
[LightGBM] [Debug] init for col-wise cost 0.000278 seconds, init for row-wise cost 0.093403 seconds
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Debug] Using Dense Multi-Val Bin
[LightGBM] [Info] Total Bins 720
[LightGBM] [Info] Number of data points in the train set: 6759960, number of used features: 10
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 128

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 17
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 27
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 22
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 17
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 27
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 29
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 31
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 24
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 22
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 28
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 24
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 30
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 31
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 24
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 22
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 24
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 28
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 22
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 33
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 33
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 26
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 25
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 33
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 47
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 28
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 23
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 27
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 24
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 33
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 19
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 18
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 20
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 21
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth = 16
[LightGBM] [Debug] Trained a tree with leaves = 128 and depth 

In [42]:
# Score the held-out split: column 1 of predict_proba feeds the AUC, the hard
# labels feed the threshold-based metrics.
# NOTE(review): `x_val` is drawn from `train_clean`, whose Response == 0 rows
# were outlier-filtered, so these numbers describe the filtered distribution.
pred_proba = best_lgbm_clf.predict_proba(x_val)[:, 1]
pred = best_lgbm_clf.predict(x_val)
get_clf_eval(y_val, y_pred=pred, pred_proba=pred_proba)


정확도: 0.8536
정밀도: 0.5696
재현율: 0.9127
F1: 0.7014
AUC: 0.9595
AUC2 : 0.8763


In [43]:
# Column 1 of predict_proba for every row of the test frame.
# NOTE(review): this rebinds `a`, which earlier held the count of Response == 0 rows.
a=best_lgbm_clf.predict_proba(target)[:,1]

In [44]:
# Fill the sample submission's Response column with the test-set scores and
# write it out.
# NOTE(review): the scores are assigned positionally, so this presumably relies
# on sample_submission.csv listing ids in the same row order as test.csv —
# confirm.
submit = pd.read_csv("./playground-series-s4e7/sample_submission.csv")
pred = a 
submit["Response"] = pred
submit.to_csv("submission_LGBM_nooutliar.csv", index=False)
# Preview the first rows of the written submission.
submit.head()

Unnamed: 0,id,Response
0,11504798,1.0
1,11504799,0.805
2,11504800,1.0
3,11504801,0.0
4,11504802,0.206
