<a href="https://colab.research.google.com/github/Tiabet/DACON_WebClick/blob/main/WebClick_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import plotly.express as px

import category_encoders as ce
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Pin the random seeds so that repeated runs give the same results.
def seed_everything(seed):
    """Seed the stdlib and NumPy global RNGs and export PYTHONHASHSEED.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed``; its
            string form is written to the ``PYTHONHASHSEED`` environment
            variable.

    Returns:
        None. The function only has side effects (global RNG state and
        ``os.environ``).
    """
    random.seed(seed)
    hash_seed = str(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    np.random.seed(seed)


seed_everything(42)  # fix the seed to 42

In [None]:

# Load the provided train and test datasets from the local data/ directory.
train = pd.read_csv(filepath_or_buffer='data/train.csv')
test = pd.read_csv(filepath_or_buffer='data/test.csv')

In [None]:
# EDA 1: sparse and dense features - preview the first five training rows.
train.head(n=5)

In [None]:
# EDA 2: class imbalance of the target column.
# value_counts() orders its result by frequency, not by label, so sort by
# label to guarantee the shares line up with the hard-coded 0 / 1 bar names
# below even if class 1 happens to be the majority.
click = train['Click'].value_counts(normalize=True).sort_index()

# NOTE: with normalize=True the plotted values are fractions in [0, 1],
# although the axis label reads 'Percentage'.
click_figure = px.bar(click,
             x=['Not Clicked : 0', 'Clicked : 1'],
             y=click.values.tolist(),
             labels={'x': 'Value', 'y': 'Percentage'},
             width = 450,
             height = 500
            )

# Render the chart.
click_figure.show()

In [None]:
# Data preprocessing 1: separate the feature matrix from the target.
# 'ID' is dropped from both frames; 'Click' is the target and exists only
# in the training frame.
train_y = train.loc[:, 'Click']
train_x = train.drop(['ID', 'Click'], axis=1)

test_x = test.drop(['ID'], axis=1)

In [None]:
# Data preprocessing 2: replace missing values with 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on a column selection: the chained in-place form
# emits a FutureWarning on recent pandas and silently does nothing once
# Copy-on-Write is enabled.
# NOTE(review): only columns with nulls in the training frame are filled, so a
# column that is null only in test_x keeps its NaNs - confirm this is intended.
for col in tqdm(train_x.columns):
    if train_x[col].isnull().any():
        train_x[col] = train_x[col].fillna(0)
        test_x[col] = test_x[col].fillna(0)

In [None]:
# Data preprocessing 3: count encoding.
# Every object-dtype column of the training frame is handed to the encoder.
encoding_target = [name for name, dtype in train_x.dtypes.items() if dtype == "object"]

# Fit on the training frame only, then apply the same fitted encoder to both
# the training and the test features.
enc = ce.CountEncoder(cols=encoding_target)
enc = enc.fit(train_x, train_y)
X_train_encoded, X_test_encoded = (enc.transform(frame) for frame in (train_x, test_x))

In [None]:
# Model: AdaBoost classifier with the library's default hyper-parameters.
model = AdaBoostClassifier()
# Train on the count-encoded features; the fitted estimator is the cell output.
model.fit(X=X_train_encoded, y=train_y)

In [None]:
# Class-probability predictions for the encoded test set.
pred = model.predict_proba(X_test_encoded)
# Show the class labels followed by the probabilities; the columns of
# predict_proba follow the order of model.classes_.
display(model.classes_, pred)

In [None]:
# Submission: fill the sample submission with the predicted click scores.
sample_submission = pd.read_csv('sample_submission_sample.csv')
# A bare expression is rendered only when it is the last statement of a cell,
# so the previews are shown explicitly with display().
display(sample_submission.head())

# Use the second predict_proba column as the 'Click' score
# (assumes model.classes_ is [0, 1], i.e. column 1 is the clicked class -
# TODO confirm against the displayed model.classes_).
sample_submission['Click'] = pred[:,1]
display(sample_submission.head())

# Write the submission file without the DataFrame index.
sample_submission.to_csv('baseline_submission.csv', index=False)