In [None]:
# https://www.kaggle.com/datasets/gauravduttakiit/smoker-status-prediction-using-biosignals

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the two CSV files shipped with the dataset: `data` comes from
# train.csv and `test` from test.csv. Then print the column/dtype summary
# of the training frame.
train_path, test_path = "train.csv", "test.csv"
data = pd.read_csv(train_path)
test = pd.read_csv(test_path)
data.info()

In [None]:
# Count every missing cell in the training frame; drop incomplete rows in
# place when there are any, otherwise just report that the frame is complete.
missing_cells = data.isna().sum().sum()
if missing_cells != 0:
    data.dropna(inplace=True)
else:
    print("no missing values in the train dataset")

In [None]:
# Display the training DataFrame as the cell output.
data

In [None]:
# List the column labels of the training DataFrame.
data.columns

In [None]:
def summary_style(df, caption=""):
    return df.style.background_gradient(cmap="PuBu").set_caption(caption).set_table_styles([{
        'selector' : 'caption',
        'props' : [
            ('font-size', '18px'),
            ('color', '#1c4978'),
            ('font-weight', 'bold'),
            ('font-weight', 'bold'),
            ('text-align', 'left'),
            ('padding', '10px')
        ]
    }])

# Show ten randomly sampled rows of the training frame.
sample_rows = data.sample(10)
display(summary_style(sample_rows, "Sample Rows"))

# Show the number of distinct values per column as a single-row table.
unique_counts = data.nunique().to_frame(name='Unique Value Count').T
display(summary_style(unique_counts, "Training Dataset Unique Value Counts"))

In [None]:
# Feature matrix: positional columns 1 through 22 of the training frame.
# NOTE(review): presumably column 0 is an id and column 23 the label - confirm against data.columns.
feature_columns = slice(1, 23)
x = np.array(data)[:, feature_columns]
x.shape

In [None]:
# Inspect the first row of the feature matrix.
x[0]

In [None]:
# Target vector: positional column 23 of the training frame.
target_column = 23
y = np.array(data)[:, target_column]
y

In [None]:
from sklearn.preprocessing import MinMaxScaler

# NOTE(review): both scalers are fit on the full arrays, i.e. before the
# train/validation/test split performed in a later cell.

# Min-max scale the feature matrix x.
scaler_x = MinMaxScaler().fit(x)
x = scaler_x.transform(x)

# Min-max scale the target y; it is reshaped to a single column first.
y_column = y.reshape(-1, 1)
scaler_y = MinMaxScaler().fit(y_column)
y = scaler_y.transform(y_column)


In [None]:
from sklearn.model_selection import train_test_split

# Stage 1: hold out a test split from the full (x, y) arrays.
x_train_set, x_test, y_train_set, y_test = train_test_split(
    x, y, random_state=1
)
# Stage 2: split the remainder into training and validation parts.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_set, y_train_set, random_state=1
)

# Report the training shapes and the first feature row.
print(x_train.shape, y_train.shape)
print(x[0])

In [None]:
#1. build model

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential   # model construction
from tensorflow.keras.layers import Dense        # builds the hidden layers
from tensorflow.keras.optimizers import SGD      # model training -> SGD, stochastic gradient descent

keras.backend.clear_session()                    # reset the Keras backend session
# Seed the NumPy and TensorFlow random generators with a fixed value.
np.random.seed(1)
tf.random.set_seed(1)

In [None]:
# Fully connected binary classifier: four hidden Dense layers followed by a
# single sigmoid output unit. The input width is taken from x_train.
model = Sequential()
model.add(keras.layers.Dense(128, activation='relu', input_shape=x_train.shape[1:]))
model.add(keras.layers.Dense(50, activation='relu'))
model.add(keras.layers.Dense(100, activation='tanh'))
model.add(keras.layers.Dense(30, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and the
# accuracy metric (percentage of correct predictions).
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# epochs: how many passes to run; batch_size (rows per mini-batch) is left at
# its default of 32. The validation split is evaluated after each epoch.
train = model.fit(
    x_train,
    y_train,
    epochs=20,
    validation_data=(x_valid, y_valid),
)


In [None]:
# Plot every series recorded in the fit history (one line per metric).
pd.DataFrame(train.history).plot()
plt.grid(True)
# plt.show must be *called*; a bare `plt.show` only echoes the function object.
plt.show()

In [None]:
# Evaluate the trained model on the held-out test split.
model.evaluate(x_test, y_test)

In [None]:
# Sigmoid outputs for the test split; preview the first ten, rounded to 2 decimals.
y_proba = model.predict(x_test)
np.round(y_proba[:10], 2)

In [None]:
# The network ends in a single sigmoid unit, so y_proba has one column and
# np.argmax(..., axis=1) would always return 0. Convert probabilities to class
# labels by thresholding at 0.5 instead, then flatten to a 1-D label vector.
y_pred = (y_proba > 0.5).astype(int).ravel()
y_pred[:5]

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test[:10]

In [None]:
#Max-norm regularization

# Second classifier: ReLU hidden layers whose kernels are constrained to a
# maximum norm of 1, with 20% dropout after each hidden layer.
# Standard Dropout is used because the hidden activations are ReLU;
# AlphaDropout is intended for SELU self-normalizing networks.
# NOTE(review): if a self-normalizing net was intended, switch the hidden
# activations to 'selu' and use AlphaDropout instead.
mn_model = keras.models.Sequential(

    [
        keras.layers.Dense(128, activation = 'relu', input_shape = x_train.shape[1:]),
        keras.layers.Dropout(rate = 0.2),
        keras.layers.Dense(300, activation = 'relu', kernel_initializer = 'lecun_normal', kernel_constraint = keras.constraints.max_norm(1.)),
        keras.layers.Dropout(rate = 0.2),
        keras.layers.Dense(100, activation = 'relu', kernel_initializer = 'lecun_normal', kernel_constraint = keras.constraints.max_norm(1.)),
        keras.layers.Dropout(rate = 0.2),
        keras.layers.Dense(1, activation = 'sigmoid')

    ]

)

In [None]:
# Configure training for the max-norm model: Nadam optimizer,
# binary cross-entropy loss, accuracy metric.
mn_model.compile(
    optimizer='nadam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the max-norm model for 20 epochs, validating on the validation split.
# Note that `train` is rebound to this model's fit history.
train = mn_model.fit(
    x_train,
    y_train,
    epochs=20,
    validation_data=(x_valid, y_valid),
)

In [None]:
# Plot every series recorded in the most recent fit history.
pd.DataFrame(train.history).plot()
plt.grid(True)
# plt.show must be *called*; a bare `plt.show` only echoes the function object.
plt.show()

In [None]:
# Evaluate the max-norm model on the held-out test split.
mn_model.evaluate(x_test, y_test)

In [None]:
# Sigmoid outputs of the max-norm model on the test split; preview the
# first ten, rounded to 2 decimals.
y_proba = mn_model.predict(x_test)
np.round(y_proba[:10], 2)
