In [4]:
import os
import sklearn
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=FutureWarning)

In [13]:
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
scale_col = ['physical_activity_minutes_per_week','diet_score','sleep_hours_per_day','screen_time_hours_per_day','bmi','waist_to_hip_ratio',
             'systolic_bp','diastolic_bp','heart_rate','cholesterol_total','hdl_cholesterol','ldl_cholesterol','triglycerides']
encode_col = ['gender','ethnicity','education_level','income_level',
              'smoking_status','employment_status']

from sklearn.preprocessing import LabelEncoder, MinMaxScaler

def train_scale_encode_process(df,scale_col,encode_col):
    scalers = {}
    encoders = {}
    for col in scale_col:
        scaler = MinMaxScaler()
        df['scale_'+col] = scaler.fit_transform(df[[col]])
        scalers[col] = scaler
        df = df.drop(columns = col)
    for col in encode_col:
        encoder = LabelEncoder()
        df['encode_'+col] = encoder.fit_transform(df[[col]])
        encoders[col] = encoder
        df = df.drop(columns = col)
    df = df.drop(columns = ['id']).reset_index(drop = True)
    return df,scalers,encoders
def pred_encode_scale_process(df_pred,scale_col,encode_col,scalers,encoders):
    for col in scale_col:
        scaler = scalers[col]
        df_pred['scale_'+col] = scaler.transform(df_pred[[col]])
        df_pred = df_pred.drop(columns = col)
    for col in encode_col:
        encoder = encoders[col]
        df_pred['encode_'+col] = encoder.transform(df_pred[[col]])
        df_pred = df_pred.drop(columns = col)
    df_pred= df_pred.drop(columns = ['id']).reset_index(drop = True)
    return df_pred
train,scalers,encoders = train_scale_encode_process(df_train,scale_col,encode_col)
pred = pred_encode_scale_process(df_test,scale_col,encode_col,scalers,encoders)
x_col = list(train.columns)
y_col = 'diagnosed_diabetes'
x_col.remove('diagnosed_diabetes')
x = train[x_col]
y = train[y_col]

In [15]:
len(train.columns)

25

In [14]:
from sklearn.model_selection import train_test_split

train_data,test_data,train_label,test_label = train_test_split(x,y,test_size=0.2,random_state=42,shuffle=True)

In [16]:
train_data.shape[1]

24

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
model = Sequential([
    Dense(256, input_dim = train_data.shape[1], activation = 'relu'),
    Dropout(0.4),
    Dense(128, activation = 'relu'),
    Dropout(0.4),
    Dense(64, activation = 'relu'),
    Dropout(0.4),
    Dense(32, activation = 'relu'),
    Dropout(0.4),
    Dense(1, activation = 'relu'),
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
es = EarlyStopping(
    monitor = 'accuracy',
    min_delta = 0.0001,
    patience=3,
    verbose=1,
    mode='auto',
    restore_best_weights = True
)

# 模型訓練
history = model.fit(
    train_data, train_label,
    validation_data=(test_data, test_label),
    epochs=15,  # 訓練 50 個世代，可調整
    batch_size=128,
    verbose=1,
    callbacks = [es]
)

# 測試集預測
predictions = model.predict(pred)
predictions = (predictions > 0.5).astype(int)  # 大於 0.5 的預測為 1 (有糖尿病診斷)

# 輸出預測結果
print(predictions)

Epoch 1/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.6199 - loss: 6.0418 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 2/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.6225 - loss: 6.0158 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 3/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.6230 - loss: 6.0086 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 4/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.6231 - loss: 6.0085 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 5/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.6231 - loss: 6.0084 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 6/15
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - accuracy: 0.6230 - loss: 6.0091 - val_accuracy: 0.6241 - val_loss: 5.9931
Epoch 6: ea