In [252]:
import numpy as np
import pandas as pd
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold


In [253]:
train_data = pd.read_csv('data/train.csv', index_col='id')
train_data.columns

Index(['age', 'alcohol_consumption_per_week',
       'physical_activity_minutes_per_week', 'diet_score',
       'sleep_hours_per_day', 'screen_time_hours_per_day', 'bmi',
       'waist_to_hip_ratio', 'systolic_bp', 'diastolic_bp', 'heart_rate',
       'cholesterol_total', 'hdl_cholesterol', 'ldl_cholesterol',
       'triglycerides', 'gender', 'ethnicity', 'education_level',
       'income_level', 'smoking_status', 'employment_status',
       'family_history_diabetes', 'hypertension_history',
       'cardiovascular_history', 'diagnosed_diabetes'],
      dtype='object')

In [254]:
features = train_data.drop('diagnosed_diabetes', axis=1)
labels = train_data['diagnosed_diabetes'].astype(int)

features.head()

Unnamed: 0_level_0,age,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,sleep_hours_per_day,screen_time_hours_per_day,bmi,waist_to_hip_ratio,systolic_bp,diastolic_bp,...,triglycerides,gender,ethnicity,education_level,income_level,smoking_status,employment_status,family_history_diabetes,hypertension_history,cardiovascular_history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,31,1,45,7.7,6.8,6.1,33.4,0.93,112,70,...,102,Female,Hispanic,Highschool,Lower-Middle,Current,Employed,0,0,0
1,50,2,73,5.7,6.5,5.8,23.8,0.83,120,77,...,124,Female,White,Highschool,Upper-Middle,Never,Employed,0,0,0
2,32,3,158,8.5,7.4,9.1,24.1,0.83,95,89,...,108,Male,Hispanic,Highschool,Lower-Middle,Never,Retired,0,0,0
3,54,3,77,4.6,7.0,9.2,26.6,0.83,121,69,...,123,Female,White,Highschool,Lower-Middle,Current,Employed,0,1,0
4,54,1,55,5.7,6.2,5.1,28.8,0.9,108,60,...,124,Male,White,Highschool,Upper-Middle,Never,Retired,0,1,0


In [255]:
DROP_COLUMNS = ['gender', 'ethnicity', 'education_level', 'income_level', 'smoking_status', 'employment_status']
features = features.drop(columns=DROP_COLUMNS, axis=1)
features.head()

Unnamed: 0_level_0,age,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,sleep_hours_per_day,screen_time_hours_per_day,bmi,waist_to_hip_ratio,systolic_bp,diastolic_bp,heart_rate,cholesterol_total,hdl_cholesterol,ldl_cholesterol,triglycerides,family_history_diabetes,hypertension_history,cardiovascular_history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,31,1,45,7.7,6.8,6.1,33.4,0.93,112,70,62,199,58,114,102,0,0,0
1,50,2,73,5.7,6.5,5.8,23.8,0.83,120,77,71,199,50,121,124,0,0,0
2,32,3,158,8.5,7.4,9.1,24.1,0.83,95,89,73,188,59,114,108,0,0,0
3,54,3,77,4.6,7.0,9.2,26.6,0.83,121,69,74,182,54,85,123,0,1,0
4,54,1,55,5.7,6.2,5.1,28.8,0.9,108,60,85,206,49,131,124,0,1,0


In [256]:
# 分箱
from sklearn.preprocessing import KBinsDiscretizer

BINARY_COLUMNS = ['family_history_diabetes', 'hypertension_history', 'cardiovascular_history']
ID_COL = ['id']

discretizer = KBinsDiscretizer(n_bins=20, encode='ordinal', strategy='quantile', random_state=42)
binned_features = discretizer.fit_transform(features.drop(columns=BINARY_COLUMNS, axis=1))
binned_features = pd.DataFrame(binned_features, columns=features.drop(columns=BINARY_COLUMNS, axis=1).columns)
features = pd.concat([binned_features, features[BINARY_COLUMNS]], axis=1)
features.head()



Unnamed: 0,age,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,sleep_hours_per_day,screen_time_hours_per_day,bmi,waist_to_hip_ratio,systolic_bp,diastolic_bp,heart_rate,cholesterol_total,hdl_cholesterol,ldl_cholesterol,triglycerides,family_history_diabetes,hypertension_history,cardiovascular_history
0,1.0,0.0,4.0,17.0,8.0,10.0,19.0,13.0,7.0,4.0,2.0,15.0,14.0,14.0,3.0,0,0,0
1,10.0,1.0,10.0,8.0,6.0,9.0,4.0,4.0,12.0,11.0,10.0,15.0,6.0,16.0,10.0,0,0,0
2,1.0,2.0,18.0,19.0,13.0,18.0,5.0,4.0,0.0,18.0,12.0,10.0,15.0,14.0,5.0,0,0,0
3,12.0,2.0,11.0,3.0,10.0,18.0,12.0,4.0,13.0,3.0,13.0,8.0,10.0,3.0,10.0,0,1,0
4,12.0,0.0,6.0,8.0,4.0,6.0,17.0,11.0,5.0,0.0,18.0,17.0,6.0,18.0,10.0,0,1,0


In [257]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
params = {
    "loss_function": "Logloss",
    "eval_metric": "AUC",
    "iterations": 4000,
    "learning_rate": 0.05,
    "depth": 6,
    "l2_leaf_reg": 6,
    "random_strength": 1.0,
    "bootstrap_type": "Bayesian",
    "bagging_temperature": 0.8,
    "min_data_in_leaf": 50,
    "random_seed": 42,
    "verbose": 200,
    "task_type": "GPU"   # TURN GPU ON
}
test_data = pd.read_csv('data/test.csv', index_col='id').drop(DROP_COLUMNS, axis=1)
test_data.head()

Unnamed: 0_level_0,age,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,sleep_hours_per_day,screen_time_hours_per_day,bmi,waist_to_hip_ratio,systolic_bp,diastolic_bp,heart_rate,cholesterol_total,hdl_cholesterol,ldl_cholesterol,triglycerides,family_history_diabetes,hypertension_history,cardiovascular_history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
700000,45,4,100,4.3,6.8,6.2,25.5,0.84,123,70,64,209,55,135,111,0,0,0
700001,35,1,87,3.5,4.6,9.0,28.6,0.88,120,74,59,159,47,83,145,0,0,0
700002,45,1,61,7.6,6.8,7.0,28.5,0.94,112,71,75,173,43,99,184,0,0,0
700003,55,2,81,7.3,7.3,5.0,26.9,0.91,114,81,61,203,59,116,128,0,0,0
700004,77,2,29,7.3,7.6,8.5,22.0,0.83,131,78,79,177,59,87,133,0,0,0


In [258]:
discretizer.transform(test_data.drop(columns=BINARY_COLUMNS, axis=1))
binned_test_features = discretizer.transform(test_data.drop(columns=BINARY_COLUMNS, axis=1))
binned_test_features = pd.DataFrame(binned_test_features, columns=test_data.drop(columns=BINARY_COLUMNS, axis=1).columns)
binned_test_features.index = test_data.index
test_data = pd.concat([binned_test_features, test_data[BINARY_COLUMNS]], axis=1)
test_data.head()

Unnamed: 0_level_0,age,alcohol_consumption_per_week,physical_activity_minutes_per_week,diet_score,sleep_hours_per_day,screen_time_hours_per_day,bmi,waist_to_hip_ratio,systolic_bp,diastolic_bp,heart_rate,cholesterol_total,hdl_cholesterol,ldl_cholesterol,triglycerides,family_history_diabetes,hypertension_history,cardiovascular_history
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
700000,6.0,3.0,15.0,2.0,8.0,11.0,9.0,5.0,14.0,4.0,4.0,18.0,11.0,19.0,6.0,0,0,0
700001,2.0,0.0,13.0,1.0,0.0,18.0,16.0,9.0,12.0,8.0,1.0,0.0,4.0,3.0,16.0,0,0,0
700002,6.0,0.0,8.0,17.0,8.0,13.0,16.0,13.0,7.0,5.0,14.0,4.0,2.0,8.0,19.0,0,0,0
700003,13.0,1.0,12.0,16.0,13.0,6.0,12.0,12.0,8.0,15.0,2.0,16.0,15.0,15.0,11.0,0,0,0
700004,19.0,1.0,1.0,16.0,15.0,17.0,1.0,4.0,18.0,12.0,17.0,6.0,15.0,4.0,13.0,0,0,0


In [259]:
test_data.shape

(300000, 18)

In [260]:
oof = np.zeros(len(features))
cat_test_preds = np.zeros(len(test_data))
for fold, (train_index, val_index) in enumerate(skf.split(features, labels)):
    print(f"Fold {fold + 1}")
    X_train_fold, X_val_fold = features.iloc[train_index], features.iloc[val_index]
    y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]
    model = CatBoostClassifier(**params)
    model.fit(
        X_train_fold, y_train_fold,
        eval_set=(X_val_fold, y_val_fold),
        early_stopping_rounds=100,
        use_best_model=True
    )
    oof[val_index] = model.predict_proba(X_val_fold)[:, 1]
    cat_test_preds += model.predict_proba(test_data)[:, 1] / skf.n_splits
    


Fold 1
0:	test: 0.6743267	best: 0.6743267 (0)	total: 3.68ms	remaining: 14.7s


Default metric period is 5 because AUC is/are not implemented for GPU


200:	test: 0.7018071	best: 0.7018071 (200)	total: 724ms	remaining: 13.7s
400:	test: 0.7026670	best: 0.7026670 (400)	total: 1.46s	remaining: 13.1s
600:	test: 0.7028689	best: 0.7028696 (551)	total: 2.18s	remaining: 12.3s
bestTest = 0.7029508948
bestIteration = 671
Shrink model to first 672 iterations.
Fold 2
0:	test: 0.6777811	best: 0.6777811 (0)	total: 4.09ms	remaining: 16.4s


Default metric period is 5 because AUC is/are not implemented for GPU


200:	test: 0.7003444	best: 0.7003444 (200)	total: 745ms	remaining: 14.1s
400:	test: 0.7012239	best: 0.7012245 (398)	total: 1.48s	remaining: 13.3s
600:	test: 0.7015609	best: 0.7015638 (584)	total: 2.2s	remaining: 12.5s
800:	test: 0.7016698	best: 0.7016703 (798)	total: 2.93s	remaining: 11.7s
1000:	test: 0.7017887	best: 0.7017891 (999)	total: 3.65s	remaining: 10.9s
bestTest = 0.7017958462
bestIteration = 1051
Shrink model to first 1052 iterations.
Fold 3
0:	test: 0.6743363	best: 0.6743363 (0)	total: 3.91ms	remaining: 15.6s


Default metric period is 5 because AUC is/are not implemented for GPU


200:	test: 0.7005262	best: 0.7005262 (200)	total: 747ms	remaining: 14.1s
400:	test: 0.7016584	best: 0.7016584 (400)	total: 1.46s	remaining: 13.1s
600:	test: 0.7020776	best: 0.7020792 (597)	total: 2.18s	remaining: 12.4s
800:	test: 0.7021902	best: 0.7022012 (783)	total: 2.9s	remaining: 11.6s
1000:	test: 0.7022057	best: 0.7022433 (907)	total: 3.6s	remaining: 10.8s
bestTest = 0.7022432685
bestIteration = 907
Shrink model to first 908 iterations.
Fold 4
0:	test: 0.6786501	best: 0.6786501 (0)	total: 3.59ms	remaining: 14.4s


Default metric period is 5 because AUC is/are not implemented for GPU


200:	test: 0.7019023	best: 0.7019031 (199)	total: 720ms	remaining: 13.6s
400:	test: 0.7030329	best: 0.7030407 (398)	total: 1.44s	remaining: 12.9s
600:	test: 0.7033354	best: 0.7033354 (600)	total: 2.15s	remaining: 12.1s
800:	test: 0.7034208	best: 0.7034355 (779)	total: 2.86s	remaining: 11.4s
1000:	test: 0.7034459	best: 0.7034608 (957)	total: 3.58s	remaining: 10.7s
bestTest = 0.703460753
bestIteration = 957
Shrink model to first 958 iterations.
Fold 5
0:	test: 0.6788318	best: 0.6788318 (0)	total: 3.87ms	remaining: 15.5s


Default metric period is 5 because AUC is/are not implemented for GPU


200:	test: 0.7020959	best: 0.7020959 (200)	total: 706ms	remaining: 13.3s
400:	test: 0.7031103	best: 0.7031103 (400)	total: 1.41s	remaining: 12.7s
600:	test: 0.7034619	best: 0.7034653 (599)	total: 2.13s	remaining: 12.1s
800:	test: 0.7035549	best: 0.7035549 (799)	total: 2.86s	remaining: 11.4s
1000:	test: 0.7035730	best: 0.7035929 (938)	total: 3.59s	remaining: 10.8s
bestTest = 0.7035935521
bestIteration = 1030
Shrink model to first 1031 iterations.


In [None]:
print("=== CatBoost Classifier Performance ===")

accuracy = accuracy_score(labels, (oof > 0.5).astype(int))
precision = precision_score(labels, (oof > 0.5).astype(int))
recall = recall_score(labels, (oof > 0.5).astype(int))
f1 = f1_score(labels, (oof > 0.5).astype(int))
roc_auc = roc_auc_score(labels, oof)
print(f'准确率: {accuracy:.4f}')
print(f'精确率: {precision:.4f}')
print(f'召回率: {recall:.4f}')
print(f'F1 分数: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

=== CatBoost Classifier Performance ===
准确率: 0.6681
精确率: 0.6898
召回率: 0.8496
F1 分数: 0.7614
ROC AUC: 0.7028
预测的自信程度: 0.6025


In [137]:
lgb_params = {
    'n_estimators': 200,
    'learning_rate': 0.03,
    'num_leaves': 64,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': "binary",
    'random_state': 42,
}

In [138]:
test_data.shape

(300000, 18)

In [None]:
oof = np.zeros(len(features))
lgb_test_preds = np.zeros(len(test_data))
for fold, (train_index, val_index) in enumerate(skf.split(features, labels)):
    print(f"Fold {fold + 1}")
    X_train_fold, X_val_fold = features.iloc[train_index], features.iloc[val_index]
    y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]
    model = LGBMClassifier(**lgb_params)
    model.fit(
        X_train_fold, y_train_fold,
    )
    oof[val_index] = model.predict_proba(X_val_fold)[:, 1]
    lgb_test_preds += model.predict_proba(test_data)[:, 1] / skf.n_splits

Fold 1
[LightGBM] [Info] Number of positive: 349045, number of negative: 210955
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81
[LightGBM] [Info] Number of data points in the train set: 560000, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.623295 -> initscore=0.503556
[LightGBM] [Info] Start training from score 0.503556
Fold 2
[LightGBM] [Info] Number of positive: 349045, number of negative: 210955
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008952 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 81
[LightGBM] [Info] Number of data points in the train set: 560000, number of used features: 18
[Lig

In [None]:
print("=== LightGBM Classifier Performance ===")

accuracy = accuracy_score(labels, (oof > 0.5).astype(int))
precision = precision_score(labels, (oof > 0.5).astype(int))
recall = recall_score(labels, (oof > 0.5).astype(int))
f1 = f1_score(labels, (oof > 0.5).astype(int))
roc_auc = roc_auc_score(labels, oof)
print(f'准确率: {accuracy:.4f}')
print(f'精确率: {precision:.4f}')
print(f'召回率: {recall:.4f}')
print(f'F1 分数: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

=== LightGBM Classifier Performance ===
准确率: 0.6530
精确率: 0.6683
召回率: 0.8798
F1 分数: 0.7596
ROC AUC: 0.6774
预测的自信程度: 0.6102


In [145]:
feature_train, feature_test, label_train, label_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

In [None]:
oof = np.zeros(len(features))
rf_test_preds = np.zeros(len(test_data))
for fold, (train_index, val_index) in enumerate(skf.split(features, labels)):
    print(f"Fold {fold + 1}")
    X_train_fold, X_val_fold = features.iloc[train_index], features.iloc[val_index]
    y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]
    rf = RandomForestClassifier(
        n_estimators=500,
        max_depth=10,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1
    )
    rf.fit(X_train_fold, y_train_fold)
    oof[val_index] = rf.predict_proba(X_val_fold)[:, 1]
    rf_test_preds += rf.predict_proba(test_data)[:, 1] / skf.n_splits
print("=== Random Forest Classifier Performance ===")
accuracy = accuracy_score(labels, (oof > 0.5).astype(int))
precision = precision_score(labels, (oof > 0.5).astype(int))
recall = recall_score(labels, (oof > 0.5).astype(int))
f1 = f1_score(labels, (oof > 0.5).astype(int))
roc_auc = roc_auc_score(labels, oof)
print(f'准确率: {accuracy:.4f}')
print(f'精确率: {precision:.4f}')
print(f'召回率: {recall:.4f}')
print(f'F1 分数: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
=== Random Forest Classifier Performance ===
准确率: 0.6520
精确率: 0.6614
召回率: 0.9050
F1 分数: 0.7643
ROC AUC: 0.6766
预测的自信程度: 0.6109


In [None]:
oof = np.zeros(len(features))
net_test_preds = np.zeros(len(test_data))
for fold, (train_index, val_index) in enumerate(skf.split(features, labels)):
    print(f"Fold {fold + 1}")
    X_train_fold, X_val_fold = features.iloc[train_index], features.iloc[val_index]
    y_train_fold, y_val_fold = labels.iloc[train_index], labels.iloc[val_index]
    
    net = MLPClassifier(
        hidden_layer_sizes=(128, 64),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='adaptive',
        max_iter=200,
        random_state=42,
        early_stopping=True,
        n_iter_no_change=20,
        validation_fraction=0.1
    )
    net.fit(X_train_fold, y_train_fold)
    oof[val_index] = net.predict_proba(X_val_fold)[:, 1]
    net_test_preds += net.predict_proba(test_data)[:, 1] / skf.n_splits
print("=== Neural Network Classifier Performance ===")
accuracy = accuracy_score(labels, (oof > 0.5).astype(int))
precision = precision_score(labels, (oof > 0.5).astype(int))
recall = recall_score(labels, (oof > 0.5).astype(int))
f1 = f1_score(labels, (oof > 0.5).astype(int))
roc_auc = roc_auc_score(labels, oof)
print(f'准确率: {accuracy:.4f}')
print(f'精确率: {precision:.4f}')
print(f'召回率: {recall:.4f}')
print(f'F1 分数: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
=== Neural Network Classifier Performance ===
准确率: 0.6528
精确率: 0.6668
召回率: 0.8853
F1 分数: 0.7607
ROC AUC: 0.6768
预测的自信程度: 0.6104


In [143]:
# 排名概率融合
from scipy.stats import rankdata
cat_test_df = pd.DataFrame(cat_test_preds, columns=["diagnosed_diabetes"])
lgb_test_df = pd.DataFrame(lgb_test_preds, columns=["diagnosed_diabetes"])
cat_test_df["rank"] = rankdata(cat_test_df["diagnosed_diabetes"])
lgb_test_df["rank"] = rankdata(lgb_test_df["diagnosed_diabetes"])

test_df = pd.DataFrame({
    'id': test_data.index
})
test_df["diagnosed_diabetes"] = 0.5 * cat_test_df["rank"] + 0.5 * lgb_test_df["rank"]
test_df["diagnosed_diabetes"] /= test_df["diagnosed_diabetes"].max()

test_df["diagnosed_diabetes"] = (test_df["diagnosed_diabetes"] > 0.5).astype(int)

test_df[["id", "diagnosed_diabetes"]].to_csv("data/submission.csv", index=False)
test_df.head()

Unnamed: 0,id,diagnosed_diabetes
0,700000,0
1,700001,1
2,700002,1
3,700003,0
4,700004,1
