In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn import metrics
from sklearn import decomposition
from sklearn import manifold
from tqdm.notebook import trange, tqdm
import matplotlib.pyplot as plt
import numpy as np

import copy
import random
import time

In [2]:
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [3]:
import pandas as pd
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler  
from sklearn.neural_network import MLPClassifier 
from sklearn.metrics import classification_report, confusion_matrix 

In [4]:
import pandas as pd
import numpy as np
all_Chinese = pd.read_excel('/Users/anyingbai/Desktop/连续剧/Oxford/COPD模型/最新修改/心血管共病/机器学习模型训练/CHARLS_10_18基线无共病.xlsx', index_col=0)

In [7]:
Chinese2=all_Chinese.copy(deep=True)
Chinese2.dropna(axis=0, how='any', subset=None, inplace=True)
Chinese2.head()
Chinese2.isnull().sum()

edu_group                   0
ADL_Disability              0
IADL_Disability             0
Male                        0
Married                     0
age                         0
household_wealth            0
Excessive_drink             0
physical_activity           0
smoking_present             0
Cardiometabolic_multi_18    0
dtype: int64

In [8]:
Chinese2_Y = Chinese2['Cardiometabolic_multi_18']
Chinese2_X = Chinese2.loc[:,['ADL_Disability','IADL_Disability','edu_group','Male','Married','age','household_wealth','Excessive_drink','physical_activity','smoking_present']]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(Chinese2_X, Chinese2_Y, test_size = 0.40)  
scaler = StandardScaler()  
scaler.fit(X_train)
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)  

In [10]:
from sklearn.model_selection import cross_val_score
# 定义不同的模型参数配置
params = [
    {
        'solver': 'adam',
        'learning_rate': 'constant',
        'learning_rate_init': 1e-4,
        'hidden_layer_sizes': (10, 10, 10),
        'max_iter': 1000,
        'alpha': 0.0001
    },
    {
        'solver': 'adam',
        'learning_rate': 'constant',
        'learning_rate_init': 1e-4,
        'hidden_layer_sizes': (50, 50, 50, 50),
        'max_iter': 1000,
        'alpha': 0.0001
    },

]

best_score = 0
best_params = None

for param_set in params:
    mlp = MLPClassifier(**param_set)
    scores = cross_val_score(mlp, X_train, y_train, cv=5, scoring='precision')  # 使用 precision 作为评估指标
    avg_precision = scores.mean()
    if avg_precision > best_score:
        best_score = avg_precision
        best_params = param_set

# 打印最佳参数配置
print("Best Parameters:", best_params)

# 使用最佳参数配置训练最终模型
mlp = MLPClassifier(**best_params)
mlp.fit(X_train, y_train)

# 在测试集上评估模型
from sklearn.metrics import classification_report
y_pred = mlp.predict(X_test)
print(classification_report(y_test, y_pred))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Best Parameters: {'solver': 'adam', 'learning_rate': 'constant', 'learning_rate_init': 0.0001, 'hidden_layer_sizes': (50, 50, 50, 50), 'max_iter': 1000, 'alpha': 0.0001}
              precision    recall  f1-score   support

           0       0.88      0.98      0.93      2620
           1       0.18      0.04      0.06       364

    accuracy                           0.86      2984
   macro avg       0.53      0.51      0.49      2984
weighted avg       0.79      0.86      0.82      2984



In [14]:
#美国数据集测试

In [11]:
all_US = pd.read_excel('/Users/anyingbai/Desktop/连续剧/Oxford/COPD模型/最新修改/心血管共病/机器学习模型训练/HRS_10_18基线无共病.xlsx', index_col=0)
US2=all_US.copy(deep=True)
US2.dropna(axis=0, how='any', subset=None, inplace=True)
US2.head()

Unnamed: 0_level_0,Male,edu_group,Married,age,smoking_present,physical_activity,household_wealth,ADL_Disability,IADL_Disability,Excessive_drink,Cardiometabolic_multi_18
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10004040,0,1.0,1.0,64,0.0,1.0,103050000,0,0,1,0
10004040,0,1.0,1.0,64,0.0,1.0,103050000,0,0,1,0
10004040,0,1.0,1.0,64,0.0,1.0,103050000,0,0,1,0
10004040,0,1.0,1.0,64,0.0,1.0,103050000,0,0,1,0
10004040,0,1.0,1.0,64,0.0,1.0,103050000,0,0,1,0


In [12]:
US2_Y = US2['Cardiometabolic_multi_18']
US2_X = US2.loc[:,['ADL_Disability','IADL_Disability','edu_group','Male','Married','age','household_wealth','Excessive_drink','physical_activity','smoking_present']]

In [13]:
predictions = mlp.predict(US2_X) 
print(predictions)
print(confusion_matrix(US2_Y ,predictions))  
print(classification_report(US2_Y ,predictions))  

[0 0 0 ... 0 0 0]
[[26346     0]
 [ 6203     0]]
              precision    recall  f1-score   support

           0       0.81      1.00      0.89     26346
           1       0.00      0.00      0.00      6203

    accuracy                           0.81     32549
   macro avg       0.40      0.50      0.45     32549
weighted avg       0.66      0.81      0.72     32549



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
#欧洲数据集测试

In [15]:
all_Europe = pd.read_excel('/Users/anyingbai/Desktop/连续剧/Oxford/COPD模型/最新修改/心血管共病/机器学习模型训练/SHARE_10_18基线无共病.xlsx', index_col=0)
Europe2=all_Europe.copy(deep=True)
Europe2.dropna(axis=0, how='any', subset=None, inplace=True)
Europe2.head()

Unnamed: 0_level_0,smoking_present,Married,age,Male,edu_group,physical_activity,ADL_Disability,IADL_Disability,Excessive_drink,household_wealth,Cardiometabolic_multi_18
mergeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AT-001492-01,0.0,1.0,59,2,1.0,0.0,0.0,0.0,1,32720.621094,0
AT-001492-02,1.0,1.0,60,1,1.0,0.0,0.0,0.0,1,32720.621094,0
AT-001881-01,0.0,1.0,81,2,0.0,0.0,0.0,0.0,1,80034.742188,1
AT-002136-01,1.0,1.0,60,1,2.0,0.0,0.0,0.0,0,371308.71875,0
AT-002136-03,1.0,1.0,58,2,1.0,0.0,0.0,0.0,0,371308.71875,0


In [16]:
Europe2_Y = Europe2['Cardiometabolic_multi_18']
Europe2_X = Europe2.loc[:,['ADL_Disability','IADL_Disability','edu_group','Male','Married','age','household_wealth','Excessive_drink','physical_activity','smoking_present']]

In [17]:
predictions = mlp.predict(Europe2_X) 
print(predictions)
print(confusion_matrix(Europe2_Y,predictions))  
print(classification_report(Europe2_Y,predictions))  

[0 0 0 ... 0 0 0]
[[8682    0]
 [1336    0]]
              precision    recall  f1-score   support

           0       0.87      1.00      0.93      8682
           1       0.00      0.00      0.00      1336

    accuracy                           0.87     10018
   macro avg       0.43      0.50      0.46     10018
weighted avg       0.75      0.87      0.80     10018



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
