In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the raw dataset
data = pd.read_excel("no_error.xlsx")

# Select the three sleep-quality indicators and min-max scale each column
sleep_indicator_columns = ["整晚睡眠时间", "睡醒次数", "入睡方式"]
sleep_quality = data[sleep_indicator_columns]
scaler = MinMaxScaler()
scaled_sleep_quality = scaler.fit(sleep_quality).transform(sleep_quality)

In [None]:
# Entropy weight method: derive an indicator's weight from its information entropy
def entropy_weight(data):
    """Return an entropy-based weight for a single indicator column.

    The values are shifted by a small constant, converted into a probability
    distribution over the samples, and the Shannon entropy of that
    distribution is divided by ``ln(n)`` (``n`` = number of samples) so that
    it lies in ``[0, 1]``. The weight is then ``(1 - e) / (3 - e)``.

    Without the ``ln(n)`` normalisation the entropy grows with the sample
    count, ``1 - e`` turns negative and the denominator can approach zero,
    which yields meaningless weights.

    Parameters
    ----------
    data : array-like
        Non-negative values of one indicator (for example one min-max
        scaled column).

    Returns
    -------
    float
        The weight; for non-negative input with at least two samples it lies
        in ``[0, 1/3]`` up to floating-point rounding. A constant column has
        maximal entropy and therefore gets a weight of (approximately) 0.
    """
    # Shift every value by a small constant so that log(0) never occurs
    shifted = np.asarray(data, dtype=float) + 0.001
    sample_count = shifted.size

    # Share of each sample in the indicator's total
    p = shifted / shifted.sum()
    e = -(p * np.log(p)).sum()

    # Normalise the entropy to [0, 1]; ln(1) == 0, so skip when n <= 1
    if sample_count > 1:
        e = e / np.log(sample_count)

    # NOTE(review): the constant 3 presumably stands for the number of
    # indicators used in this notebook. The textbook method divides (1 - e_j)
    # by the sum of (1 - e_j) over all indicators -- confirm the intended formula.
    w = (1 - e) / (3 - e)
    return w

In [None]:
# One entropy weight per scaled indicator column (columns 0, 1 and 2, in order)
weights0, weights1, weights2 = (
    entropy_weight(scaled_sleep_quality[:, column_index])
    for column_index in range(3)
)

In [None]:
# Weighted contribution of each indicator to the overall sleep-quality score.
# Every column is multiplied by the entropy weight computed from that same
# column: weights0 <-> column 0, weights1 <-> column 1, weights2 <-> column 2.
# Longer total sleep time is better
scaled_sleep_time_score = weights0 * scaled_sleep_quality[:, 0]
# A larger "way of falling asleep" value is better
scaled_sleep_way_score = weights2 * scaled_sleep_quality[:, 2]
# Fewer awakenings are better, so the scaled count is inverted
scaled_wakeup_score = weights1 * (1 - scaled_sleep_quality[:, 1])

In [None]:
# Overall score: sum of the three weighted indicator scores
similarity_score = scaled_sleep_time_score + scaled_sleep_way_score + scaled_wakeup_score

# Min-max rescale the overall score
score_floor = similarity_score.min()
score_span = similarity_score.max() - score_floor
normalized_similarity_score = (similarity_score - score_floor) / score_span

# Bin the rescaled score into four grades and store them as a new column.
# The first edge sits slightly below 0 because pd.cut intervals are
# right-closed by default, so a score of exactly 0 still lands in the first bin.
grade_edges = [-0.001, 0.45, 0.60, 0.75, 1]
grade_labels = ["差", "中", "良", "优"]
data["综合睡眠质量"] = pd.cut(normalized_similarity_score, bins=grade_edges, labels=grade_labels)

# Show each record id next to its grade
print(data[["编号", "综合睡眠质量"]])

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset again as the source of the classifier's features
data1 = pd.read_excel("no_error.xlsx")

# Features come from `data1`; the label is the sleep-quality grade column of `data`
feature_columns = ["母亲年龄", "婚姻状况", "教育程度", "妊娠时间", "分娩方式", "CBTS", "EPDS", "HADS"]
# Alternative, smaller feature set:
#X = data1[["母亲年龄","教育程度", "分娩方式", "CBTS", "EPDS", "HADS"]]
X = data1[feature_columns]
y = data["综合睡眠质量"]

# Hold out 20% of the rows as the test set, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest of 300 depth-1 trees with a fixed seed, fitted on the training split
rf_model = RandomForestClassifier(n_estimators=300, max_depth=1, random_state=42)
rf_model.fit(X_train, y_train)

# Predict labels for the held-out test samples
y_pred = rf_model.predict(X_test)

# Report overall accuracy followed by the per-class metrics
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))