# 睡眠健康与生活方式分析

## 1. 导入必要的库

In [None]:
import numpy as np
import pandas as pd
import requests
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

## 2. 数据预处理函数

In [None]:
# Sleep-quality classification helper
def classify_sleep_quality(row):
    """Map one record to a coarse sleep-quality label.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the
            'Sleep Duration' and 'Quality of Sleep' entries.

    Returns:
        'Excellent' when duration is at least 7 and the quality score is at
        least 8; otherwise 'Good' when the quality score is at least 6;
        otherwise 'Need Improvement'.
    """
    long_enough = row['Sleep Duration'] >= 7
    score = row['Quality of Sleep']

    if long_enough and score >= 8:
        return 'Excellent'
    return 'Good' if score >= 6 else 'Need Improvement'

In [None]:
# Enhanced data preprocessing pipeline
def enhanced_preprocessing(df):
    """Clean the sleep dataset and add a ``SleepClass`` label column.

    Steps, in order: validate that the required columns exist, fill missing
    'Heart Rate' values with the column median and missing 'Occupation'
    values with 'Unknown', ordinal-encode 'BMI Category' when that column is
    present, and label every row with ``classify_sleep_quality``.

    Args:
        df: pandas DataFrame holding the raw sleep records. It is modified
            in place.

    Returns:
        The same DataFrame object that was passed in, after modification.

    Raises:
        ValueError: If a required column is missing; the message names the
            first missing column.
    """
    # Validate data completeness. 'Sleep Duration' is included because the
    # row classifier below reads it; checking here reports a missing column
    # with the same error as the others instead of a KeyError inside apply().
    required_columns = [
        'Heart Rate',
        'Quality of Sleep',
        'Occupation',
        'Sleep Duration',
    ]
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"数据集缺少必要字段: {col}")

    # Fill missing values
    df['Heart Rate'] = df['Heart Rate'].fillna(df['Heart Rate'].median())
    df['Occupation'] = df['Occupation'].fillna('Unknown')

    # Encode BMI categories (optional column)
    if 'BMI Category' in df.columns:
        # Normalise whitespace and casing so the labels match the mapping keys.
        df['BMI Category'] = df['BMI Category'].str.strip().str.title()
        bmi_mapping = {'Normal': 0, 'Normal Weight': 1, 'Overweight': 2, 'Obese': 3}
        # Labels that are not keys of the mapping become NaN.
        df['BMI Category'] = df['BMI Category'].map(bmi_mapping)

    # Classify sleep quality row by row
    df['SleepClass'] = df.apply(classify_sleep_quality, axis=1)

    return df

## 3. 天气API集成

In [None]:
def get_weather(city, api_key):
    """Fetch the current weather for a city from the Juhe simpleWeather API.

    Args:
        city: City name, sent as the ``city`` query parameter.
        api_key: API key, sent as the ``key`` query parameter.

    Returns:
        A dict with the keys 'temperature', 'humidity' and 'aqi' (each
        converted to float) and 'weather' (the ``info`` field of the
        realtime section of the response).

    Raises:
        ValueError: On any failure — network or HTTP error, a non-zero
            ``error_code`` in the response, or missing/non-numeric fields.
            The underlying exception is attached as ``__cause__``.
    """
    # NOTE(review): the endpoint is plain HTTP, so the API key is sent
    # unencrypted; switch to HTTPS if the provider supports it.
    api_url = 'http://apis.juhe.cn/simpleWeather/query'
    params = {'city': city, 'key': api_key}

    try:
        response = requests.get(api_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        # The API reports its own failures in the body, with a reason text.
        if data['error_code'] != 0:
            raise ValueError(data['reason'])

        realtime = data['result']['realtime']
        return {
            'temperature': float(realtime['temperature']),
            'humidity': float(realtime['humidity']),
            'weather': realtime['info'],
            'aqi': float(realtime['aqi'])
        }
    except Exception as e:
        # Deliberately broad: callers are promised a single exception type.
        # Chain explicitly so the root cause stays visible in the traceback.
        raise ValueError(f"天气API调用失败: {e}") from e

## 4. 主程序流程

In [None]:
# 1. Load the data
# Candidate locations, tried in order; the first one that loads wins.
data_paths = [
    os.path.expanduser('~/Sleep_health_and_lifestyle_dataset.csv'),  # user home directory
    'Sleep_health_and_lifestyle_dataset.csv',  # current directory
    '../Sleep_health_and_lifestyle_dataset.csv'  # parent directory
]

sleep_data = None
for path in data_paths:
    try:
        sleep_data = pd.read_csv(path, encoding='utf-8')
    except FileNotFoundError:
        print(f"尝试加载 {path} 失败，请检查文件是否存在")
    except Exception as e:
        # The file may exist but be unreadable (permissions, encoding,
        # malformed CSV); report the real cause instead of "file missing".
        print(f"尝试加载 {path} 失败: {e}")
    else:
        print(f"成功加载数据文件: {os.path.abspath(path)}")
        break

# Stop here when none of the candidates could be loaded.
if sleep_data is None:
    raise FileNotFoundError(
        "未找到数据文件，请确保文件位于以下位置之一:\n"
        f"1. 用户主目录: {os.path.expanduser('~/')}\n"
        "2. 程序所在目录\n"
        "3. 程序目录的上级目录\n"
        "文件应命名为'Sleep_health_and_lifestyle_dataset.csv'"
    )

In [None]:
# 2. Preprocess the data
# Run the cleaning pipeline on the loaded frame, then show which columns
# are available for the following steps.
processed_data = enhanced_preprocessing(sleep_data)
print("数据预处理完成，可用列:", [*processed_data.columns])

In [None]:
# 3. Fetch weather data and merge it into the dataset
# Every row receives the same weather values (one reading for one city).
# Only the API call sits inside the try block; get_weather reports every
# failure as ValueError, so that is the only exception handled here.
try:
    # The second argument is a placeholder and must be replaced with a real key.
    weather_data = get_weather('广州', '您的API密钥')
except ValueError as e:
    print(f"天气数据获取失败: {e}")
    # Fall back to default weather values. They are floats so the column
    # dtypes match those produced by a successful API call.
    processed_data = processed_data.assign(
        temperature=25.0,
        humidity=60.0,
        weather_condition='晴',
        air_quality=50.0
    )
    print("使用默认天气数据继续执行")
else:
    processed_data = processed_data.assign(
        temperature=weather_data['temperature'],
        humidity=weather_data['humidity'],
        weather_condition=weather_data['weather'],
        air_quality=weather_data['aqi']
    )
    print("成功添加天气数据特征")
    print("当前数据列:", list(processed_data.columns))

In [None]:
# 4. Train the model
# Make sure every feature column exists before selecting them.
required_features = ['Age', 'Daily Steps', 'BMI Category', 'temperature', 'humidity']
missing_features = [col for col in required_features if col not in processed_data.columns]
if missing_features:
    raise ValueError(f"缺少必要特征列: {missing_features}")

features = processed_data[required_features]
target = processed_data['SleepClass']

# Fix the seed for both the split and the forest so the reported accuracy
# is reproducible from run to run.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

print(f"模型训练完成，测试集准确率: {model.score(X_test, y_test):.2f}")

In [None]:
# 5. Example prediction
# A single hand-written record; the keys are listed in the same order as
# the feature columns used for training.
user_example = {
    'Age': 30,
    'Daily Steps': 4500,
    'BMI Category': 0,  # Normal
    'temperature': 26,
    'humidity': 70,
}

# Build a one-row frame from the record and ask the trained model for its class.
example_df = pd.DataFrame(user_example, index=[0])
prediction = model.predict(example_df)
print(f"睡眠建议: {prediction[0]}")