In [1]:
import numpy as np
import pandas as pd
import sys
import os
import torch
import torch.nn as nn

import glob
import glob
import numpy as np
import onnxruntime as ort

from collections import defaultdict

In [2]:
import pandas as pd

# Columns to remove from every dataset
columns_to_drop = ['HRV_LF', 'HRV_LFHF', 'HRV_LFn']
# columns_to_drop = []

# Load each pickled sample set and strip the excluded columns in one step.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
train_pos_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/train_pos_dataset.pkl'
).drop(columns=columns_to_drop, errors='ignore')
train_neg_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/train_neg_dataset.pkl'
).drop(columns=columns_to_drop, errors='ignore')
test_pos_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/test_pos_dataset.pkl'
).drop(columns=columns_to_drop, errors='ignore')
test_neg_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/test_neg_dataset.pkl'
).drop(columns=columns_to_drop, errors='ignore')
testing_pos_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/testing_positive.pkl'
).drop(columns=columns_to_drop, errors='ignore')
testing_neg_dataset = pd.read_pickle(
    './new_data_2024_09_20/samples/testing_negative.pkl'
).drop(columns=columns_to_drop, errors='ignore')

# Unique subject ids of the held-out sets, in order of first appearance
test_val_pos_list = test_pos_dataset['id'].unique().tolist()
test_val_neg_list = test_neg_dataset['id'].unique().tolist()

# The first three subject ids of each held-out set form the validation split;
# the remaining ids stay in the test split.
pos_ids = test_pos_dataset['id']
validation_pos_data = test_pos_dataset[pos_ids.isin(test_val_pos_list[:3])]
test_pos_dataset = test_pos_dataset[pos_ids.isin(test_val_pos_list[3:])]

neg_ids = test_neg_dataset['id']
validation_neg_data = test_neg_dataset[neg_ids.isin(test_val_neg_list[:3])]
test_neg_dataset = test_neg_dataset[neg_ids.isin(test_val_neg_list[3:])]


In [3]:
# Columns that preprocess() drops before the frame is used as model input
profile_columns = [
    'wave_quality',
    'age',
    'gender',
    'label',
    'id',
]

In [4]:
def map_age(age):
    """One-hot encode an age into four buckets.

    Buckets (upper bounds inclusive): ``age <= 15``, ``15 < age <= 22``,
    ``22 < age <= 30`` and everything else. A value for which every
    comparison is false (for example NaN) falls into the last bucket.

    Returns:
        A new four-element list containing a single 1.
    """
    upper_bounds = (15, 22, 30)
    bucket = len(upper_bounds)  # catch-all bucket unless a bound matches
    for position, bound in enumerate(upper_bounds):
        if age <= bound:
            bucket = position
            break
    return [1 if slot == bucket else 0 for slot in range(len(upper_bounds) + 1)]

def map_gender(gender):
    """One-hot encode gender: ``'Female'`` -> ``[1, 0]``, anything else -> ``[0, 1]``."""
    if gender == 'Female':
        return [1, 0]
    return [0, 1]

def preprocess(df):
    """Return a model-input copy of ``df`` with one-hot age and gender columns.

    The input frame is not modified. ``age`` is encoded with ``map_age`` into
    ``age_0`` .. ``age_3`` and ``gender`` with ``map_gender`` into
    ``gender_0`` / ``gender_1``; afterwards every column listed in
    ``profile_columns`` is dropped.

    Args:
        df: DataFrame containing at least the ``age`` and ``gender`` columns
            plus all columns named in ``profile_columns``.

    Returns:
        A new DataFrame with the encoded columns appended and the profile
        columns removed.

    Raises:
        KeyError: If ``age``, ``gender`` or a profile column is missing.
    """
    age_columns = ['age_0', 'age_1', 'age_2', 'age_3']
    gender_columns = ['gender_0', 'gender_1']

    df_new = df.copy()

    # Build each one-hot block with a single DataFrame construction instead of
    # `.apply(pd.Series)`, which creates one Series per row. Passing explicit
    # columns also keeps the shape right for an empty input frame.
    age_encoded = pd.DataFrame(
        df_new['age'].map(map_age).tolist(),
        index=df_new.index,
        columns=age_columns,
    )
    gender_encoded = pd.DataFrame(
        df_new['gender'].map(map_gender).tolist(),
        index=df_new.index,
        columns=gender_columns,
    )

    df_new[age_columns] = age_encoded
    df_new[gender_columns] = gender_encoded
    return df_new.drop(profile_columns, axis=1)


# Run preprocess() over every split to obtain the model-input feature frames.
train_pos_features, train_neg_features = (
    preprocess(frame) for frame in (train_pos_dataset, train_neg_dataset)
)
val_pos_features, val_neg_features = (
    preprocess(frame) for frame in (validation_pos_data, validation_neg_data)
)
test_pos_features, test_neg_features = (
    preprocess(frame) for frame in (test_pos_dataset, test_neg_dataset)
)

# Same encoding for the separately loaded "testing" sample sets.
testing_pos_features, testing_neg_features = (
    preprocess(frame) for frame in (testing_pos_dataset, testing_neg_dataset)
)


In [5]:
# # # ---------------------------- Data preprocessing (disabled alternative: two age buckets split at 13) ----------------------------
# def map_age(age):
#     if age < 13:
#         return [1, 0]
#     elif age >= 13:
#         return [0, 1] 

# # ---------------------------- Data preprocessing ----------------------------
# def map_gender(gender):
#     return [1, 0] if gender == 'Female' else [0, 1]

# def preprocess(df):
#     df_new = df.copy()
    
#     df_new[['age_0', 'age_1']] = df_new['age'].apply(map_age).apply(pd.Series)
#     df_new[['gender_0', 'gender_1']] = df_new['gender'].apply(map_gender).apply(pd.Series)
#     return df_new.drop(profile_columns, axis=1)


# # Apply preprocessing
# train_pos_features = preprocess(train_pos_dataset)
# train_neg_features = preprocess(train_neg_dataset)
# val_pos_features = preprocess(validation_pos_data)
# val_neg_features = preprocess(validation_neg_data)
# test_pos_features = preprocess(test_pos_dataset)
# test_neg_features = preprocess(test_neg_dataset)


# testing_pos_features = preprocess(testing_pos_dataset)
# testing_neg_features = preprocess(testing_neg_dataset)


In [18]:
import enum

def get_onnxfile(outdir='./outdir'):
    """Return the ``*.onnx`` file in ``outdir`` whose path sorts last.

    Args:
        outdir: Directory searched (non-recursively) for ``*.onnx`` files.
            Defaults to ``'./outdir'``, the location that used to be
            hard-coded.

    Returns:
        The matching path that is last in lexicographic order, or ``None``
        when no ``.onnx`` file is found.
    """
    # outdir = './new_data_2024_09_20/outdir'
    onnx_files = glob.glob(os.path.join(outdir, '*.onnx'))
    # max() selects the same entry as sorting and taking the last element;
    # default=None makes the "nothing found" result explicit.
    return max(onnx_files, default=None)

# Resolve the model file and fail early with a clear message when none exists,
# instead of handing None to onnxruntime.
onnx_model_path = get_onnxfile()
if onnx_model_path is None:
    raise FileNotFoundError("No .onnx model file found by get_onnxfile()")
ort_session = ort.InferenceSession(onnx_model_path)

def predict(features):
    """Run the ONNX session on ``features`` and return its first output.

    ``features`` is bound to the name of the session's first declared input.
    """
    input_name = ort_session.get_inputs()[0].name
    return ort_session.run(None, {input_name: features})[0]

# Per-subject evaluation: negatives first, then positives.
# NOTE: the printed headers say "Test", but the frames evaluated here are the
# training splits (train_neg_* / train_pos_*).
for idx, pair_data in enumerate([[train_neg_dataset, train_neg_features], [train_pos_dataset, train_pos_features]]):
    if idx == 0:
        print("\nTest for Negative subjects:")
    else:
        print("\nTest for Positive subjects:")

    dataset, feature_frame = pair_data

    # Convert all feature rows to float32 once (the dtype fed to the model);
    # C-contiguous layout so that every row slice is itself contiguous.
    feature_matrix = np.ascontiguousarray(feature_frame.to_numpy(dtype=np.float32))

    # subject id -> list of (predicted_label, true_label), one entry per sample
    subject_results = defaultdict(list)
    for subject, label, feature_row in zip(dataset['id'], dataset['label'], feature_matrix):
        # The model is called with a single-row batch of shape (1, n_features).
        prediction = predict(feature_row.reshape(1, -1))
        # Threshold at 0.5. .item() yields a plain scalar, so the comparison
        # with the label below is an ordinary bool rather than an array
        # truth-value test; it raises if the model returns more than one value.
        predicted_label = int(np.asarray(prediction).item() >= 0.5)
        subject_results[subject].append((predicted_label, label))

    # Fraction of correctly classified samples, and sample count, per subject
    subject_accuracies = {}
    subject_count = {}
    for subject, results in subject_results.items():
        correct = sum(1 for pred, label in results if pred == label)
        subject_accuracies[subject] = correct / len(results)
        subject_count[subject] = len(results)

    # A subject counts as correct when more than half of its samples are
    # right; an exact 50 % split counts as half a subject.
    correct_count = 0
    for subject, accuracy in subject_accuracies.items():
        print(f'Subject: {subject}, Accuracy: {accuracy:.2f}, Sample Count: {subject_count[subject]}')
        if accuracy > 0.5:
            correct_count += 1
        elif accuracy == 0.5:
            correct_count += 0.5

    # Guard against an empty split, which previously raised ZeroDivisionError.
    if subject_accuracies:
        print(f"subject accuracy: {(correct_count / len(subject_accuracies)):.2f}")
    else:
        print("subject accuracy: n/a (no samples)")


Test for Negative subjects:
Subject: 8ee348e4-45c8-a3b3-9b30-c0fb2ac25c92, Accuracy: 1.00, Sample Count: 15
Subject: 23d09035-1aa8-2fef-8065-f2addcf680d1, Accuracy: 1.00, Sample Count: 4
Subject: e1a05473-85af-4aaf-dff5-9a9a507b188d, Accuracy: 1.00, Sample Count: 15
Subject: 1949e035-2ac5-2dd1-c2f7-91d81ea9b546, Accuracy: 1.00, Sample Count: 14
Subject: 2f8a7e09-3897-4161-d964-13f9736e20aa, Accuracy: 1.00, Sample Count: 15
Subject: 5f9bfd68-ffc2-c961-4b39-d48a29d72b01, Accuracy: 0.18, Sample Count: 11
Subject: 6ebcfb5d-f938-f673-4fa1-c2323b393ba6, Accuracy: 1.00, Sample Count: 5
Subject: 8df51dda-6ebd-2c29-86b1-472960ceb38c, Accuracy: 1.00, Sample Count: 15
Subject: 179dad82-dee9-d423-da3d-4917bae95aa8, Accuracy: 1.00, Sample Count: 6
Subject: 8ae1e3fe-9ac7-5bc2-251c-62efe59be142, Accuracy: 1.00, Sample Count: 9
Subject: 3e3d3729-8cb7-bc9c-a93e-98b196dd9fad, Accuracy: 1.00, Sample Count: 12
Subject: 127d9913-29a2-25ff-d123-27423c776fd9, Accuracy: 1.00, Sample Count: 15
Subject: 0b20c8

In [12]:
# Number of evaluated samples per subject in the current subject_results dict
for subject_id, samples in subject_results.items():
    print(subject_id, len(samples))

20240717_2A12 246
20240717_2A21 230
25342799-aff8-b14a-81eb-940a97ce65a7 71
0cf44683-dda2-b7fb-5d13-9151df4db1c5 31
59d9f43d-b1cb-5c9e-82b2-e6f649c67f20 24
f34fb6c0-c2f7-7367-3e25-d101c0605938 64
6a44c3c3-f41c-91a8-85d8-915156655f23 12
00212fb4-0488-59a8-2688-5fe74ee77781 42
dc7e8552-e3c4-4695-379c-10e144338a4b 64
bf102cd1-feed-4807-9ce7-d8bbfe9d0afd 31
2a4bd1c8-888b-915b-d645-1ae939a492c5 71
a4993294-3529-e653-3177-ce307357ea08 25
5f9070ae-5c60-fc6c-23f7-027734646ce4 61
d269758d-078b-c2d3-9996-1a54e129cba9 24
0f00efc4-65ef-cace-2e33-394ed0eaac10 71
5c941061-c729-0f7a-c1b1-6c5813bd8363 71
0db4808b-1953-08fd-c848-086386c6707c 70
850e5fd0-90a9-b081-8202-339998bb51cf 64
5d1ed612-d672-588d-91e9-19616832e65e 69
f1b5a636-2ab1-52de-df4b-648c041aaa19 71
513195bb-f9b4-1d3d-6829-4cdf70008d23 37
eeb255b5-6a3d-0f9f-2f96-7f14b74472a4 13
c9f6bffc-451c-2f44-b658-d1e15dac902f 61
9878443f-63d9-06a0-2314-c105d40cfbf0 4
711c0cea-be2d-f87e-57d9-05fd203426e9 64
0e88cac0-231c-e492-409c-8a5d2683b181 45
a793e

In [1]:
import pandas as pd


# Roster workbook and PPG recording directory (machine-specific absolute paths)
Student_path = r'C:\Users\Administrator\Desktop\Backend-Algorithm\Algorithm\PPG\ppg_depression\data_preprocess\new_data_2024_09_20\outdir\MiddleSchool_Depression_Anxiety.xlsx'
middle_file_dir = r'C:\Users\Administrator\Desktop\middle_school_data\ppg_data'

# Read the 'MHT' sheet and keep the three roster columns as separate Series.
students = pd.read_excel(Student_path, sheet_name='MHT')
student_id, gender, age = (
    students[column] for column in ('subjectid', 'gender', 'age')
)

In [14]:
# Roster row(s) whose subject id equals the one being inspected
is_target_subject = students['subjectid'] == '1787dc68-e640-2db3-0b3a-bfc0791f0f44'
student_data = students[is_target_subject]

In [19]:
# Gender value of the first selected roster row
gender_values = student_data['gender'].tolist()
s = gender_values[0]

In [20]:
# Display the value currently bound to `s` as the cell output.
s

'Male'

In [None]:
import sys
from service.ppg.ppg_algorithm_service import *
from Service.config import *
import pandas as pd


# Roster workbook and PPG recording directory (machine-specific absolute paths)
Student_path = r'C:\Users\Administrator\Desktop\Backend-Algorithm\Algorithm\PPG\ppg_depression\data_preprocess\new_data_2024_09_20\outdir\MiddleSchool_Depression_Anxiety.xlsx'
middle_file_dir = r'C:\Users\Administrator\Desktop\middle_school_data\ppg_data'

# Read the 'MHT' sheet and keep the three roster columns as separate Series.
students = pd.read_excel(Student_path, sheet_name='MHT')
student_id, gender, age = (
    students[column] for column in ('subjectid', 'gender', 'age')
)


def load_wave_middleSchool(path):
    """Load the raw PPG data stored in a JSON recording file.

    Args:
        path: Path of a UTF-8 encoded JSON file whose top-level object has a
            ``"ppg_raw_data"`` key.

    Returns:
        The value stored under ``"ppg_raw_data"``, or ``None`` (after printing
        ``"Subject Not Loaded"``) when the file cannot be read, is not valid
        JSON, or does not provide that key.
    """
    # Local import: no `import json` is visible in this notebook, and the
    # previous bare `except:` would have hidden the resulting NameError as an
    # ordinary load failure.
    import json

    try:
        with open(path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return data["ppg_raw_data"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: missing/unreadable file; ValueError: invalid JSON or bad
        # encoding; KeyError/TypeError: payload without the expected key.
        print("Subject Not Loaded")
        return None

def generate_dataset(subject_collection):
    """Run the psychological-index prediction for every student in the roster.

    For each row of the module-level ``students`` frame, the recording files
    in ``middle_file_dir`` whose names start with the subject id and end with
    ``.json`` are loaded. If several files match, the last one that loads
    successfully is used. When the wave is long enough, the depression,
    anxiety and stress predictions are printed.

    Args:
        subject_collection: Currently unused; subjects are taken from the
            module-level ``students`` frame. Kept so existing calls still work.

    Returns:
        None. Results are only printed.
    """
    # List the recording directory once instead of once per subject.
    files = os.listdir(middle_file_dir)

    for subject, g, a in zip(students['subjectid'], students['gender'], students['age']):
        # Reset per subject so a subject without a usable file neither hits an
        # unbound name nor silently reuses the previous subject's wave.
        tmp_wave = None
        for file in files:
            if file.startswith(subject) and file.endswith('.json'):
                file_path = os.path.join(middle_file_dir, file)
                loaded_wave = load_wave_middleSchool(file_path)
                if loaded_wave is None:
                    # The loader already reported the failure.
                    continue
                tmp_wave = loaded_wave
                print(f"Subject Wave {len(tmp_wave)}, Subject gender: {g}, Subject age: {a}")

        if tmp_wave is None:
            continue

        if len(tmp_wave) >= (sampling_frequency * data_minimum_minutes_threshold * 60):
            # Perform prediction
            # NOTE(review): 125 is passed here while the length threshold uses
            # sampling_frequency; confirm the two agree.
            prediction_results = ppg_get_pyschoindex(tmp_wave, 125, g, a)
            # Extract predictions
            depression_prediction = prediction_results.depression_prediction
            anxiety_prediction = prediction_results.anxiety_prediction
            stress_prediction = prediction_results.stress_prediction
            # vitality_prediction = prediction_results.vitality_prediction
            # insomnia_service = prediction_results.insomnia_prediction
            print(depression_prediction, anxiety_prediction, stress_prediction)

# NOTE(review): generate_dataset() has no return statement, so this binds None
# to train_pos_dataset and replaces the DataFrame loaded earlier under that name.
train_pos_dataset = generate_dataset(student_id)

In [None]:
def generate_dataset(student_id):
    """Load and report the PPG recordings of a single student.

    NOTE: this redefines the ``generate_dataset`` from the previous cell.

    The roster workbook is read, the row for ``student_id`` is selected, and
    every file in the recording directory whose name starts with
    ``student_id`` and ends with ``.json`` is loaded. The wave length is
    printed together with that student's gender and age.

    Args:
        student_id: Subject id string, matched against the ``subjectid``
            column and used as the file-name prefix.

    Returns:
        None. Results are only printed.
    """
    Student_path = r'C:\Users\Administrator\Desktop\Backend-Algorithm\Algorithm\PPG\ppg_depression\data_preprocess\new_data_2024_09_20\outdir\MiddleSchool_Depression_Anxiety.xlsx'
    middle_file_dir = r'C:\Users\Administrator\Desktop\middle_school_data\ppg_data'
    students = pd.read_excel(Student_path, sheet_name= 'MHT')
    student_data = students[students['subjectid'] == student_id]

    # Use the selected student's values, not the whole roster columns that
    # were printed before. None when the id is not in the roster.
    gender_values = student_data['gender'].tolist()
    age_values = student_data['age'].tolist()
    gender = gender_values[0] if gender_values else None
    age = age_values[0] if age_values else None

    files = os.listdir(middle_file_dir)
    for file in files:
        if file.startswith(student_id) and file.endswith('.json'):
            file_path = os.path.join(middle_file_dir, file)
            X = load_wave_middleSchool(file_path)
            if X is None:
                # The loader already reported the failure; len(None) would raise.
                continue
            print(f"Subject Wave {len(X)}, Subject gender: {gender}, Subject age: {age}")

