In [1]:
import pandas as pd
import plotly.express as px
from dash import Dash, html, dcc, Input, Output
import plotly.graph_objects as go
# Raw-data cleaning and structuring
import numpy as np




In [2]:
# Raw table text: one row per line, integers separated by whitespace.
content = """
22  24  21  23  21  22  19  21  17  20  17  20

20  22  19  21  19  21  17  20  15  19  15  18  
19  21  17  20  17  19  15  18  13  17  13  17  
18  20  17  20  16  18  14  17  13  16  12  17  
17  19  15  19  15  17  13  16  11  15  10  15  
15  18  14  17  13  16  11  14  9  14  9  14  
14  17  13  16  13  15  10  14  9  13  8  13  
13  16  11  15  11  14  9  12  7  11  7  11  
11  14  9  13  7  12  6  10  5  9  4  9  
"""

# Trim every line and drop the ones that end up empty.
lines = list(filter(None, map(str.strip, content.split('\n'))))

# Parse each remaining line into a row of integers.
data = [list(map(int, row.split())) for row in lines]

# Wrap the rows in a DataFrame (default integer row and column labels).
df = pd.DataFrame(data)

In [3]:
# Show the parsed table to check it by eye (9 rows x 12 columns, default integer labels).
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,22,24,21,23,21,22,19,21,17,20,17,20
1,20,22,19,21,19,21,17,20,15,19,15,18
2,19,21,17,20,17,19,15,18,13,17,13,17
3,18,20,17,20,16,18,14,17,13,16,12,17
4,17,19,15,19,15,17,13,16,11,15,10,15
5,15,18,14,17,13,16,11,14,9,14,9,14
6,14,17,13,16,13,15,10,14,9,13,8,13
7,13,16,11,15,11,14,9,12,7,11,7,11
8,11,14,9,13,7,12,6,10,5,9,4,9


In [11]:
# Lookup table keyed by row label (90 down to 10); each list holds the twelve
# values for the gender/age-band columns named below. The frame is built in
# one expression and sorted so the row labels run in ascending order.
data = pd.DataFrame.from_dict(
    {
        90: [22, 24, 21, 23, 21, 22, 19, 21, 17, 20, 17, 20],
        80: [20, 22, 19, 21, 19, 21, 17, 20, 15, 19, 15, 18],
        70: [19, 21, 17, 20, 17, 19, 15, 18, 13, 17, 13, 17],
        60: [18, 20, 17, 20, 16, 18, 14, 17, 13, 16, 12, 17],
        50: [17, 19, 15, 19, 15, 17, 13, 16, 11, 15, 10, 15],
        40: [15, 18, 14, 17, 13, 16, 11, 14, 9, 14, 9, 14],
        30: [14, 17, 13, 16, 13, 15, 10, 14, 9, 13, 8, 13],
        20: [13, 16, 11, 15, 11, 14, 9, 12, 7, 11, 7, 11],
        10: [11, 14, 9, 13, 7, 12, 6, 10, 5, 9, 4, 9],
    },
    orient="index",
    columns=[
        "m_18-25", "wm_18-25",
        "m_26-35", "wm_26-35",
        "m_36-45", "wm_36-45",
        "m_46-55", "wm_46-55",
        "m_56-65", "wm_56-65",
        "m_over65", "wm_over65",
    ],
).sort_index()
data

Unnamed: 0,m_18-25,wm_18-25,m_26-35,wm_26-35,m_36-45,wm_36-45,m_46-55,wm_46-55,m_56-65,wm_56-65,m_over65,wm_over65
10,11,14,9,13,7,12,6,10,5,9,4,9
20,13,16,11,15,11,14,9,12,7,11,7,11
30,14,17,13,16,13,15,10,14,9,13,8,13
40,15,18,14,17,13,16,11,14,9,14,9,14
50,17,19,15,19,15,17,13,16,11,15,10,15
60,18,20,17,20,16,18,14,17,13,16,12,17
70,19,21,17,20,17,19,15,18,13,17,13,17
80,20,22,19,21,19,21,17,20,15,19,15,18
90,22,24,21,23,21,22,19,21,17,20,17,20


In [29]:
# Reshape the wide lookup table into long form, one row per
# (ecdf, gender, age_group) combination, in a single method chain.
data_long = (
    data.reset_index()
    # The former row labels become an ordinary column named "ecdf".
    .rename(columns={"index": "ecdf"})
    .melt(id_vars='ecdf', var_name='category', value_name='value')
    .assign(
        # The first letter of the column name encodes gender: 'm' -> male, 'w' -> female.
        gender=lambda frame: frame['category'].str[0].map({'m': 'male', 'w': 'female'}),
        # The text after the underscore is the age band, e.g. '18-25' or 'over65'.
        age_group=lambda frame: frame['category'].str.split('_').str[1],
    )
    # Drop the helper 'category' column and fix the column order.
    .loc[:, ['ecdf', 'gender', 'age_group', 'value']]
)

data_long

Unnamed: 0,ecdf,gender,age_group,value
0,10,male,18-25,11
1,20,male,18-25,13
2,30,male,18-25,14
3,40,male,18-25,15
4,50,male,18-25,17
...,...,...,...,...
103,50,female,over65,15
104,60,female,over65,17
105,70,female,over65,17
106,80,female,over65,18


In [30]:
def get_age_group(age):
    """Map an age in years to the label of its age band.

    Bands are defined by their lower bound, so fractional ages land in the
    band of the completed year (25.5 -> '18-25', 65.9 -> '56-65') instead of
    slipping through the gaps between integer ranges.

    Args:
        age: Age in years (int or float).

    Returns:
        One of '18-25', '26-35', '36-45', '46-55', '56-65', 'over65',
        or None when the age is below 18 (or is NaN).
    """
    # (inclusive lower bound, label), ordered from the oldest band down so the
    # first bound the age reaches identifies its band.
    brackets = (
        (66, 'over65'),
        (56, '56-65'),
        (46, '46-55'),
        (36, '36-45'),
        (26, '26-35'),
        (18, '18-25'),
    )
    for lower_bound, label in brackets:
        if age >= lower_bound:
            return label
    return None

def find_ecdf(age, gender, value, df, floor=10):
    """Look up the highest `ecdf` level whose tabulated value is <= `value`.

    Args:
        age: Age in years; mapped to an age band via `get_age_group`.
        gender: Gender label as stored in `df['gender']` (e.g. 'male').
        value: Measured value to compare against `df['value']`.
        df: Long-format table with columns 'ecdf', 'gender', 'age_group'
            and 'value'.
        floor: Result reported when `value` is below every tabulated value
            of the matching group. Defaults to 10, the previous hard-coded
            fallback.

    Returns:
        The largest `ecdf` among the group's rows with value <= `value`;
        `floor` if the group exists but no row qualifies; None if the age
        maps to no band or the table has no rows for this gender/age band.
    """
    age_group = get_age_group(age)
    if not age_group:
        return None  # age outside the supported bands

    # Narrow to the gender/age-band group first, so "no such group" can be
    # told apart from "value below every threshold".
    group = df[(df['gender'] == gender) & (df['age_group'] == age_group)]
    if group.empty:
        # Unknown gender label or missing band: there is nothing to compare
        # against, so do not report a made-up level.
        return None

    reached = group.loc[group['value'] <= value, 'ecdf']
    if reached.empty:
        return floor  # below the lowest tabulated value

    # Highest level reached (the maximum, not the first match).
    return reached.max()

In [31]:
# Sanity check: value 1 is below every tabulated value for males aged 18-25,
# so the lookup falls back to the lowest level (10).
find_ecdf(18, 'male', 1, data_long)

10