In [1]:
import pandas as pd
import numpy as np
import os
from etnn.data.prepare_ferris_wheel import prepare_1_ferris

In [2]:
df = prepare_1_ferris(
    dataset_path=".",
    df_name_output="health_dataset_preprocessed-1.csv",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    try_pregen=True
)

In [3]:
df.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [4]:
# take group of ids from persons and state how the label for this group shall be calculated

In [5]:
# define ferris wheel
num_gondolas = 10
num_part_pg = 5

In [6]:
# generate sample element
random_order = np.arange(len(df))+1
np.random.shuffle(random_order)

In [7]:
example = [
    random_order[i*num_part_pg:(i+1)*num_part_pg]
    for i in range(num_gondolas)
]

In [8]:
example

[array([ 53, 331, 367, 229, 248]),
 array([192, 179,  92, 199, 258]),
 array([213, 197, 174, 158,  72]),
 array([ 38, 147, 290, 165,  41]),
 array([339, 146, 134, 283,  95]),
 array([190, 171, 113, 268, 247]),
 array([266,  37,  78, 310,  75]),
 array([334, 116, 284, 340,  97]),
 array([250, 177, 256, 299,  33]),
 array([161, 245, 209, 234, 132])]

Rules:
- People being happy with other people in same gondola
    + Age composition too seperated is bad
    + shift in gender is bad if too much, 50-50 is good or all one gender
    + same with age composition
    + sleep derived(multiplier with quality) persons get a subtraction and 'good sleepers' get bonus (sleep disorder counts as stronger subtraction)
    + higher heart rate and pressure = joy or fear
    + composition of persons in regards to bmi : extreme values make others (no exception for group all those as to many underweight or overweight persons may be awquard as well)
- People being happy with neighboring gondolas composition
    + same age gets bonus, none gets penalty as potentially group is separated
    + gap between happyness index between self and neighbors causes it to produce a mean of only the neighbors

In [9]:
def age_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_subset = df_elements[df_elements.id.isin(id_list)]

    score = 0

    age = df_subset.age.to_numpy()
    sorted_age = np.sort(age)

    # check if in 10 year gap
    gap = sorted_age[-1]-sorted_age[0]
    # print(gap)
    if gap <= 10:
        score = 1
        return score
    else:
        x = gap - 10
        score = (-1/60)*x + 1
        return score

In [10]:
def gender_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df_elements[df_elements.id.isin(id_list)]
    # other genders are considered neutral
    male = sum(df_sub.gender_male)
    female = sum(df_sub.gender_female)
    gsum = male + female

    return 2*min(male/gsum, female/gsum) if (male and female) else 1

In [11]:
def sleep_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df_elements[df_elements.id.isin(id_list)]

    sleep_d_mean = df_sub.sleep_duration.mean()
    sleep_q_mean = df_sub.sleep_quality.mean()

    return min((sleep_q_mean/10)*(sleep_d_mean/7.5), 1)

In [12]:
def bmi_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df_elements[df_elements.id.isin(id_list)]

    bmi = df_sub.bmi.to_numpy()
    bmi = np.where(bmi<2, 0, (bmi-1)/2)
    score = 1-np.sum(bmi)/len(df_sub)

    return score

In [13]:
def fear_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df_elements[df_elements.id.isin(id_list)]

    blood_value = (df_sub.blood_pressure1 * df_sub.blood_pressure2 * df_sub.heart_rate).max()
    # print(blood_value)
    border = 900000

    if blood_value >= border:
        return 0
    else:
        return 1

In [14]:
def build_gondola_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    age_score = age_composition_score(df_elements, id_list)
    gender_score = gender_composition_score(df_elements, id_list)
    sleep_score = sleep_composition_score(df_elements, id_list)
    bmi_score = bmi_composition_score(df_elements, id_list)
    fear_score = fear_composition_score(df_elements, id_list)

    return 2.5*age_score + 2*gender_score + 0.5*sleep_score + 2*bmi_score + 3*fear_score

In [15]:
for i in range(len(example)):
    print(build_gondola_score(df, example[i]))

5.156466666666667
8.022933333333333
9.63888
5.0754
5.933493333333333
5.724026666666667
5.494626666666667
6.118600000000001
8.658266666666666
9.43408


In [16]:
def build_wheel_happyness(
        df_elements: pd.DataFrame,
        wheel
) -> float:

    individual_scores = [
        build_gondola_score(df_elements, wheel[i])
        for i in range(len(wheel))
    ]
    neighbor_scores = [
        (individual_scores[(i-1)%len(wheel)] + individual_scores[(i+1)%len(wheel)]) / 2
        for i in range(len(wheel))
    ]
    age_mean = [
        df_elements[df_elements.isin(wheel[i])].age.mean()
        for i in range(len(wheel))
    ]
    age_group_bonus = [
        abs(age_mean[(i-1)%len(wheel)] - age_mean[i]) < 5 or abs(age_mean[(i+1)%len(wheel)] - age_mean[i]) < 5
        for i in range(len(wheel))
    ]

    return sum([
        min(
            (own + foreign) / 2 + 2*bonus,
            10
        )
        for own, foreign, bonus in zip(individual_scores, neighbor_scores, age_group_bonus)
    ])

In [17]:
build_wheel_happyness(df, example)

69.25677333333333