In [9]:
import pandas as pd
import numpy as np
import os
from etnn.data.prepare_ferris_wheel import prepare_1_ferris

In [2]:
df = prepare_1_ferris(
    dataset_path=".",
    df_name_output="health_dataset_preprocessed-1.csv",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    try_pregen=True
)

In [3]:
df.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [4]:
# take group of ids from persons and state how the label for this group shall be calculated

In [5]:
# define ferris wheel
num_gondolas = 10
num_part_pg = 5

In [15]:
# generate sample element
random_order = np.arange(len(df))+1
np.random.shuffle(random_order)

In [19]:
example = [
    random_order[i*num_part_pg:(i+1)*num_part_pg]
    for i in range(num_gondolas)
]

In [20]:
example

[array([156, 354,  93, 365,  33]),
 array([ 46, 235,  53, 108, 106]),
 array([124,  52,  40,  24, 308]),
 array([ 16, 142, 215,  63, 260]),
 array([168, 211,  75,  73,  89]),
 array([227,  69, 261, 311, 178]),
 array([295, 189, 197, 107,  21]),
 array([  6, 202, 222, 327, 276]),
 array([242, 246, 259,  86,  11]),
 array([351, 371, 281, 198,  22])]

Rules:
- People being happy with other people in same gondola
    + Age composition too seperated is bad
    + shift in gender is bad if too much, 50-50 is good or all one gender
    + same with age composition
    + sleep derived(multiplier with quality) persons get a subtraction and 'good sleepers' get bonus (sleep disorder counts as stronger subtraction)
    + higher heart rate and pressure = joy or fear
    + composition of persons in regards to bmi : extreme values make others (no exception for group all those as to many underweight or overweight persons may be awquard as well)
- People being happy with neighboring gondolas composition
    + same age gets bonus, none gets penalty as potentially group is separated
    + gap between happyness index between self and neighbors causes it to produce a mean of only the neighbors

In [35]:
def age_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_subset = df_elements[df_elements.id.isin(id_list)]

    score = 0

    age = df_subset.age.to_numpy()
    sorted_age = np.sort(age)

    # check if in 10 year gap
    gap = sorted_age[-1]-sorted_age[0]
    print(gap)
    if gap <= 10:
        score = 1
        return score
    else:
        x = gap - 10
        score = (-1/60)*x + 1
        return score

In [43]:
def gender_composition_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df[df.id.isin(example[0])]
    # other genders are considered neutral
    male = sum(df_sub.gender_male)
    female = sum(df_sub.gender_female)
    gsum = male + female

    return 2*min(male/gsum, female/gsum) if (male and female) else 1

In [53]:
def sleep_score(
        df_elements: pd.DataFrame,
        id_list
) -> float:
    df_sub = df[df.id.isin(example[0])]

    sleep_d_mean = df_sub.sleep_duration.mean()
    sleep_q_mean = df_sub.sleep_quality.mean()

    return min((sleep_q_mean/10)*(sleep_d_mean/7.5), 1)

In [23]:
df_subset = df[df.id.isin(example[0])]

In [24]:
df_subset

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
32,33,31,5,7.9,8,75,4,1,69,6800,0,117,76,False,True,False
92,93,35,1,7.5,8,60,5,1,70,8000,0,120,80,True,False,False
155,156,39,9,7.2,8,60,5,0,68,8000,0,130,85,True,False,False
353,354,58,5,8.0,9,75,3,2,68,7000,1,140,95,False,True,False
364,365,59,5,8.0,9,75,3,2,68,7000,1,140,95,False,True,False


In [26]:
age = df_subset.age.to_numpy()

In [27]:
sorted_age = np.sort(age)

array([31, 35, 39, 58, 59], dtype=int64)

In [36]:
age_composition_score(df, example[0])

28


0.7

In [44]:
gender_composition_score(df, example[0])

0.8

In [54]:
sleep_score(df, example[0])

0.8646400000000002

In [48]:
df.sleep_duration.mean()

7.132085561497325

In [50]:
df.sleep_quality.mean()

7.31283422459893