In [1]:
import pandas as pd
import numpy as np
import os
from etnn.data.prepare_ferris_wheel import prepare_1_ferris, generate_ferris_dataset
from etnn.data.ferris_score_helpers import build_wheel_happyness
from tqdm import tqdm

In [2]:
# Load the health table that the rest of the notebook works on.
# NOTE(review): presumably this reads the raw CSV from `dataset_path`, writes/reads the
# preprocessed CSV named by `df_name_output`, and with try_pregen=True reuses an existing
# preprocessed file — confirm in etnn.data.prepare_ferris_wheel.
df_health = prepare_1_ferris(
    dataset_path=".",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    df_name_output="health_dataset_preprocessed-1.csv",
    try_pregen=True,
)

In [3]:
# Preview the first rows of the loaded health table.
df_health.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [4]:
# Take a group of person ids and state how the label for this group shall be calculated.

In [5]:
# Ferris wheel layout: number of gondolas, and number of persons placed in each gondola.
num_gondolas, num_part_pg = 10, 5

In [6]:
# Generate a sample element: a random permutation of the values 1..len(df_health).
# NOTE(review): these values presumably correspond to the `id` column of df_health — confirm.
# NOTE(review): the shuffle uses NumPy's global RNG without a seed, so this ordering (and
# every output derived from it) differs between runs — consider seeding for reproducibility.
random_order = np.arange(1, len(df_health) + 1)
np.random.shuffle(random_order)

In [7]:
# Cut the front of the permutation into consecutive groups of `num_part_pg` ids,
# one group per gondola.
example = [
    random_order[gondola * num_part_pg:gondola * num_part_pg + num_part_pg]
    for gondola in range(num_gondolas)
]

In [8]:
# Show the example grouping: one array of ids per gondola.
example

[array([ 23,  38, 261, 206,  80]),
 array([286, 337, 233, 279, 126]),
 array([293, 198,  45, 303, 304]),
 array([ 75, 225,  21, 224, 157]),
 array([342,  47,  56, 173, 131]),
 array([ 97, 209, 288, 164, 356]),
 array([367, 260,  43, 355, 311]),
 array([146, 138, 332, 102,  20]),
 array([192, 129, 189, 175, 195]),
 array([179,  91, 309, 152, 235])]

In [9]:
# Same grouping expressed as a single 2-D array: one row per gondola, one column per seat.
np.reshape(random_order[:num_gondolas * num_part_pg], (num_gondolas, num_part_pg))

array([[ 23,  38, 261, 206,  80],
       [286, 337, 233, 279, 126],
       [293, 198,  45, 303, 304],
       [ 75, 225,  21, 224, 157],
       [342,  47,  56, 173, 131],
       [ 97, 209, 288, 164, 356],
       [367, 260,  43, 355, 311],
       [146, 138, 332, 102,  20],
       [192, 129, 189, 175, 195],
       [179,  91, 309, 152, 235]])

Rules:
- People being happy with other people in same gondola
    + Age composition that is too separated is bad
    + shift in gender is bad if too much, 50-50 is good or all one gender
    + same with age composition
    + sleep-deprived (multiplier with quality) persons get a subtraction, and 'good sleepers' get a bonus (a sleep disorder counts as a stronger subtraction)
    + higher heart rate and pressure = joy or fear
    + composition of persons with regard to BMI: extreme values make others uncomfortable (no exception for a group consisting entirely of such persons, as too many underweight or overweight persons may be awkward as well)
- People being happy with neighboring gondolas composition
    + same age gets bonus, none gets penalty as potentially group is separated
    + a gap in the happiness index between a gondola and its neighbors causes it to produce a mean of only the neighbors

In [10]:
# Score the example wheel, passing the health table and the list of per-gondola id arrays;
# the recorded result is a single number.
build_wheel_happyness(df_health, example)

68.47836000000001

In [11]:
# Same score, but with the grouping passed as a 2-D (gondola x seat) array instead of a list
# of slices; the recorded output matches the list-based call.
build_wheel_happyness(df_health, random_order[:num_gondolas * num_part_pg].reshape(num_gondolas, num_part_pg))

68.47836000000001

In [12]:
# Dataset size: number of wheel samples requested from the generator.
num_to_generate = 10 ** 4
# Ferris wheel layout (gondola count, persons per gondola) used for the generated dataset.
num_gondolas, num_part_pg = 10, 5

In [13]:
# Generate the dataset: an index frame (one row per wheel sample, with a `label` column)
# plus the health table it refers to.
# NOTE: this rebinds `df_health`, replacing the frame loaded earlier in the notebook.
# NOTE(review): try_pregen=True presumably reuses previously generated files found under
# `dataset_path` — confirm in etnn.data.prepare_ferris_wheel.
df_index, df_health = generate_ferris_dataset(
    dataset_path=".",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    df_intermediate_output_name="health_dataset_preprocessed-1.csv",
    num_to_generate=num_to_generate,
    num_gondolas=num_gondolas,
    num_part_pg=num_part_pg,
    try_pregen=True,
)

In [14]:
# Preview the generated index frame (columns g-<gondola>_p-<seat> plus `label`).
# NOTE(review): in the recorded output every displayed row is identical (indices 0..49 and
# the same label) — confirm that generate_ferris_dataset really produces distinct samples.
df_index.head()

Unnamed: 0,g-0_p-0,g-0_p-1,g-0_p-2,g-0_p-3,g-0_p-4,g-1_p-0,g-1_p-1,g-1_p-2,g-1_p-3,g-1_p-4,...,g-8_p-1,g-8_p-2,g-8_p-3,g-8_p-4,g-9_p-0,g-9_p-1,g-9_p-2,g-9_p-3,g-9_p-4,label
0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,88.193557
1,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,88.193557
2,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,88.193557
3,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,88.193557
4,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,88.193557


In [15]:
# Preview the health table returned by generate_ferris_dataset (df_health was rebound above).
df_health.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [16]:
# Show only the person-index columns of the generated dataset.
# The `label` column is dropped by name rather than by position, so this keeps selecting the
# right columns (or fails loudly with a KeyError) if the column order ever changes.
df_index.drop(columns="label")

Unnamed: 0,g-0_p-0,g-0_p-1,g-0_p-2,g-0_p-3,g-0_p-4,g-1_p-0,g-1_p-1,g-1_p-2,g-1_p-3,g-1_p-4,...,g-8_p-0,g-8_p-1,g-8_p-2,g-8_p-3,g-8_p-4,g-9_p-0,g-9_p-1,g-9_p-2,g-9_p-3,g-9_p-4
0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
1,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
2,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
3,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
4,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
9996,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
9997,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
9998,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
