In [1]:
import pandas as pd
import numpy as np
import os
from etnn.data.prepare_ferris_wheel import prepare_1_ferris, generate_ferris_dataset
from etnn.data.ferris_score_helpers import build_wheel_happyness
from tqdm import tqdm
import torch

In [2]:
# Build (or load) the preprocessed per-person health table.
# NOTE(review): try_pregen=True presumably reuses an already generated
# output file if one exists - confirm in prepare_1_ferris.
df_health = prepare_1_ferris(
    try_pregen=True,
    dataset_path=".",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    df_name_output="health_dataset_preprocessed-1.csv",
)

In [3]:
# Preview the first five rows of the health table returned by prepare_1_ferris.
df_health.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [4]:
# Take a group of person ids and define how the label for that group is calculated.

In [5]:
# Ferris wheel layout: how many gondolas, and how many persons sit in each one
num_gondolas, num_part_pg = 10, 5

In [6]:
# Generate a sample element: a random permutation of the person ids.
# Ids are taken to be 1..len(df_health), matching the `id` column shown in
# the preview above (TODO confirm this holds for the whole table).
# A seeded, local generator is used so that Restart & Run All reproduces the
# same example and the same score further down; the previously unseeded
# np.random.shuffle produced a different wheel on every run.
SEED = 42
rng = np.random.default_rng(SEED)
random_order = rng.permutation(len(df_health)) + 1

In [7]:
# Cut the shuffled ids into consecutive chunks of num_part_pg ids,
# one chunk per gondola.
example = list(
    random_order[gondola * num_part_pg:(gondola + 1) * num_part_pg]
    for gondola in range(num_gondolas)
)

In [8]:
# Show the sample wheel: a list with one array of person ids per gondola.
example

[array([156, 290, 276, 294,  41]),
 array([149, 346,  50, 360, 181]),
 array([ 30, 238, 214, 171,  72]),
 array([230, 247, 307,  26, 126]),
 array([ 84, 223, 106, 243,  69]),
 array([229,  49, 336, 179, 272]),
 array([101, 368, 116, 244, 349]),
 array([313, 155, 204, 246, 197]),
 array([219,  22, 359, 255, 232]),
 array([107,  99, 366, 242,  81])]

In [9]:
# The same grouping as a single 2-D array: one row per gondola.
np.reshape(random_order[:num_gondolas * num_part_pg], (num_gondolas, num_part_pg))

array([[156, 290, 276, 294,  41],
       [149, 346,  50, 360, 181],
       [ 30, 238, 214, 171,  72],
       [230, 247, 307,  26, 126],
       [ 84, 223, 106, 243,  69],
       [229,  49, 336, 179, 272],
       [101, 368, 116, 244, 349],
       [313, 155, 204, 246, 197],
       [219,  22, 359, 255, 232],
       [107,  99, 366, 242,  81]])

Rules:
- People being happy with other people in same gondola
    + An age composition that is too widely separated is bad
    + A strong gender imbalance is bad; a 50-50 split or a single-gender gondola is good
    + same with age composition
    + Sleep-deprived persons (multiplier with sleep quality) get a penalty and 'good sleepers' get a bonus (a sleep disorder counts as a stronger penalty)
    + higher heart rate and pressure = joy or fear
    + Composition of persons with regard to BMI: extreme values make others uncomfortable (no exception for a group consisting only of such persons, as too many underweight or overweight persons may be awkward as well)
- People being happy with neighboring gondolas composition
    + Neighbors of the same age give a bonus; having none gives a penalty, as the group was potentially separated
    + A gap between a gondola's own happiness index and that of its neighbors causes it to produce a mean of only the neighbors

In [10]:
# Score the sample wheel, passed as a list of per-gondola id arrays.
build_wheel_happyness(df_health, example)

64.32137333333334

In [11]:
# Score the same wheel passed as one 2-D array (rows = gondolas); the recorded
# result is identical to the list-of-arrays call.
build_wheel_happyness(df_health, random_order[:num_gondolas * num_part_pg].reshape(num_gondolas, num_part_pg))

64.32137333333334

In [12]:
# Dataset size (number of samples to generate) followed by the ferris wheel
# layout (gondola count, persons per gondola).
num_to_generate, num_gondolas, num_part_pg = 10_000, 10, 5

In [13]:
# Generate (or load) the wheel index table together with the health table.
# Note that this rebinds df_health from the earlier prepare_1_ferris call.
# NOTE(review): try_pregen=True presumably reuses previously generated
# files - confirm in generate_ferris_dataset.
df_index, df_health = generate_ferris_dataset(
    num_to_generate=num_to_generate,
    num_gondolas=num_gondolas,
    num_part_pg=num_part_pg,
    try_pregen=True,
    dataset_path=".",
    df_name_input="Sleep_health_and_lifestyle_dataset.csv",
    df_intermediate_output_name="health_dataset_preprocessed-1.csv",
)

In [14]:
# Preview the first five generated rows: id columns named g-<i>_p-<j>
# (presumably gondola i, person j - confirm) followed by the `label` column.
df_index.head()

Unnamed: 0,g-0_p-0,g-0_p-1,g-0_p-2,g-0_p-3,g-0_p-4,g-1_p-0,g-1_p-1,g-1_p-2,g-1_p-3,g-1_p-4,...,g-8_p-1,g-8_p-2,g-8_p-3,g-8_p-4,g-9_p-0,g-9_p-1,g-9_p-2,g-9_p-3,g-9_p-4,label
0,263,190,97,234,34,355,252,8,146,310,...,314,264,91,22,286,16,233,222,116,68.63584
1,337,175,94,10,160,247,363,52,63,70,...,180,364,95,186,164,37,113,97,55,58.485253
2,289,98,74,20,236,286,214,212,171,213,...,75,187,273,76,263,116,253,167,64,63.481333
3,14,276,92,105,289,353,59,225,74,317,...,242,287,73,130,343,85,265,217,82,60.01896
4,189,259,33,285,359,166,250,297,255,14,...,47,257,106,4,306,120,161,6,36,66.660397


In [15]:
# Preview the health table as returned by generate_ferris_dataset.
df_health.head()

Unnamed: 0,id,age,occupation,sleep_duration,sleep_quality,physical_activity,stress_level,bmi,heart_rate,daily_steps,sleep_disorder,blood_pressure1,blood_pressure2,gender_male,gender_female,gender_other
0,1,27,1,6.1,6,42,6,2,77,4200,0,126,83,True,False,False
1,2,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
2,3,28,2,6.2,6,60,8,0,75,10000,0,125,80,True,False,False
3,4,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False
4,5,28,3,5.9,4,30,8,3,85,3000,1,140,90,True,False,False


In [16]:
# Every column except the last one (`label`), i.e. only the person-id columns.
df_index.iloc[:, :-1]

Unnamed: 0,g-0_p-0,g-0_p-1,g-0_p-2,g-0_p-3,g-0_p-4,g-1_p-0,g-1_p-1,g-1_p-2,g-1_p-3,g-1_p-4,...,g-8_p-0,g-8_p-1,g-8_p-2,g-8_p-3,g-8_p-4,g-9_p-0,g-9_p-1,g-9_p-2,g-9_p-3,g-9_p-4
0,263,190,97,234,34,355,252,8,146,310,...,298,314,264,91,22,286,16,233,222,116
1,337,175,94,10,160,247,363,52,63,70,...,99,180,364,95,186,164,37,113,97,55
2,289,98,74,20,236,286,214,212,171,213,...,121,75,187,273,76,263,116,253,167,64
3,14,276,92,105,289,353,59,225,74,317,...,234,242,287,73,130,343,85,265,217,82
4,189,259,33,285,359,166,250,297,255,14,...,27,47,257,106,4,306,120,161,6,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,126,83,109,151,264,89,317,71,330,236,...,345,6,340,125,69,207,66,247,290,245
9996,285,96,190,287,31,267,40,346,271,168,...,193,177,268,309,78,369,107,188,339,23
9997,371,167,197,198,238,368,322,276,173,280,...,190,111,74,330,176,162,154,6,187,289
9998,153,309,223,58,86,206,96,77,289,240,...,11,337,253,182,144,21,94,246,157,230


In [17]:
class FerrisWheelDataset(torch.utils.data.Dataset):
    """Map-style dataset that yields one ferris wheel (all its persons) per item.

    Each row of ``df_index`` describes one wheel: every column except the last
    holds a person id and the last column holds the wheel's label. Person ids
    are resolved against ``df_health``, which is looked up by its ``id`` values.

    Parameters
    ----------
    df_health : pandas.DataFrame
        Per-person feature table with an ``id`` column (a frame that is already
        indexed by ``id`` is accepted as well). The caller's frame is never
        modified, so the dataset can be constructed repeatedly from the same
        frame.
    df_index : pandas.DataFrame
        One row per wheel; all but the last column are person ids, the last
        column is the label. The ids and labels are read once at construction
        time.

    Raises
    ------
    KeyError
        If ``df_health`` has neither an ``id`` column nor an index named ``id``.
    """

    def __init__(self, df_health, df_index):
        if 'id' in df_health.columns:
            # Non-inplace set_index returns a new frame. The previous
            # inplace=True variant removed the `id` column from the caller's
            # frame, so constructing the dataset a second time raised KeyError.
            self.df_health = df_health.set_index('id')
        elif df_health.index.name == 'id':
            # Already indexed by person id (e.g. prepared by the caller).
            self.df_health = df_health
        else:
            raise KeyError('id')

        self.df_index = df_index

        # Slice the columns *before* selecting a row: picking a row first from
        # a frame with integer id columns and a float label column upcasts the
        # ids to float, and the lookup would then be done with float keys.
        self._person_ids = df_index.iloc[:, :-1].to_numpy()
        self._labels = df_index.iloc[:, -1].to_numpy()

    def __len__(self):
        """Return the number of wheels, i.e. the number of rows in ``df_index``."""
        return len(self.df_index)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the wheel at position ``idx``.

        ``features`` is a float64 tensor with one row per person id in the
        wheel (in the order of the id columns) and one column per remaining
        ``df_health`` column; ``label`` is a 0-d tensor built from the last
        column of ``df_index``.
        """
        # Person ids belonging to this wheel
        person_ids = self._person_ids[idx]

        # Look the ids up in the health table (label-based, order preserved)
        features = self.df_health.loc[person_ids]

        # Label of this wheel
        label = self._labels[idx]

        return torch.tensor(features.to_numpy(float)), torch.tensor(label)


In [18]:
# Wrap the health table and the wheel index table in a torch Dataset.
dataset = FerrisWheelDataset(df_health=df_health, df_index=df_index)

In [19]:
# Fetch the first sample: a (features tensor, label tensor) pair.
dataset[0]

(tensor([[4.5000e+01, 4.0000e+00, 6.6000e+00, 7.0000e+00, 4.5000e+01, 4.0000e+00,
          2.0000e+00, 6.5000e+01, 6.0000e+03, 0.0000e+00, 1.3500e+02, 9.0000e+01,
          0.0000e+00, 1.0000e+00, 0.0000e+00],
         [4.3000e+01, 1.0000e+01, 6.5000e+00, 6.0000e+00, 4.5000e+01, 7.0000e+00,
          2.0000e+00, 7.2000e+01, 6.0000e+03, 2.0000e+00, 1.3000e+02, 8.5000e+01,
          1.0000e+00, 0.0000e+00, 0.0000e+00],
         [3.6000e+01, 7.0000e+00, 7.2000e+00, 8.0000e+00, 6.0000e+01, 4.0000e+00,
          0.0000e+00, 6.8000e+01, 7.0000e+03, 0.0000e+00, 1.1500e+02, 7.5000e+01,
          0.0000e+00, 1.0000e+00, 0.0000e+00],
         [4.4000e+01, 1.0000e+01, 6.3000e+00, 6.0000e+00, 4.5000e+01, 7.0000e+00,
          2.0000e+00, 7.2000e+01, 6.0000e+03, 2.0000e+00, 1.3000e+02, 8.5000e+01,
          1.0000e+00, 0.0000e+00, 0.0000e+00],
         [3.1000e+01, 2.0000e+00, 6.1000e+00, 6.0000e+00, 3.0000e+01, 8.0000e+00,
          0.0000e+00, 7.2000e+01, 5.0000e+03, 0.0000e+00, 1.2500e+02, 8.00