#### This Python Notebook is used to generate Causal Hans Example Distributions (Static, Static Temporal).

In [1]:
import pickle, os
import pandas as pd
import numpy as np
# Make sure the output folder for the generated pickle files exists
# (no error if it is already there).
os.makedirs(name='tmp_data', exist_ok=True)

#### Dynamic context: here we are specifically interested in two different contexts.

For ages > 25 years we keep the same mechanism as in the T-SCE dataset; for ages <= 25 years we change it (the sign of the influence of age on food habit is flipped).

```
Structure (Type np.Array):
[
    Patient Level
    [   
        Patient Timestep Iterations
        [a,f,h,m], [],
    ], 
    [
        [], [],
    ], 

]
```

In [2]:
num_samples = 10000
noise = 0.03
infl_past = 0.6
infl_present = 0.4
n_timesteps = 50

def age(size):
    """Draw `size` ages uniformly at random from the interval [15, 65)."""
    # The range deliberately starts below 25 so that the "Ages <= 25" context
    # is actually populated; otherwise the changed mechanism would have no effect.
    youngest, oldest = 15, 65
    return np.random.uniform(youngest, oldest, size)

def food_habit(age):
    """Noise-free food habit at t=0: half of the given age."""
    age_coefficient = 0.5
    return age_coefficient * age

def health(age, food_habit):
    """Noise-free health at t=0: -0.2 * age plus 0.6 * food_habit."""
    age_term = -0.2 * age
    food_term = 0.6 * food_habit
    return age_term + food_term

def mobility(health):
    """Noise-free mobility at t=0: half of the given health value."""
    health_coefficient = 0.5
    return health_coefficient * health

def add_noise(values, scale_factor):
    """Add zero-mean Gaussian noise whose std-dev is proportional to the mean of `values`.

    Parameters
    ----------
    values : array-like of numbers
        The noise-free values.
    scale_factor : float
        The noise standard deviation is ``|mean(values) * scale_factor|``.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape as `values` with one independent
        noise sample added to every element.
    """
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        # The mean of an empty array is NaN; there is nothing to perturb anyway.
        return values.copy()
    # A standard deviation must be non-negative. Using the magnitude keeps the
    # function usable when the mean is negative (np.random.normal raises a
    # ValueError for scale < 0), and is identical to the plain product when
    # the mean is positive.
    scale = abs(np.mean(values) * scale_factor)
    return values + np.random.normal(loc=0, scale=scale, size=values.shape)

# Initial cross-section (t=0): sample the ages, then derive F, H and M in
# causal order, perturbing each derived variable before it feeds the next one.
age_0 = age(num_samples)
food_habit_0 = add_noise(food_habit(age_0), noise)
health_0 = add_noise(health(age_0, food_habit_0), noise)
mobility_0 = add_noise(mobility(health_0), noise)

# One row per patient; columns are Age, Food habit, Health, Mobility.
data_0 = pd.DataFrame(dict(A=age_0, F=food_habit_0, H=health_0, M=mobility_0))

# Functions for generating data for t>0
def f1(Ap):
    """Age update: the current age is the previous age `Ap` plus one."""
    current_age = Ap + 1
    return current_age

def f2(Fp, Ac):
    """Food-habit update with an age-dependent mechanism.

    Each element is ``infl_past * Fp + infl_present * (c * Ac)``, where the age
    coefficient ``c`` is -0.5 for current ages <= 25 and +0.5 otherwise.

    Parameters
    ----------
    Fp : array-like of numbers
        Previous food-habit values, one per patient.
    Ac : array-like of numbers
        Current ages, aligned with `Fp` (same length).

    Returns
    -------
    numpy.ndarray
        Noise-free current food-habit values.
    """
    Fp = np.asarray(Fp, dtype=float)
    Ac = np.asarray(Ac, dtype=float)
    # Vectorised context switch: one coefficient per patient instead of a
    # Python-level loop with list appends.
    age_coefficient = np.where(Ac <= 25, -0.5, 0.5)
    return infl_past * Fp + infl_present * (age_coefficient * Ac)

def f3(Hp, Ac, Fc):
    """Health update: blend previous health `Hp` with the health equation at the current step.

    The present-time term is ``-0.2 * Ac + 0.6 * Fc``; the two parts are
    weighted by the module-level `infl_past` and `infl_present`.
    """
    past_part = infl_past * Hp
    present_part = infl_present * (-0.2 * Ac + 0.6 * Fc)
    return past_part + present_part

def f4(Mp, Hc):
    """Mobility update: blend previous mobility `Mp` with half of the current health `Hc`.

    The two parts are weighted by the module-level `infl_past` and `infl_present`.
    """
    past_part = infl_past * Mp
    present_part = infl_present * (0.5 * Hc)
    return past_part + present_part

# One history list per patient, seeded with that patient's t=0 row [A, F, H, M].
data = [[row] for row in data_0.to_numpy()]
for _ in range(n_timesteps):
    # Most recent [A, F, H, M] record of every patient, split into one array per variable.
    last_data = [history[-1] for history in data]
    Ap, Fp, Hp, Mp = (np.array(column) for column in zip(*last_data))
    # Advance in causal order A -> F -> H -> M; each derived variable gets
    # its noise added before it is used as an input to the next one.
    Ac = f1(Ap)
    Fc = add_noise(f2(Fp, Ac), noise)
    Hc = add_noise(f3(Hp, Ac, Fc), noise)
    Mc = add_noise(f4(Mp, Hc), noise)
    next_timestep = [list(record) for record in zip(Ac, Fc, Hc, Mc)]
    # Append the new record to the matching patient's history.
    for history, record in zip(data, next_timestep):
        history.append(record)

# Stack the per-patient histories into a single array indexed as
# [patient, timestep, variable] (see the structure sketch above) and persist it.
extended_data = np.array(data)
# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open(os.path.join("tmp_data", "dyntsce_causalhans.pkl"), "wb") as pkl_file:
    pickle.dump(extended_data, pkl_file)