## Neural Network

In [42]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tqdm import tqdm

In [43]:
# Number of features fed into the neural network
NUMBER_FEATURES = 8

# Gaussian-noise settings
USE_GAUSSIAN_NOISE = False
VALUE_GAUSSIAN_NOISE = 70  # only relevant when USE_GAUSSIAN_NOISE is True

# Bundle the settings into one dict so they can be passed around together.
config = {
    "NUMBER_FEATURES": NUMBER_FEATURES,
    "USE_GAUSSIAN_NOISE": USE_GAUSSIAN_NOISE,
    "VALUE_GAUSSIAN_NOISE": VALUE_GAUSSIAN_NOISE,
}

In [None]:
# Read the Gaussian-noise settings back out of the shared config dict.
use_gaussian_noise = config["USE_GAUSSIAN_NOISE"]
value_gaussian_noise = config["VALUE_GAUSSIAN_NOISE"]

# When the flag is set, pass `inp` through a GaussianNoise layer and rebind `inp` to the result.
# NOTE(review): no cell visible above this one assigns `inp`, so enabling the flag
# would presumably raise NameError here on a fresh kernel — confirm where `inp` is
# expected to come from before turning USE_GAUSSIAN_NOISE on.
if use_gaussian_noise:
    inp = tf.keras.layers.GaussianNoise(value_gaussian_noise)(inp)



In [52]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tqdm import tqdm

# Number of features fed into the neural network
NUMBER_FEATURES = 8

# Gaussian-noise settings
USE_GAUSSIAN_NOISE = False
VALUE_GAUSSIAN_NOISE = 70  # only relevant when USE_GAUSSIAN_NOISE is True

# Bundle the settings into one dict; build_model() receives this dict.
config = {
    "NUMBER_FEATURES": NUMBER_FEATURES,
    "USE_GAUSSIAN_NOISE": USE_GAUSSIAN_NOISE,
    "VALUE_GAUSSIAN_NOISE": VALUE_GAUSSIAN_NOISE,
}

def build_model(config):
    """Build and compile the dense network that predicts an FVC slope and a spread factor.

    Parameters
    ----------
    config : dict
        Settings dict. Only ``config["NUMBER_FEATURES"]`` (number of input
        features per sample) is read here.

    Returns
    -------
    tf.keras.Model
        Model with one input named ``"input_features"`` and one 2-unit linear
        output named ``"Output_a_s"`` holding ``[slope, s]`` per sample,
        compiled with Adam (learning rate 1e-5) and the Laplace log-likelihood
        loss defined below.
    """
    size = config["NUMBER_FEATURES"]

    # One flat feature vector per sample. The shape excludes the batch axis, so
    # the network output is (batch, 2), which is what the loss indexes with
    # y_pred[:, 0] / y_pred[:, 1]. (The previous shape=(1, size) declared a
    # rank-3 input that did not match the (N, size) frames fed to fit().)
    inp = tf.keras.layers.Input(shape=(size,), name="input_features")

    inputs = [inp]
    outputs = []

    # Funnel of fully connected ReLU layers.
    x = inp
    for units in (100, 75, 50, 25, 15, 10):
        x = tf.keras.layers.Dense(units, activation='relu')(x)

    # output: [slope, s] -- no activation, so both values may be negative
    outputs += [tf.keras.layers.Dense(2, name="Output_a_s")(x)]

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    def Laplace_log_likelihood(y_true, y_pred):
        """Mean clipped Laplace negative log-likelihood of the linear FVC forecast.

        y_pred columns: [slope, s].
        y_true columns: [FVC_now, weeks_from_start, FVC_start].
        """
        # Keep the results of the casts (they were previously discarded).
        y_true = tf.dtypes.cast(y_true, tf.float32)
        y_pred = tf.dtypes.cast(y_pred, tf.float32)

        slope = y_pred[:, 0]
        s = y_pred[:, 1]

        FVC_now = y_true[:, 0]
        weeks_from_start = y_true[:, 1]
        FVC_start = y_true[:, 2]

        sigma = s * weeks_from_start

        # Could become a problem with a ReLU output, because minimising the
        # loss drives the slope negative.
        FVC_pred = weeks_from_start * slope + FVC_start

        # The clipping below may cause trouble because it makes the derivative
        # harder to compute.
        sigma_clip = tf.maximum(tf.abs(sigma), 70.0) * 2
        delta = tf.abs(FVC_now - FVC_pred)
        delta = tf.minimum(delta, 1000.0)

        sq2 = tf.sqrt(tf.dtypes.cast(2, dtype=tf.float32))
        loss = (delta / sigma_clip) * sq2 + tf.math.log(sigma_clip * sq2)
        return K.mean(loss)

    opt = tf.keras.optimizers.Adam(1e-5)

    model.compile(optimizer=opt, loss=Laplace_log_likelihood)

    return model

In [53]:
# Build and compile the network from `config`, then print its layer summary.
model = build_model(config)
#tf.keras.utils.plot_model(model)
model.summary()

## Preparing the Data

In [None]:
df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')

# For every patient, pair each visit (the "target") with all of that patient's
# visits from a different week (the "sources"). Each source row is tagged with
# the target's week and FVC. groupby(sort=False) keeps patients in order of
# first appearance and avoids rescanning the whole frame for every week.
alldatapoints = []
for _, visits in df.groupby("Patient", sort=False):
    patientperweek = []
    for target_week in visits["Weeks"]:
        is_target = visits["Weeks"] == target_week
        # If a week occurs more than once, the first FVC recorded for it is used.
        target_fvc = visits.loc[is_target, "FVC"].iloc[0]
        patientperweek.append(
            visits.loc[~is_target].assign(Targetweek=target_week, TargetFVC=target_fvc)
        )
    alldatapoints.append(pd.concat(patientperweek))

train = pd.concat(alldatapoints)
train["Sex"] = (train["Sex"] == "Male").astype(int)

# Pin the dummy dtype: pandas >= 2.0 returns bool dummies by default, and a frame
# mixing bool with numeric columns converts to an object array.
smoking_dummies = pd.get_dummies(train["SmokingStatus"], dtype=np.uint8)
train = pd.concat([train, smoking_dummies], axis=1).reset_index(drop=True)

# Turn the absolute target week into weeks elapsed since the source visit.
# This is done BEFORE the labels are extracted: the loss reads the second label
# column as `weeks_from_start`, so it needs the relative value. Previously the
# labels were copied first and kept the absolute week.
train["Targetweek"] = train["Targetweek"] - train["Weeks"]

# Label columns, in the order the loss expects them:
# [target FVC, weeks from source visit, source FVC]
labels = train[["TargetFVC", "Targetweek", "FVC"]].astype("float32")
train = train.drop(["SmokingStatus", "TargetFVC", "Patient"], axis=1)

In [None]:
# Columns that make up the "input_features" entry, in this order.
feature_columns = [
    "Weeks", "FVC", "Percent", "Age", "Sex",
    "Currently smokes", "Ex-smoker", "Never smoked",
]

data = {
    "input_features": train[feature_columns],
    "FVC_Start_Weeks_from_start": train["Targetweek"],
}

In [None]:
# from sklearn.model_selection import train_test_split
# x_train, x_valid, y_train, y_valid = train_test_split(data["input_features"], labels, test_size=0.33, shuffle= True)

# A new patient starts at this row, so the split is roughly stratified by patient.
split_row = 10004

features = data["input_features"]

# Rows before the split row are used for training, the rest for validation.
x_train, x_val = features.iloc[:split_row], features.iloc[split_row:]
y_train, y_val = labels.iloc[:split_row], labels.iloc[split_row:]

In [None]:
# Train for 20 epochs, evaluating on the validation rows after each epoch.
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=20,
)

In [None]:
# Predict on the full feature frame and plot the distribution of each output column
# (build_model's output layer is documented as [slope, s]).
# NOTE(review): `sns` is not imported in any cell visible here — presumably
# `import seaborn as sns` ran in an earlier cell; confirm, otherwise this cell
# fails on a fresh kernel.
a = model.predict(data["input_features"])
sns.distplot(a[:,0])
sns.distplot(a[:,1])