In [None]:
import pandas as pd 
import random 
import os
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
import statsmodels.api as sm 
import tensorflow as tf 
from sklearn import ensemble 
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
from tqdm import tqdm 
from sklearn.model_selection import train_test_split 
import seaborn as sns
from tensorflow import keras 
#! conda install -c conda-forge gdcm -y
#! pip install pylibjpeg pylibjpeg-libjpeg

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M

In [None]:
def seed_everything(seed=2020):
    """Seed the Python, NumPy and TensorFlow random number generators.

    The same value is also exported as the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Integer seed shared by every generator (default 2020).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)


# The notebook runs with seed 42 rather than the function's default.
seed_everything(42)

In [None]:
# Directory the competition CSVs (train.csv, test.csv, sample_submission.csv) are read from.
ROOT = "../input/osic-pulmonary-fibrosis-progression"
# NOTE(review): BATCH_SIZE is not passed to any model.fit call in the cells
# visible here — confirm whether it is still needed.
BATCH_SIZE=128

In [None]:
# Training measurements. keep=False removes *every* row of a repeated
# (Patient, Weeks) pair, not only the extra copies.
tr = pd.read_csv(f"{ROOT}/train.csv").drop_duplicates(subset=['Patient','Weeks'], keep=False)
chunk = pd.read_csv(f"{ROOT}/test.csv")

print("add infos")
# Submission ids look like "<Patient>_<Weeks>": split them once, then attach
# the per-patient columns of test.csv (its own Weeks column is dropped so the
# requested week from the id is the one that survives).
sub = pd.read_csv(f"{ROOT}/sample_submission.csv")
id_parts = sub['Patient_Week'].str.split('_')
sub['Patient'] = id_parts.str[0]
sub['Weeks'] = id_parts.str[-1].astype(int)
sub = (
    sub[['Patient','Weeks','Confidence','Patient_Week']]
    .merge(chunk.drop('Weeks', axis=1), on="Patient")
)

In [None]:
tr.head()

In [None]:
sub

In [None]:
# Tag each frame with the split it belongs to so the rows can be separated
# again after the shared feature engineering below.
tr['WHERE'] = 'train'
chunk['WHERE'] = 'val'
sub['WHERE'] = 'test'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# As with append, the original row labels are kept (the index is not reset),
# so index labels repeat across the three splits.
data = pd.concat([tr, chunk, sub])

In [None]:
# First line: (rows, columns) of train / val / test / combined.
# Second line: number of distinct patients in each of the same four frames.
frames = (tr, chunk, sub, data)
print(*[frame.shape for frame in frames])
print(*[frame.Patient.nunique() for frame in frames])
#

In [None]:
# Per-patient earliest measured week. Rows from the submission grid
# (WHERE == 'test') are blanked first so they cannot become the minimum.
# np.where builds the float column directly; copying the integer Weeks
# column and then writing NaN into it through .loc relies on an implicit
# upcast that pandas >= 2.1 deprecates.
data['min_week'] = np.where(data.WHERE == 'test', np.nan, data['Weeks'])
data['min_week'] = data.groupby('Patient')['min_week'].transform('min')
# Spot check: column-wise minima of one patient's non-test rows.
data[(data["Patient"]=="ID00426637202313170790466") & (data["WHERE"]!="test")].min()


In [None]:
# Baseline FVC per patient: the FVC recorded on the row whose week equals the
# patient's min_week. When several rows qualify, the first one in `data`
# order wins.
base = (
    data.loc[data.Weeks == data.min_week, ['Patient', 'FVC']]
    .rename(columns={'FVC': 'min_FVC'})
    .drop_duplicates(subset='Patient', keep='first')
)

In [None]:
# Attach each patient's baseline FVC, then express every row's week relative
# to that patient's first measured week.
data = (
    data.merge(base, how='left', on='Patient')
    .assign(base_week=lambda frame: frame['Weeks'] - frame['min_week'])
)
del base
data.head()

In [None]:
# One-hot encode the categorical columns: every distinct value becomes a 0/1
# column named after the value itself, and that name is recorded in FE (the
# list of model feature columns).
COLS = ['Sex','SmokingStatus'] #,'Age'
FE = []
for column in COLS:
    levels = list(data[column].unique())
    FE.extend(levels)
    for level in levels:
        data[level] = data[column].eq(level).astype(int)
data.head()

In [None]:
#
# Min-max scale the numeric inputs to [0, 1]. The min and max are taken over
# every row of `data` (train, val and test rows together).
for scaled, raw in (('age', 'Age'),
                    ('BASE', 'min_FVC'),
                    ('week', 'base_week'),
                    ('percent', 'Percent')):
    lowest, highest = data[raw].min(), data[raw].max()
    data[scaled] = (data[raw] - lowest) / (highest - lowest)
FE += ['age','percent','week','BASE']
data.head()

In [None]:
# Undo the concatenation: recover the three splits from their WHERE tag, then
# release the combined frame.
tr, chunk, sub = [data.loc[data.WHERE == split] for split in ('train', 'val', 'test')]
del data

In [None]:
tr.shape, chunk.shape, sub.shape

In [None]:
# Clipping constants read by `score`: C1 is the floor applied to sigma,
# C2 the cap applied to the absolute error.
C1 = tf.constant(70, dtype='float32')
C2 = tf.constant(1000, dtype="float32")


def qloss(y_true, y_pred):
    """Pinball (quantile) loss averaged over the 0.2, 0.5 and 0.8 quantiles.

    Args:
        y_true: Target tensor; it is broadcast against ``y_pred``.
        y_pred: Prediction tensor whose last axis holds the three quantiles.

    Returns:
        Scalar tensor: mean of ``max(q * e, (q - 1) * e)`` with
        ``e = y_true - y_pred``.
    """
    quantiles = tf.constant(np.array([[0.2, 0.50, 0.8]]), dtype=tf.float32)
    residual = y_true - y_pred
    pinball = tf.maximum(quantiles * residual, (quantiles - 1) * residual)
    return K.mean(pinball)
def quant_loss():
    """Build a pinball loss over the 0.2 / 0.5 / 0.8 quantiles.

    The original body used PyTorch-style calls that TensorFlow does not
    provide (``tf.max``, ``.unsqueeze``, ``tf.cat``, ``tf.mean``, ``tf.sum``,
    ``dim=``) and never returned the inner function, so ``quant_loss()``
    evaluated to ``None``. Both problems are fixed here.

    Returns:
        A ``quantile_loss(y_true, y_pred)`` callable. ``y_pred`` must hold one
        column per quantile; the result is the per-sample sum of the three
        pinball terms, averaged over the batch.
    """
    def quantile_loss(y_true, y_pred):
        # Flatten the targets so they line up element-wise with each 1-D
        # prediction column instead of broadcasting to (batch, batch).
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
        losses = []
        for i, q in enumerate([0.2, 0.50, 0.8]):
            errors = y_true - y_pred[:, i]
            pinball = tf.maximum((q - 1) * errors, q * errors)
            losses.append(tf.expand_dims(pinball, axis=1))
        per_sample = tf.reduce_sum(tf.concat(losses, axis=1), axis=1)
        return tf.reduce_mean(per_sample)

    return quantile_loss

def mloss(_lambda):
    """Build the training loss: a blend of the pinball loss and the metric.

    Args:
        _lambda: Weight given to ``qloss``; ``1 - _lambda`` weights ``score``.

    Returns:
        A ``loss(y_true, y_pred)`` callable suitable for ``model.compile``.
    """
    def loss(y_true, y_pred):
        # `score` is higher-is-better (it returns the negated penalty), so it
        # has to be subtracted. Adding it, as the original did, rewarded the
        # optimiser for making the metric worse.
        return _lambda * qloss(y_true, y_pred) - (1 - _lambda)*score(y_true, y_pred)
    return loss

def score(y_true, y_pred):
    """Evaluation metric computed from the three predicted quantiles.

    Column 1 of ``y_pred`` is used as the point prediction and the spread
    between columns 2 and 0 as the uncertainty ``sigma``. ``sigma`` is floored
    at ``C1`` and the absolute error capped at ``C2`` before computing
    ``-sqrt(2) * delta / sigma - log(sqrt(2) * sigma)``.
    NOTE(review): this looks like the competition's Laplace log-likelihood
    metric — confirm against the official definition.

    Args:
        y_true: Targets with the value in column 0 (rank-2 tensor).
        y_pred: Predictions with three columns (low, middle, high quantile).

    Returns:
        Scalar tensor, the batch mean of the metric (higher is better).
    """
    # The original called tf.dtypes.cast without keeping the result, so the
    # casts had no effect; assign them back.
    y_true = tf.dtypes.cast(y_true, tf.float32)
    y_pred = tf.dtypes.cast(y_pred, tf.float32)
    sigma = y_pred[:, 2] - y_pred[:, 0]
    fvc_pred = y_pred[:, 1]

    #sigma_clip = sigma + C1
    sigma_clip = tf.maximum(sigma, C1)
    delta = tf.abs(y_true[:, 0] - fvc_pred)
    delta = tf.minimum(delta, C2)
    sq2 = tf.sqrt( tf.dtypes.cast(2, dtype=tf.float32) )
    metric = -(delta / sigma_clip)*sq2 -tf.math.log(sigma_clip* sq2)
    return K.mean(metric)

In [None]:
import tensorflow as tf
# Explicit tf.keras imports replace `from keras.layers import *` /
# `from keras.models import Model` / `from keras.layers.merge import ...`:
# the star import hid where names came from, standalone-keras layers were
# being mixed with tf.keras ones (L.Dense), and `keras.layers.merge` is not an
# importable path in every Keras release. The same public names stay defined
# because later cells use Input / Dense / Model.
from tensorflow.keras.layers import Add, Dense, Input, concatenate
from tensorflow.keras.models import Model

# Baseline quantile network: one linear layer followed by three ReLU layers
# of width 100. The input width follows the feature list instead of a
# hard-coded 9.
left_input = Input(shape=(len(FE), ), name='left_input')
left_branch = Dense(100, name='left_branch')(left_input)
left_branch = L.Dense(100, activation="relu", name="d1")(left_branch)
left_branch = L.Dense(100, activation="relu", name="d2")(left_branch)
left_branch = L.Dense(100, activation="relu", name="d3")(left_branch)

# p1 is an unconstrained offset; p2 is non-negative (ReLU), so adding its
# running sum makes the three outputs non-decreasing from column 0 to 2.
p1 = L.Dense(3, activation="linear", name="p1")(left_branch)
p2 = L.Dense(3, activation="relu", name="p2")(left_branch)

preds = L.Lambda(lambda parts: parts[0] + tf.cumsum(parts[1], axis=1),
                 name="preds")([p1, p2])

model = Model(inputs=[left_input], outputs=preds)
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` and `epsilon=None` are legacy-optimizer arguments —
# confirm the installed Keras version still accepts them.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False),
    loss=mloss(0.8),
    metrics=[score],
)
model.summary()


In [None]:
# Training design matrix (all engineered features) and the FVC targets.
X = tr[FE].to_numpy(copy=True)
Y = tr['FVC']

In [None]:
# Train the baseline network on the whole training frame for 1000 epochs.
# No validation data and no batch_size are passed here.
model.fit(x=X,y=Y,epochs=1000)


In [None]:
# Identifier columns shared by every per-model prediction frame below.
sub_subm = sub[["Patient","week"]]
# Baseline model's three quantile predictions on the submission rows.
pred = model.predict(sub[FE])
sub_nn = sub_subm.assign(**{
    "0.2q": pred[:, 0],
    "0.5q": pred[:, 1],
    "0.8q": pred[:, 2],
})
sub_nn

In [None]:
# Deeper variant: a widening 64 -> 128 -> 256 -> 512 stack with dropout
# between the ReLU layers. Layers and Model come from the tf.keras aliases
# L / M rather than names leaked by an earlier star import; the input width
# follows the feature list, and the redundant `input_dim` on the first Dense
# (the Input layer already fixes the shape) is dropped.
left_input = L.Input(shape=(len(FE), ), name='left_input')
left_branch = L.Dense(64, name='left_branch')(left_input)
left_branch = L.Dense(128, activation="relu", name="d1")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(256, activation="relu", name="d2")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(512, activation="relu", name="d3")(left_branch)

# p1 is an unconstrained offset; p2 is non-negative (ReLU), so adding its
# running sum makes the three outputs non-decreasing from column 0 to 2.
p1 = L.Dense(3, activation="linear", name="p1")(left_branch)
p2 = L.Dense(3, activation="relu", name="p2")(left_branch)

preds = L.Lambda(lambda parts: parts[0] + tf.cumsum(parts[1], axis=1),
                 name="preds")([p1, p2])

model_deep = M.Model(inputs=[left_input], outputs=preds)
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` and `epsilon=None` are legacy-optimizer arguments —
# confirm the installed Keras version still accepts them.
model_deep.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False),
    loss=mloss(0.8),
    metrics=[score],
)
model_deep.summary()
model_deep.fit(x=X,y=Y,epochs=1000)



In [None]:
# Deep model's three quantile predictions on the submission rows.
pred = model_deep.predict(sub[FE])
sub_dp = sub_subm.assign(**{
    "0.2q": pred[:, 0],
    "0.5q": pred[:, 1],
    "0.8q": pred[:, 2],
})
sub_dp

In [None]:
# Same architecture as the deep model, trained on the four scaled numeric
# features only (no one-hot columns).
X=np.array(tr[['age','percent','week','BASE']])
# The input width is taken from X. The original passed `input_dim=9` to the
# first Dense, contradicting the 4-feature Input layer, so it is dropped.
# Layers and Model come from the tf.keras aliases L / M.
left_input = L.Input(shape=(X.shape[1], ), name='left_input')
left_branch = L.Dense(64, name='left_branch')(left_input)
left_branch = L.Dense(128, activation="relu", name="d1")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(256, activation="relu", name="d2")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(512, activation="relu", name="d3")(left_branch)

# p1 is an unconstrained offset; p2 is non-negative (ReLU), so adding its
# running sum makes the three outputs non-decreasing from column 0 to 2.
p1 = L.Dense(3, activation="linear", name="p1")(left_branch)
p2 = L.Dense(3, activation="relu", name="p2")(left_branch)

preds = L.Lambda(lambda parts: parts[0] + tf.cumsum(parts[1], axis=1),
                 name="preds")([p1, p2])

model_3 = M.Model(inputs=[left_input], outputs=preds)
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` and `epsilon=None` are legacy-optimizer arguments —
# confirm the installed Keras version still accepts them.
model_3.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False),
    loss=mloss(0.8),
    metrics=[score],
)
model_3.summary()
model_3.fit(x=X,y=Y,epochs=1000)


In [None]:

# Numeric-features-only model: three quantile predictions on the submission rows.
pred = model_3.predict(sub[['age','percent','week','BASE']])
sub_3 = sub_subm.assign(**{
    "0.2q": pred[:, 0],
    "0.5q": pred[:, 1],
    "0.8q": pred[:, 2],
})
sub_3

In [None]:
# Wide variant: 128 -> 512 -> 1080 -> 256 with dropout, trained on the full
# feature list again (X was narrowed to four columns by the previous model).
X=np.array(tr[FE])
# The input width follows the feature list instead of a hard-coded 9, and the
# redundant `input_dim` on the first Dense is dropped. Layers and Model come
# from the tf.keras aliases L / M.
left_input = L.Input(shape=(len(FE), ), name='left_input')
left_branch = L.Dense(128, name='left_branch')(left_input)
left_branch = L.Dense(512, activation="relu", name="d1")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(1080, activation="relu", name="d2")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(256, activation="relu", name="d3")(left_branch)

# p1 is an unconstrained offset; p2 is non-negative (ReLU), so adding its
# running sum makes the three outputs non-decreasing from column 0 to 2.
p1 = L.Dense(3, activation="linear", name="p1")(left_branch)
p2 = L.Dense(3, activation="relu", name="p2")(left_branch)

preds = L.Lambda(lambda parts: parts[0] + tf.cumsum(parts[1], axis=1),
                 name="preds")([p1, p2])

model_w = M.Model(inputs=[left_input], outputs=preds)
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` and `epsilon=None` are legacy-optimizer arguments —
# confirm the installed Keras version still accepts them.
model_w.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False),
    loss=mloss(0.8),
    metrics=[score],
)
model_w.summary()
model_w.fit(x=X,y=Y,epochs=1000)

In [None]:
# Wide model's three quantile predictions on the submission rows.
pred = model_w.predict(sub[FE])
sub_w = sub_subm.assign(**{
    "0.2q": pred[:, 0],
    "0.5q": pred[:, 1],
    "0.8q": pred[:, 2],
})
sub_w

In [None]:
# Constant-width variant: five Dense(256) layers (the first one linear) with
# dropout after the first two ReLU layers.
X=np.array(tr[FE])
# The input width follows the feature list instead of a hard-coded 9, and the
# redundant `input_dim` on the first Dense is dropped. Layers and Model come
# from the tf.keras aliases L / M.
left_input = L.Input(shape=(len(FE), ), name='left_input')
left_branch = L.Dense(256, name='left_branch')(left_input)
left_branch = L.Dense(256, activation="relu", name="d1")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(256, activation="relu", name="d2")(left_branch)
left_branch = L.Dropout(0.2)(left_branch)
left_branch = L.Dense(256, activation="relu", name="d3")(left_branch)
left_branch = L.Dense(256, activation="relu", name="d4")(left_branch)


# p1 is an unconstrained offset; p2 is non-negative (ReLU), so adding its
# running sum makes the three outputs non-decreasing from column 0 to 2.
p1 = L.Dense(3, activation="linear", name="p1")(left_branch)
p2 = L.Dense(3, activation="relu", name="p2")(left_branch)

preds = L.Lambda(lambda parts: parts[0] + tf.cumsum(parts[1], axis=1),
                 name="preds")([p1, p2])

model_56 = M.Model(inputs=[left_input], outputs=preds)
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` and `epsilon=None` are legacy-optimizer arguments —
# confirm the installed Keras version still accepts them.
model_56.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.01, amsgrad=False),
    loss=mloss(0.8),
    metrics=[score],
)
model_56.summary()
model_56.fit(x=X,y=Y,epochs=1000)

sub_56=sub_subm.copy()
# Fixed copy-paste bug: the original predicted with model_w here, so sub_56
# duplicated sub_w and model_56 never contributed to the test ensemble.
pred= model_56.predict(sub[FE])
sub_56["0.2q"]=pred[:,0]
sub_56["0.5q"]=pred[:,1]
sub_56["0.8q"]=pred[:,2]
sub_56

In [None]:
# Precomputed predictions loaded from an attached input file.
# NOTE(review): assumes pred_ct has one row per row of `tr`, in the same
# order, and at least three columns (0.2 / 0.5 / 0.8 quantiles) — confirm.
with open("../input/model-training-abdooo/predict_ct.npy","rb") as f:
    pred_ct=np.load(f)
# .copy() so the quantile columns are added to an independent frame rather
# than to a slice of `tr` (avoids pandas' SettingWithCopyWarning).
x_pred=tr[["Patient","week"]].copy()
pred=model.predict(tr[FE])
pred_deep=model_deep.predict(tr[FE])
pred_w=model_w.predict(tr[FE])
pred_56=model_56.predict(tr[FE])
pred_3=model_3.predict(tr[['age','percent','week','BASE']])
# In-sample ensemble: plain average of four prediction sets.
# NOTE(review): pred_w and pred_3 are computed but left out of this average,
# while the test-time ensemble further down averages five tabular models and
# no pred_ct — confirm which membership is intended.
for column, name in enumerate(("0.2q", "0.5q", "0.8q")):
    x_pred[name]=(pred[:,column]+pred_deep[:,column]+pred_56[:,column]+pred_ct[:,column])/4
x_pred

In [None]:
from sklearn.metrics import mean_absolute_error

# sigma_opt: mean absolute error of the middle-quantile column of `pred`
# against the training targets. sigma_mean: average width of the ensemble's
# 0.2-0.8 band.
# NOTE(review): `pred` here is whatever was last assigned to that name, not
# the averaged x_pred["0.5q"] — confirm this is intended.
sigma_opt = mean_absolute_error(Y, pred[:, 1])
unc = x_pred["0.8q"].sub(x_pred["0.2q"])
sigma_mean = np.mean(unc)
print(sigma_opt, sigma_mean)


# NOTE(review): idxs is drawn but never used below.
idxs = np.random.randint(0, Y.shape[0], 100)
# Overlay the training targets and the three ensemble quantile curves.
plt.plot(Y, label="ground truth")
for column, label in (("0.2q", "q20"), ("0.5q", "q50"), ("0.8q", "q80")):
    plt.plot(x_pred[column], label=label)
plt.legend(loc="best")
plt.show()
del pred

In [None]:
sub_subm

### PREDICTION

In [None]:
# Offline installs from the attached "download-pkg" input: the gdcm conda
# package and the pylibjpeg-libjpeg wheel (both tagged for Python 3.7 in
# their filenames).
# NOTE(review): nothing in the cells visible here imports gdcm or pylibjpeg —
# confirm these installs are still required.
!conda install --offline ../input/download-pkg/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2

!pip install '../input/download-pkg/pylibjpeg_libjpeg-1.1.0-cp37-cp37m-manylinux2010_x86_64.whl'

In [None]:
sub.head()

In [None]:
# Test-time ensemble: unweighted mean of the five tabular models, one
# quantile column at a time.
sub_final = sub_subm.copy()
for q_col in ("0.2q", "0.5q", "0.8q"):
    sub_final[q_col] = (sub_dp[q_col] + sub_nn[q_col] + sub_3[q_col] + sub_w[q_col] + sub_56[q_col]) / 5

# Point prediction: the ensemble's middle quantile scaled by 0.996.
# Confidence: width of the 0.2-0.8 band.
sub['FVC1'] = sub_final["0.5q"] * 0.996
sub['Confidence1'] = sub_final["0.8q"].sub(sub_final["0.2q"])
sub_final

In [None]:
# Working copy holding the placeholder FVC / Confidence columns next to the
# ensemble's FVC1 / Confidence1.
subm = sub.loc[:, ['Patient_Week','FVC','Confidence','FVC1','Confidence1']].copy()

In [None]:
subm.loc[~subm.FVC1.isnull()].head(10)

In [None]:
# Rows for which the ensemble produced a prediction.
has_pred = subm.FVC1.notnull()
subm.loc[has_pred, 'FVC'] = subm.loc[has_pred, 'FVC1']
# If the average in-sample band is narrower than 70, use one constant
# confidence (sigma_opt) for every row; otherwise keep the per-row band width.
if sigma_mean<70:
    subm['Confidence'] = sigma_opt
else:
    subm.loc[has_pred, 'Confidence'] = subm.loc[has_pred, 'Confidence1']

In [None]:
subm.head()

In [None]:
subm.describe().T

In [None]:
# Where test.csv already holds a measured FVC for a requested Patient_Week,
# submit that measurement with a confidence of 0.1.
otest = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')
for patient, week, fvc in zip(otest.Patient, otest.Weeks, otest.FVC):
    known = subm['Patient_Week'] == patient+'_'+str(week)
    subm.loc[known, 'FVC'] = fvc
    subm.loc[known, 'Confidence'] = 0.1

In [None]:
# Write only the three submission columns, without the index.
submission = subm[["Patient_Week","FVC","Confidence"]]
submission.to_csv("/kaggle/working/submission.csv", index=False)

In [None]:
subm[["Patient_Week","FVC","Confidence"]]

In [None]:
pred_ct