In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import tensorflow as tf
%matplotlib inline
import os


In [None]:
import cv2
import pydicom
import matplotlib.pyplot as plt
# Size handed to cv2.resize for every 6x6 montage built by get_full_image.
FULL_IMG_SIZE = (1536,1536)

def crop_image(img: np.ndarray):
    """Drop every row and column made up solely of the top-left pixel's value.

    The value at ``img[0, 0]`` is treated as background. A row (or column) is
    kept only if at least one of its pixels differs from that value, so the
    result can be empty when the whole image is uniform.
    """
    background = img[0, 0]
    foreground = img != background
    rows_to_keep = foreground.any(axis=1)
    cols_to_keep = foreground.any(axis=0)
    return img[np.ix_(rows_to_keep, cols_to_keep)]

def get_img(path):
    """Read one DICOM file and return its slice as a 512x512 float32 array.

    The pixel data is divided by 2**11, cropped with ``crop_image`` and then
    resized to 512x512 with ``cv2.resize``.
    """
    dataset = pydicom.dcmread(path)
    scaled = dataset.pixel_array / 2**11
    cropped = crop_image(scaled)
    resized = cv2.resize(cropped, (512, 512))
    return np.float32(resized)

def check_img_num(imgs,big_img_num,count,full_imgs_num,imgs_num):
    """Advance ``count`` until it points at an existing slice or past the end.

    The slice number examined for a given ``count`` is
    ``big_img_num + count * full_imgs_num``. ``count`` is increased one step at
    a time until either ``'<slice number>.dcm'`` is present in ``imgs`` or the
    slice number exceeds ``imgs_num``.

    Args:
        imgs: Container of file names that supports ``in``.
        big_img_num: Offset of the montage being filled.
        count: Starting position within the montage.
        full_imgs_num: Stride between consecutive slices of one montage.
        imgs_num: Largest slice number that is still considered.

    Returns:
        Tuple ``(count, imgs_num)`` with the possibly advanced ``count`` and the
        unchanged ``imgs_num``.

    Raises:
        ValueError: If ``count`` has to advance but ``full_imgs_num`` is not
            positive, because the search could then never terminate.
    """
    # Iterative on purpose: a long run of missing slices used to exhaust the
    # interpreter's recursion limit.
    while True:
        slice_num = big_img_num + count * full_imgs_num
        if f'{slice_num}.dcm' in imgs or slice_num > imgs_num:
            return count, imgs_num
        if full_imgs_num <= 0:
            raise ValueError(
                f'full_imgs_num must be positive to advance, got {full_imgs_num}'
            )
        count += 1

def get_full_image(path):
    """Tile the DICOM slices found in ``path`` into normalised 6x6 montages.

    The regular files directly inside ``path`` are listed and at most 700 names
    are kept. With ``n`` names kept, ``n // 36 + 1`` montages are built. Montage
    ``b`` (1-based) is filled row by row with the slices
    ``{b + count * stride}.dcm``, where ``stride`` is the number of montages and
    ``count`` is advanced past missing file names by ``check_img_num``. Slots
    beyond the last slice, and slices that fail to load, are filled with zeros.

    Every montage is resized to ``FULL_IMG_SIZE``, expanded to three identical
    channels and shifted/scaled so that its minimum is 0 and, unless it is
    completely flat, its maximum is 1.

    Args:
        path: Directory containing the ``<number>.dcm`` files.

    Returns:
        A list with one more entry than there are montages. Index 0 is an empty
        list placeholder; every later index holds one float32 montage.
    """
    imgs = [f for f in os.listdir(path)
            if os.path.isfile(os.path.join(path, f))]
    # NOTE(review): os.listdir returns names in arbitrary order, so which 700
    # names survive this cap depends on the filesystem - confirm this is intended.
    if len(imgs) > 700:
        imgs = imgs[:700]
    imgs_num = len(imgs)
    full_imgs_num = imgs_num // 36 + 1
    full_imgs = [[] for i in range(full_imgs_num + 1)]
    square_side = 6

    for big_img_num in range(1, full_imgs_num + 1):
        count = 0
        rows = []
        for i in range(square_side):
            tiles = []
            for k in range(square_side):
                count, imgs_num = check_img_num(imgs, big_img_num, count, full_imgs_num, imgs_num)
                slice_num = big_img_num + count * full_imgs_num
                # Blank tile by default; replaced only when the slice loads.
                img = np.zeros((512, 512))
                if slice_num <= imgs_num:
                    try:
                        img = get_img(f'{path}/{slice_num}.dcm')
                    except Exception:
                        # Best effort: an unreadable slice becomes a blank tile.
                        # `Exception` (not a bare except) keeps Ctrl-C working.
                        print(f'exception! {path}/{slice_num}.dcm')
                tiles.append(img)
                count += 1
            rows.append(np.concatenate(tiles, axis=1))

        montage = np.concatenate(rows, axis=0)
        montage = np.float32(cv2.resize(montage, FULL_IMG_SIZE))
        # The montage has a single channel, so it must be expanded with
        # GRAY2RGB; BGR2RGB only accepts 3- or 4-channel input.
        montage = np.float32(cv2.cvtColor(montage, cv2.COLOR_GRAY2RGB))

        montage = montage - montage.min()
        peak = montage.max()
        # A completely flat montage (e.g. every tile blank) would otherwise
        # divide by zero and turn into NaN.
        if peak > 0:
            montage = montage / peak
        full_imgs[big_img_num] = montage

    return full_imgs
        
                      

In [None]:
# Location of the training images.
# NOTE(review): `path` is not read by any later visible cell - confirm it is still needed.
path = '../input/osic-pulmonary-fibrosis-progression/train'

# Training table; get_lr_labels reads it through the module-level name `df`,
# and vectorise_data is later applied to it in place.
df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
df.head()

In [None]:
def get_lr_labels(f):
    """Fit a straight line to one patient's Percent-versus-Weeks measurements.

    Rows of the module-level ``df`` whose ``Patient`` equals ``f`` are selected.
    Measurements whose ``Percent`` is not strictly between the 10th and 90th
    percentile are dropped, and a ``LinearRegression`` is fitted on the rest.
    If that filter removes every row, all of the patient's measurements are
    used instead.

    Args:
        f: Patient identifier to look up in ``df['Patient']``.

    Returns:
        ``[intercept / 100 - 1, slope]`` of the fitted line.

    Raises:
        ValueError: If ``df`` contains no rows for the patient.
    """
    measurements = df[df['Patient'] == f][['Weeks', 'Percent']]
    if measurements.empty:
        raise ValueError(f'no measurements found for patient {f!r}')

    p90 = np.percentile(measurements['Percent'], 90)
    p10 = np.percentile(measurements['Percent'], 10)
    inside = (measurements['Percent'] < p90) & (measurements['Percent'] > p10)
    inliers = measurements[inside]
    # The bounds are strict, so identical (or very few) readings can leave
    # nothing to fit; fall back to the unfiltered rows instead of crashing.
    if inliers.empty:
        inliers = measurements

    modelLR = LinearRegression()
    weeks = np.array(inliers.Weeks).reshape(-1, 1)
    modelLR.fit(weeks, inliers.Percent)
    return [modelLR.intercept_ / 100 - 1, modelLR.coef_[0]]


In [None]:
def vectorise_data(df):
    """Encode the Age, Sex and SmokingStatus columns of ``df`` in place.

    * ``Age_vect`` (new column): three bits holding the binary index of the
      age bucket. Bucket 0 is ``age <= 50``, buckets 1-6 are the ten-year bands
      up to 110, and bucket 7 covers everything else (including NaN).
    * ``Sex``: ``'Female'`` -> 0, ``'Male'`` -> 1, stored as int.
    * ``SmokingStatus``: ``'Never smoked'`` -> [0, 0], ``'Ex-smoker'`` -> [0, 1],
      ``'Currently smokes'`` -> [1, 0]; any other string becomes NaN.

    Values that are not strings are left untouched, so running the function
    again on an already encoded frame (for example when a notebook cell is
    re-executed) is harmless instead of raising on ``astype(int)``.

    Args:
        df: DataFrame with ``Age``, ``Sex`` and ``SmokingStatus`` columns.

    Returns:
        None. ``df`` is modified in place.
    """
    age_edges = (50, 60, 70, 80, 90, 100, 110)

    def vectorise_age(age):
        # `not age <= edge` (rather than `age > edge`) also sends NaN to the
        # last bucket, like the open-ended final branch of an if/elif chain.
        if not age <= age_edges[-1]:
            bucket = len(age_edges)
        else:
            bucket = int(sum(age > edge for edge in age_edges))
        return [(bucket >> 2) & 1, (bucket >> 1) & 1, bucket & 1]

    df['Age_vect'] = df['Age'].map(vectorise_age)

    sex_codes = {'Female': 0, 'Male': 1}

    def encode_sex(value):
        if not isinstance(value, str):
            return value  # already encoded (or missing): keep as is
        return sex_codes.get(value, float('nan'))

    # astype(int) still raises for unknown labels, which map to NaN.
    df['Sex'] = df['Sex'].map(encode_sex).astype(int)

    SmokingStatus_mapping = {"Never smoked": [0,0], "Ex-smoker": [0,1], "Currently smokes": [1,0]}

    def encode_smoking(value):
        if not isinstance(value, str):
            return value  # already encoded (or missing): keep as is
        code = SmokingStatus_mapping.get(value)
        # Copy so rows do not share one mutable list.
        return float('nan') if code is None else list(code)

    df['SmokingStatus'] = df['SmokingStatus'].map(encode_smoking)


In [None]:
# Encode Age/Sex/SmokingStatus of the training table in place.
vectorise_data(df)

df.head()

In [None]:
from tensorflow.keras.layers import (
    Dense, Dropout, Activation, Flatten, Input, BatchNormalization, GlobalAveragePooling2D, Add, Conv2D, AveragePooling2D, 
    LeakyReLU, Concatenate 
)

def get_efficientnet(model, shape):
    """Build the EfficientNet backbone selected by ``model``.

    Args:
        model: Variant key. Only ``'b5'`` is currently enabled.
        shape: Passed to the constructor as ``input_shape``.

    Returns:
        The model created with ``weights='imagenet'`` and ``include_top=False``.

    Raises:
        KeyError: If ``model`` is not an enabled variant.
    """
    # NOTE(review): `efn` is not imported in any visible cell - presumably
    # `efficientnet.tfkeras`; confirm and add the import before calling this.
    constructor_names = {
        # 'b0': 'EfficientNetB0',
        # 'b1': 'EfficientNetB1',
        # 'b2': 'EfficientNetB2',
        # 'b3': 'EfficientNetB3',
        # 'b4': 'EfficientNetB4',
        'b5': 'EfficientNetB5',
        # 'b6': 'EfficientNetB6',
        # 'b7': 'EfficientNetB7',
    }
    # Resolve the key first so an unsupported variant fails before any network
    # is built, and so that only the requested backbone is ever constructed.
    constructor_name = constructor_names[model]
    constructor = getattr(efn, constructor_name)
    return constructor(input_shape=shape, weights='imagenet', include_top=False)

def build_model(shape, model_class=None):
    """Create the image-plus-tabular regression network.

    A frozen EfficientNet backbone (chosen via ``get_efficientnet``) is applied
    to the image input and globally average-pooled. The pooled features are
    concatenated with five tabular inputs, passed through dropout and two ReLU
    dense layers, and mapped to two ``tanh`` outputs.

    Args:
        shape: Shape of the image input, also forwarded to the backbone.
        model_class: Variant key handed to ``get_efficientnet``.

    Returns:
        A ``tf.keras.Model`` whose inputs are, in order: image, percent (1),
        age (3), sex (1), smoking (2) and week (1).
    """
    image_input = Input(shape=shape)
    backbone = get_efficientnet(model_class, shape)
    backbone.trainable = False
    image_features = backbone(image_input)
    image_features = GlobalAveragePooling2D()(image_features)

    # Widths of the tabular inputs: percent, age, sex, smoking, week.
    tabular_inputs = [Input(shape=(width,)) for width in (1, 3, 1, 2, 1)]

    merged = Concatenate()([image_features, *tabular_inputs])
    dropout_rate = 0.38
    hidden = Dropout(dropout_rate)(merged)
    for units in (512, 256):
        hidden = Dense(units, activation='relu')(hidden)
    outputs = Dense(2, activation='tanh')(hidden)

    return tf.keras.Model([image_input, *tabular_inputs], outputs)

# Enabled EfficientNet variants.
# NOTE(review): `model_classes` is not read by any visible cell - confirm it is still needed.
model_classes = ['b5'] #['b0','b1','b2','b3','b4','b5','b6','b7']
# model = build_model(shape=(*FULL_IMG_SIZE, 3), model_class='b5')
# A previously saved model is loaded here instead of building a fresh one.
model = tf.keras.models.load_model('../input/notebook-mine/model_')
# model.load_weights('weights/model_weights/')

In [None]:
# Compile the loaded model with a mean-squared-error loss.
loss = tf.keras.losses.MeanSquaredError()
model.compile(
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated in
    # tf.keras optimizers and rejected by newer Keras releases.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=loss,
)

In [None]:
def make_subm_batch(input,imgs):
    """Repeat one patient's tabular features once per image.

    Args:
        input: Mapping (for example a DataFrame row) providing ``Percent``,
            ``Age_vect``, ``Sex``, ``SmokingStatus`` and ``Weeks``.
        imgs: Sequence whose length sets the number of rows.

    Returns:
        DataFrame with ``len(imgs)`` identical rows and the five feature columns.
    """
    feature_names = ('Percent', 'Age_vect', 'Sex', 'SmokingStatus', 'Weeks')
    row_count = len(imgs)
    repeated = {}
    for name in feature_names:
        repeated[name] = [input[name] for _ in range(row_count)]
    return pd.DataFrame(repeated)
def make_subs(input,path):
    """Predict one patient's FVC for weeks -12..133 as submission rows.

    The patient's montages are built with ``get_full_image`` and fed, together
    with the repeated tabular features, to the module-level ``model``. The
    second prediction column, averaged over all montages, is used as the weekly
    slope. The line is anchored so that it reproduces the known ``FVC`` at the
    known ``Weeks``.

    Args:
        input: Row providing ``Patient``, ``FVC``, ``Percent``, ``Weeks`` and the
            encoded ``Age_vect``, ``Sex`` and ``SmokingStatus`` values.
        path: Directory containing one sub-directory per patient.

    Returns:
        DataFrame with columns ``Patient_Week``, ``FVC`` and ``Confidence``
        (constant 100), one row per week in ``range(-12, 134)``.
    """
    imgs = get_full_image(path+'/'+input['Patient'])
    imgs = imgs[1:]  # index 0 of get_full_image's result is an empty placeholder

    batch = make_subm_batch(input,imgs)
    imgs = np.asarray(imgs)
    print('big imgs',len(imgs))
    batch_pred = np.array(model.predict([
        imgs,
        batch['Percent'],
        np.asarray(list(batch['Age_vect'])),
        batch['Sex'],
        np.asarray(list(batch['SmokingStatus'])),
        batch['Weeks'],
    ]))

    # Only the second output column (slope) is used. The first column is
    # ignored: the intercept is taken from the known Percent, and it cancels
    # out anyway once the line is anchored at the known measurement below.
    incline = batch_pred[:,1].mean()
    deviation = batch['Percent'].mean()/100
    norm = input['FVC']*100/input['Percent']

    # Offset that makes the line pass through the known (Weeks, FVC) point.
    known_fvc_delta = input['FVC']-(input['Weeks']*incline+deviation*100)/100*norm

    weeks = range(-12,134)
    fvcs = [(week*incline+deviation*100)/100*norm+known_fvc_delta for week in weeks]
    subs = pd.DataFrame({
         'Patient_Week': [input['Patient']+'_'+ str(week) for week in weeks],
         'FVC': fvcs,
         'Confidence': [100 for _ in weeks],
        })
    return subs


In [None]:
test_path = '../input/osic-pulmonary-fibrosis-progression/test'
test_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')
vectorise_data(test_df)

# Collect the per-patient frames and concatenate once; growing a DataFrame
# inside the loop copies all earlier rows on every iteration.
# The loop variable is deliberately not called `input`, which would shadow the
# builtin for the rest of the notebook.
patient_subs = []
for _, patient_row in test_df.iterrows():
    print(patient_row['Patient'])
    patient_subs.append(make_subs(patient_row, test_path))

if patient_subs:
    subs = pd.concat(patient_subs)
else:
    # An empty test table still yields a writable (header-only) submission.
    subs = pd.DataFrame(columns=['Patient_Week', 'FVC', 'Confidence'])


In [None]:
# Write the collected predictions without the DataFrame index column.
subs.to_csv('submission.csv', index=False)

In [None]:
# model.save('model_')