In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
from IPython.display import display

from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras import models
from tensorflow.keras import callbacks

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GroupKFold

In [4]:
# Dataset location and cross-validation configuration.
dataset_dir="../input/ranzcr-clip-catheter-line-classification/"
train=pd.read_csv(f"{dataset_dir}train.csv")
n_folds=10

# Read one stored array per StudyInstanceUID, in the row order of train.csv,
# so that row i of `features` corresponds to row i of `train`.
# The archive is opened in a `with` block so its underlying file handle is
# closed as soon as every array has been read into memory; `npz` must not be
# indexed after this block.
with np.load("../input/effnet_best_output.npz") as npz:
    features_list=[npz[uid] for uid in tqdm(train["StudyInstanceUID"])]
features=np.array(features_list)

100%|██████████| 30083/30083 [00:48<00:00, 621.20it/s]


In [3]:
def normalize(features,save_scaler=True):
    """Min-max scale the feature matrix column by column.

    A fresh ``MinMaxScaler`` is fitted on ``features`` and then applied to the
    same data.

    Args:
        features: 2-D array-like of samples x features accepted by
            ``MinMaxScaler.fit``.
        save_scaler: When true, the fitted scaler is pickled to
            ``./models/minmaxscaler_effnet_best.pickle`` so the identical
            scaling can be re-applied later.

    Returns:
        The scaled data as returned by ``MinMaxScaler.transform``.
    """
    import os

    scaler=MinMaxScaler()
    scaler.fit(features)
    if save_scaler:
        scaler_path="./models/minmaxscaler_effnet_best.pickle"
        # Create the target directory so a fresh checkout can run this cell.
        os.makedirs(os.path.dirname(scaler_path),exist_ok=True)
        # `with` guarantees the file is flushed and closed even if pickling fails.
        with open(scaler_path,"wb") as scaler_file:
            pickle.dump(scaler,scaler_file)
    X=scaler.transform(features)
    return X

# NOTE(review): the scaler is fitted on every row, i.e. before the
# train/validation split made later in the notebook, so validation rows
# influence the scaling ranges. Confirm this is intended.
X=pd.DataFrame(normalize(features,save_scaler=True))

## Dropout
直前の層の出力に対してドロップアウトを適用する。学習時に、与えられた確率に従ってノードの一部を無効化する。

## Sigmoid
活性化関数の1つ。出力は (0, 1) の範囲に収まる。

In [6]:
def get_fold(train,n_splits=None):
    """Return a copy of ``train`` with an integer ``folds`` column.

    Rows are split with ``GroupKFold`` using the ``PatientID`` column as the
    group key, so all rows of one patient land in the same fold. Each row's
    ``folds`` value is the number of the split in which it is a validation row.

    Args:
        train: DataFrame containing a ``PatientID`` column.
        n_splits: Number of folds. Defaults to the notebook-level ``n_folds``
            when omitted, which keeps existing ``get_fold(train)`` calls
            working unchanged.

    Returns:
        A copy of ``train`` with the extra ``folds`` column; ``train`` itself
        is not modified.
    """
    if n_splits is None:
        n_splits=n_folds
    fold=train.copy()
    splitter=GroupKFold(n_splits=n_splits)
    # GroupKFold yields *positional* row indices. Collect them in a plain
    # integer array instead of writing through label-based `.loc`, so the
    # assignment is correct whatever index `train` carries and the column is
    # integer-typed from the start.
    fold_ids=np.full(len(train),-1,dtype=int)
    for n,(_,val_idx) in enumerate(splitter.split(train,groups=train["PatientID"])):
        fold_ids[val_idx]=n
    fold["folds"]=fold_ids
    return fold

fold=get_fold(train)

In [7]:
# Hold out a single fold for validation; all remaining folds form the
# training set. Both masks are boolean Series aligned with the rows of `X`.
target_fold=0
val_idx=fold["folds"].eq(target_fold)
train_idx=fold["folds"].ne(target_fold)

X_val=X.loc[val_idx]
X_train=X.loc[train_idx]
display(X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2550,2551,2552,2553,2554,2555,2556,2557,2558,2559
0,0.637893,0.131938,0.104526,0.063978,0.072000,0.029705,0.060279,0.622218,0.247831,0.295754,...,0.215813,0.064823,0.248789,0.222172,0.018390,0.115120,0.523893,0.046372,0.015596,0.088412
1,0.024593,0.048640,0.046269,0.021631,0.201261,0.559943,0.127162,0.080956,0.017294,0.063737,...,0.035085,0.395627,0.100634,0.029711,0.008340,0.471350,0.099166,0.095559,0.350444,0.269512
2,0.450534,0.011428,0.271354,0.319025,0.032320,0.020797,0.016488,0.423592,0.192775,0.378316,...,0.273190,0.098082,0.599870,0.176126,0.269714,0.020955,0.590382,0.680041,0.260838,0.350846
3,0.354269,0.044961,0.113954,0.234826,0.074535,0.033150,0.047977,0.286969,0.139299,0.225637,...,0.140248,0.147160,0.476094,0.229894,0.165611,0.026377,0.267940,0.351040,0.215919,0.058292
4,0.457785,0.012760,0.302983,0.305707,0.050934,0.712111,0.018399,0.521195,0.248089,0.446346,...,0.342668,0.061332,0.531756,0.183002,0.227087,0.373302,0.671849,0.622573,0.217722,0.469270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30078,0.005345,0.013495,0.097013,0.038049,0.033289,0.342629,0.089725,0.226423,0.023725,0.402143,...,0.208133,0.212049,0.092566,0.008017,0.203990,0.489665,0.103791,0.049335,0.280249,0.107862
30079,0.458205,0.059922,0.298080,0.332596,0.054697,0.579604,0.044275,0.480555,0.256309,0.362362,...,0.294675,0.045970,0.626358,0.224780,0.268795,0.384345,0.598721,0.654069,0.359651,0.298022
30080,0.007546,0.098454,0.047721,0.012569,0.100689,0.032487,0.075171,0.195573,0.010546,0.178106,...,0.055352,0.225094,0.072842,0.013784,0.005990,0.251083,0.061592,0.054397,0.353068,0.035504
30081,0.500305,0.042702,0.189077,0.358666,0.015022,0.049566,0.008496,0.269850,0.173320,0.250008,...,0.168985,0.061154,0.493127,0.166074,0.195504,0.025113,0.394580,0.609913,0.219251,0.190061


In [8]:
# Dense autoencoder: 2560 inputs -> 1800 -> 100 (bottleneck) -> 1800 -> 2560.
input_shape=(2560,)
encoding_dim=100

model=keras.Sequential()
# Encoder: wide hidden layer followed by the bottleneck.
model.add(Dense(encoding_dim*18,activation="relu",input_shape=input_shape))
model.add(Dense(encoding_dim,activation="relu"))
# Decoder: mirror of the encoder, with light dropout before the output layer.
model.add(Dense(encoding_dim*18,activation="relu"))
model.add(Dropout(0.1))
# Sigmoid output layer with one unit per input feature.
model.add(Dense(input_shape[0],activation="sigmoid"))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1800)              4609800   
_________________________________________________________________
dense_1 (Dense)              (None, 100)               180100    
_________________________________________________________________
dense_2 (Dense)              (None, 1800)              181800    
_________________________________________________________________
dropout (Dropout)            (None, 1800)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 2560)              4610560   
Total params: 9,582,260
Trainable params: 9,582,260
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train the autoencoder to reconstruct its own input, using mean squared error.
adam=keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=adam,loss="mse")

# Path where the weights with the lowest validation loss are stored.
checkpoint_path="./models/autoencoder_best/ckpt"

fit_callbacks=[
    # Stop when val_loss has not improved for 50 epochs.
    callbacks.EarlyStopping(monitor="val_loss",patience=50,mode="min"),
    # Halve the learning rate after 30 epochs without improvement (floor 1e-7).
    callbacks.ReduceLROnPlateau(monitor="val_loss",patience=30,min_lr=1e-7,mode="min",factor=0.5,verbose=1),
    # Keep only the weights of the best epoch seen so far.
    callbacks.ModelCheckpoint(checkpoint_path,monitor="val_loss",mode="min",
        save_weights_only=True,save_best_only=True)
]

history=model.fit(X_train,X_train,epochs=150,shuffle=True,validation_data=(X_val,X_val),callbacks=fit_callbacks)

# After fit() the model holds the weights of the *last* epoch, which may be
# worse than the checkpointed ones. Reload the best checkpoint so the reported
# score describes the weights that were actually saved.
model.load_weights(checkpoint_path)

score=model.evaluate(X_val,X_val,verbose=0)
print(f"\nscore: {score}")

Epoch 1/200
 63/847 [=>............................] - ETA: 1:07 - loss: 0.0445

## score
lr 1e-3: 0.0138  
lr 1e-4, layers \*2: 0.013181226328015327  
lr 1e-3, layers \*4 \*2: 0.014  
lr 1e-4, layers \*4: 0.013148400001227856  
lr 1e-4, layers \*4, normalize: 0.0025459351018071175  
lr 1e-4, layers \*8, normalize: 0.0024564287159591913  
lr 1e-4, layers \*16, norm: 0.0023377728648483753  
lr 1e-4, layers \*16 \*4, norm: 0.002483354415744543  
lr 1e-3, layers \*16 \*4, norm: 0.0037389046046882868  
lr 1e-3, layers \*16 \*2, norm: 0.0035295113921165466  
lr 1e-4, layers \*16 \*2, norm: 0.0023819475900381804  
lr 1e-4, layers \*18, norm: **0.0021908902563154697**  
lr 1e-4, layers \*20, norm: 0.002232010243460536  
lr 1e-4, layers none, norm: 0.0029496345669031143  
lr 1e-4, layers \*18 dropout, norm:  
lr 1e-4, layers \*18, dropout 0.2, norm: 0.0023323060013353825  
lr 1e-4, layers \*18, dropout 0.1, norm: 0.002210501581430435  
lr reducer, layers dropout norm same: 0.0022298386320471764  
lr reducer ,epochs 500, others same: 0.0021655228920280933  
groupkfold, others same: 0.0022280211560428143  
groupkfold, dropout 0.05, others same: 0.002246299758553505  
groupkfold, batchnorm, without dropout: 0.003952326253056526  
groupkfold, without batchnorm dropout: 0.0022722359281033278  
groupkfold splits10, dropout 0.1, no batchnorm, others same: **0.002182652475312352**