In [27]:
!pip install -q efficientnet

In [28]:
import os
import gc
import re

import cv2
import math
import numpy as np
import scipy as sp
import pandas as pd

import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
from IPython.display import SVG
import efficientnet.tfkeras as efn
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import DenseNet121

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

import warnings
warnings.filterwarnings("ignore")

In [29]:
EPOCHS = 40
SAMPLE_LEN = 100
IMSIZES = (224, 240, 260, 300, 380, 456, 528, 32)
im_size = IMSIZES[7]
SEED = 42
LR = 0.0001
IMAGE_PATH = "../input/depression-detection/train/train/"
TEST_PATH = ".../input/depression-detection/test/"
TRAIN_PATH = "../input/depression-detection/train_dataset_new.csv"
SUB_PATH = "../input/depression-detection/test_dataset_new.csv"

sub = pd.read_csv(SUB_PATH)
test_data = sub.copy()
train_data = pd.read_csv(TRAIN_PATH)

In [30]:
test_data

Unnamed: 0,images,label
0,./data/test/test_d (1).png,1
1,./data/test/test_d (10).png,1
2,./data/test/test_d (100).png,1
3,./data/test/test_d (1000).png,1
4,./data/test/test_d (1001).png,1
...,...,...
4040,./data/test/test_nd (995).png,0
4041,./data/test/test_nd (996).png,0
4042,./data/test/test_nd (997).png,0
4043,./data/test/test_nd (998).png,0


In [31]:
train_data

Unnamed: 0,images,label
0,./data/train/train_d (1).png,1
1,./data/train/train_d (10).png,1
2,./data/train/train_d (100).png,1
3,./data/train/train_d (1000).png,1
4,./data/train/train_d (1001).png,1
...,...,...
16136,./data/train/train_nd (995).png,0
16137,./data/train/train_nd (996).png,0
16138,./data/train/train_nd (997).png,0
16139,./data/train/train_nd (998).png,0


In [32]:
def change_train_path(image):
    return image[13:] 

def change_test_path(image):
    return image[12:] 

train_data["images"] = train_data["images"].progress_apply(change_train_path)
test_data["images"] = test_data["images"].progress_apply(change_test_path)

100%|██████████| 16141/16141 [00:00<00:00, 345698.75it/s]
100%|██████████| 4045/4045 [00:00<00:00, 359631.16it/s]


In [33]:
print(train_data)
test_data

                   images  label
0         train_d (1).png      1
1        train_d (10).png      1
2       train_d (100).png      1
3      train_d (1000).png      1
4      train_d (1001).png      1
...                   ...    ...
16136  train_nd (995).png      0
16137  train_nd (996).png      0
16138  train_nd (997).png      0
16139  train_nd (998).png      0
16140  train_nd (999).png      0

[16141 rows x 2 columns]


Unnamed: 0,images,label
0,test_d (1).png,1
1,test_d (10).png,1
2,test_d (100).png,1
3,test_d (1000).png,1
4,test_d (1001).png,1
...,...,...
4040,test_nd (995).png,0
4041,test_nd (996).png,0
4042,test_nd (997).png,0
4043,test_nd (998).png,0


In [34]:
# def load_image(image_id):
#     image = cv2.imread(IMAGE_PATH + image_id)
# #     return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     return image

# # train_images = train_data["images"].progress_apply(load_image)

In [35]:
# train_images[0:10]

In [36]:
image = cv2.imread(IMAGE_PATH + 'train_d (1).png')
image.shape

(48, 48, 3)

In [37]:
## TPU setup

AUTO = tf.data.experimental.AUTOTUNE
# tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# strategy = tf.distribute.experimental.TPUStrategy(tpu)

# BATCH_SIZE = 16 * strategy.num_replicas_in_sync

BATCH_SIZE = 400


In [38]:
# GCS_DS_PATH = KaggleDatasets().get_gcs_path()
# GCS_DS_PATH = KaggleDatasets().get_gcs_path('depression-detection')
# GCS_DS_PATH

# step = tf.Variable(0, name="step")
# checkpoint = tf.train.Checkpoint(step=step)
# options = tf.train.CheckpointOptions(experimental_io_device="/job:localhost")
# checkpoint.save("/tmp/ckpt", options=options)

In [39]:
#Training Preprocessing for labels
# train_data['labels'] = train_data['labels'].apply(lambda string: string.split(' '))
# mlb = MultiLabelBinarizer()
# df = pd.DataFrame(mlb.fit_transform(train_data['labels']),columns=mlb.classes_)
# train_data = pd.concat([train_data,df],axis = 1 )
# # train_data['labels'] = LabelEncoder().fit_transform(train_data['labels'])


def format_path(st):
    return IMAGE_PATH + st

test_paths = test_data['images'].apply(format_path).values
train_paths = train_data['images'].apply(format_path).values

print(train_paths[0:5])
print(test_paths[0:5])

# train_labels = np.float32(train_data.loc[:,'complex':'scab'].values)
# train_labels = train_data.labels.values
train_labels = np.float32(train_data.loc[:,'label'])
train_labels = train_data['label'].values

train_paths, valid_paths, train_labels, valid_labels =\
train_test_split(train_paths, train_labels, test_size=0.05, random_state=42)
train_labels

['../input/depression-detection/train/train/train_d (1).png'
 '../input/depression-detection/train/train/train_d (10).png'
 '../input/depression-detection/train/train/train_d (100).png'
 '../input/depression-detection/train/train/train_d (1000).png'
 '../input/depression-detection/train/train/train_d (1001).png']
['../input/depression-detection/train/train/test_d (1).png'
 '../input/depression-detection/train/train/test_d (10).png'
 '../input/depression-detection/train/train/test_d (100).png'
 '../input/depression-detection/train/train/test_d (1000).png'
 '../input/depression-detection/train/train/test_d (1001).png']


array([1, 1, 1, ..., 1, 0, 1])

In [40]:
train_paths[0:5]

array(['../input/depression-detection/train/train/train_d (3108).png',
       '../input/depression-detection/train/train/train_d (3362).png',
       '../input/depression-detection/train/train/train_d (8571).png',
       '../input/depression-detection/train/train/train_nd (6212).png',
       '../input/depression-detection/train/train/train_nd (4178).png'],
      dtype=object)

In [41]:
def decode_image(filename, label=None, image_size=(im_size, im_size)):
    bits = tf.io.read_file(filename)
#     with open(IMAGE_PATH , "rb") as filename: # <= change here
#     image = filename.read().
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)
    
    if label is None:
        return image
    else:
        return image, label

def data_augment(image, label=None):
    image = tf.image.random_flip_left_right(image, seed=SEED)
    image = tf.image.random_flip_up_down(image, seed=SEED)
    
    k = np.random.randint(0, 4)
    image = tf.image.rot90(image, k=k)
    
    image = tf.image.random_hue(image, .1, seed=SEED)
    image = tf.image.random_saturation(image, .8, 1.2, seed=SEED)
    image = tf.image.random_contrast(image, .8, 1.2, seed=SEED)
    image = tf.image.random_brightness(image, .1, seed=SEED)
    
    
    if label is None:
        return image
    else:
        return image, label
    
    

train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    .repeat()
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_paths, valid_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)


In [42]:
train_dataset

<PrefetchDataset shapes: ((None, 32, 32, 3), (None,)), types: (tf.float32, tf.int64)>

In [43]:
# def build_lrfn(lr_start=0.00001, lr_max=0.00005, 
#                lr_min=0.00001, lr_rampup_epochs=5, 
#                lr_sustain_epochs=0, lr_exp_decay=.8):
#     lr_max = lr_max * strategy.num_replicas_in_sync

#     def lrfn(epoch):
#         if epoch < lr_rampup_epochs:
#             lr = (lr_max - lr_start) / lr_rampup_epochs * epoch + lr_start
#         elif epoch < lr_rampup_epochs + lr_sustain_epochs:
#             lr = lr_max
#         else:
#             lr = (lr_max - lr_min) *\
#                  lr_exp_decay**(epoch - lr_rampup_epochs\
#                                 - lr_sustain_epochs) + lr_min
#         return lr
#     return lrfn




# lrfn = build_lrfn()
# STEPS_PER_EPOCH = train_labels.shape[0] // BATCH_SIZE
# lr_schedule = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

In [44]:
model_name = "DenseNet121"
experiment_number = "1"
print("{}_EXP{}_final.h5".format(model_name,experiment_number))
print(f'{model_name}_EXP{experiment_number}_{SEED}.h5')

DenseNet121_EXP1_final.h5
DenseNet121_EXP1_42.h5


In [45]:
# Function to create our model
def get_model():
#     with strategy.scope():
        tf.random.set_seed(SEED)
        model = tf.keras.applications.DenseNet169(weights="imagenet", include_top=False, input_shape=(im_size, im_size,3))
        final_model =  tf.keras.Sequential([
        model,
        tf.keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(1, 
        kernel_initializer=keras.initializers.RandomUniform(seed=SEED),
        bias_initializer=keras.initializers.Zeros(), name='dense_top', activation='sigmoid')
        ])

        #         opt = tf.keras.optimizers.Adam(learning_rate = LR)

#         f1 = tfa.metrics.F1Score(num_classes=2, average='macro')

        final_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer='adam', 
              metrics= 'accuracy')

        return final_model

In [46]:
model = get_model()
checkpoint = tf.keras.callbacks.ModelCheckpoint(f'{model_name}_EXP{experiment_number}_{SEED}.h5', 
                                                    monitor = 'val_loss', 
                                                    verbose = 1, 
                                                    save_best_only = True,
                                                    save_weights_only = True, 
                                                    mode = 'min')
model.summary()

# for decayed learning rate
# history = model.fit(train_dataset,
#                     epochs=EPOCHS,
#                     callbacks=[lr_schedule,checkpoint],
#                     steps_per_epoch=STEPS_PER_EPOCH,
#                     validation_data=valid_dataset)

history = model.fit(train_dataset,
                    epochs=5,
                    callbacks=[checkpoint],
                    steps_per_epoch=300,
                    validation_data=valid_dataset)

model.save("{}_EXP{}_final.h5".format(model_name,experiment_number))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet169 (Functional)     (None, 1, 1, 1664)        12642880  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1664)              0         
_________________________________________________________________
dense_top (Dense)            (None, 1)                 1665      
Total params: 12,644,545
Trainable params: 12,486,145
Non-trainable params: 158,400
_________________________________________________________________
Epoch 1/5

Epoch 00001: val_loss improved from inf to 0.49442, saving model to DenseNet121_EXP1_42.h5
Epoch 2/5

Epoch 00002: val_loss did not improve from 0.49442
Epoch 3/5

Epoch 00003: val_loss did not improve from 

## Alternative

In [47]:
def format_path(st):
    return '../input/depression-detection/test/test/'+str(st)

def decode_image(filename, label=None, image_size=(im_size, im_size)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)
    
    if label is None:
        return image
    else:
        return image, label

def data_augment(image, label=None):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    
    if label is None:
        return image
    else:
        return image, label
    
    
test_paths = test_data['images'].apply(format_path).values


test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(test_paths)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

# valid_dataset = (
#     tf.data.Dataset
#     .from_tensor_slices((valid_paths, valid_labels))
#     .map(decode_image, num_parallel_calls=AUTO)
#     .batch(BATCH_SIZE)
#     .cache()
#     .prefetch(AUTO)
# )

In [48]:
# TTA = 3 
# preds = []

# for i in range(TTA):
# #     test_set.reset()
#     preds.append(final_model.predict(test_dataset))
    
# preds = np.mean(np.array(preds), axis=0)


preds = model.predict(test_dataset, verbose=1)



In [49]:
preds = (preds > 0.5).astype(np.float32)
preds.shape

(4045, 1)

In [50]:
test = np.array(test_data['label']).reshape(test_data['label'].shape[0],1)
test.shape

(4045, 1)

In [51]:
preds

array([[1.],
       [0.],
       [1.],
       ...,
       [1.],
       [0.],
       [1.]], dtype=float32)

In [52]:
#calculating various metrics
print("\n-------------------------Accuracy Score----------------------------\n\t\t\t     ",metrics.accuracy_score(test, preds, normalize= True))


-------------------------Accuracy Score----------------------------
			      0.8674907292954265
