# Importing libraries

In [1]:
import os
import funcs 
import load_data
import tensorflow as tf
import mlflow
import subprocess
import git
import numpy as np
import pandas as pd
from tqdm import tqdm

Using TensorFlow backend.


# Set up the GPU

In [2]:
# Expose only the first GPU to TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Session options: grow GPU memory on demand instead of reserving it all up
# front, and log which device each op is placed on.
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
config = tf.compat.v1.ConfigProto(
    device_count={"GPU": 1, "CPU": 10},
    gpu_options=gpu_options,
    log_device_placement=True,
)

# Register a session built from these options as the Keras backend session.
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device

  and should_run_async(code)


# MLflow set up

In [3]:
# Tracking server URI and artifact location come from the project helper.
server, artifact = funcs.mlflow_settings()
mlflow.set_tracking_uri(server)


# Creating/Setting the experiment
experiment_name = '/chexpert_d1'

# Create the experiment only when it does not exist yet, so that the custom
# artifact location is applied on the first run and re-running this cell does
# not fail because the experiment name is already taken.
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(name=experiment_name, artifact_location=artifact)
mlflow.set_experiment(experiment_name=experiment_name)


# Logging the optimization parameters automatically from keras
mlflow.keras.autolog()

# Starting the MLflow run
mlflow.start_run()

# Creating an SSH tunnel to the server in the background

In [None]:
# Forward local port 5000 to port 5432 on the remote host.
# ssh is started directly from an argument list (no shell, no trailing '&'):
# Popen already returns without waiting, and this way `ssh_session` refers to
# the ssh process itself, so it can be stopped later through this handle.
command = ['ssh', '-N', '-L', '5000:localhost:5432', '<username>@<remote-server-address>']
# stdout is discarded rather than piped because nothing ever reads the pipe.
ssh_session = subprocess.Popen(command, stdout=subprocess.DEVNULL)

# Model optimization

## Reading Terminal Inputs

In [5]:
# Fixed values for interactive runs; the script version obtains the same pair
# from funcs.reading_terminal_inputs().
epochs = 3
batch_size = 32

## Selecting the dataset 

In [None]:
# Dataset to train on; 'nih' is the other option.
dataset = 'chexpert'

# Root folder of the selected dataset (keeps the trailing slash because later
# cells append relative paths to it by plain string concatenation).
dir = ''.join(['/groups/jjrodrig/projects/chest/dataset/', dataset, '/'])

## Saving the Git commit  (only in Jupyter notebook)
This is only needed for jupyter notebook

You can annotate runs with arbitrary tags. Tag keys that start with `mlflow.` are reserved for internal use, and MLflow sets some of them automatically when appropriate. The cell below sets one of these, `mlflow.source.git.commit`, explicitly to the current commit hash.

In [6]:
# Locate the enclosing Git repository, searching upwards through parent directories.
repo = git.Repo(search_parent_directories=True)
# Hex SHA of the object that HEAD currently points to.
git_commit_hash = repo.head.object.hexsha
print('git commit hash', git_commit_hash)

# Record the commit on the active MLflow run under the reserved git-commit tag.
mlflow.set_tag('mlflow.source.git.commit', git_commit_hash)

git commit hash 90d4abf577a3b4038a811d89bd87df3b4a7ae707


#### 3.4.3 Optimization

In [7]:
# Build the training/validation inputs with the configured `batch_size`
# instead of a separate hard-coded value, so the setting chosen earlier in the
# notebook is the one actually used.
train_dataset, valid_dataset, Info = load_data.load(dir=dir, dataset=dataset, batch_size=batch_size, mode='train_val')

# Run the optimization for `epochs` epochs on the loaded data.
funcs.optimize(dir, train_dataset, valid_dataset, epochs, Info)

  and should_run_async(code)
train size: (478, 23)
valid size: (120, 23)
test size: (202, 23)
Found 478 validated image filenames.
Found 120 validated image filenames.
Found 202 validated image filenames.
  tensor_proto.tensor_content = nparray.tostring()
Epoch 1/3
  if not isinstance(values, collections.Sequence):
  if not isinstance(wrapped_dict, collections.Mapping):
Epoch 2/3
Epoch 3/3


## Closing the mlflow session

In [None]:
# End the currently active MLflow run.
mlflow.end_run()

print('Finished')

## Closing the SSH session

In [None]:
# Ask the tunnel process to stop, then wait for it so it is reaped instead of
# lingering as a defunct child of the kernel.
ssh_session.terminate()
try:
    ssh_session.wait(timeout=5)
except subprocess.TimeoutExpired:
    # Did not exit in time: force-kill and reap.
    ssh_session.kill()
    ssh_session.wait()

# Evaluation

### Loading the model

In [22]:
# Root folder of the dataset used for evaluation.
dir = '/groups/jjrodrig/projects/chest/dataset/nih/'

# Loading the data
test_generator, Info = load_data.load(dir=dir, dataset='nih', batch_size=30, mode='test')

# Loading the model
# compile=False: the saved training configuration is not restored, because the
# model is compiled explicitly right below; this also avoids having to
# deserialize the custom loss from the file.
model = tf.keras.models.load_model(dir + 'model/model.h5', compile=False)

# Compiling the model with the class-weighted loss built from the loaded data info
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=funcs.weighted_bce_loss(Info.class_weights),
    metrics=[tf.keras.metrics.binary_accuracy],
)


  and should_run_async(code)
train size: (61470, 23)
valid size: (15367, 23)
test size: (23163, 23)
Found 23163 validated image filenames.
  tensor_proto.tensor_content = nparray.tostring()


### Measuring loss & Accuracy for all test samples (average over all classes)

In [2]:
# Per-sample evaluation results, keyed by image filename.
score = {}
NUM_CLASSES = 14

# Walk over the test set exactly once, batch by batch. Looping over every
# filename while pulling a whole batch per iteration would traverse the data
# many times and pair each filename with an unrelated batch.
# NOTE(review): assumes test_generator is a Keras image iterator (supports
# len(), indexing, and exposes batch_size / index_array / filenames) — confirm
# against load_data.load.
for batch_idx in tqdm(range(len(test_generator))):

    x_batch, y_batch = test_generator[batch_idx]

    # Positions (into test_generator.filenames) of the samples in this batch,
    # in the order they were emitted; valid whether or not the iterator shuffles.
    start = batch_idx * test_generator.batch_size
    sample_ids = test_generator.index_array[start:start + len(x_batch)]

    # predicting the labels once for the whole batch
    pred_batch = model.predict(x=x_batch, verbose=0)

    for row, sample_id in enumerate(sample_ids):

        name = test_generator.filenames[sample_id]

        # Keep a leading batch axis of size 1 for each single instance
        x_test = x_batch[row:row + 1]
        y_test = y_batch[row:row + 1]
        pred = pred_batch[row:row + 1]

        # Estimating the loss & accuracy for the instance
        loss_avg, acc_avg = model.evaluate(x=x_test, y=y_test, verbose=0)

        # Measuring the loss for each class
        loss_per_class = [tf.keras.losses.binary_crossentropy(y_test[..., d], pred[..., d]) for d in range(NUM_CLASSES)]

        # saving all the infos
        score[name] = {'loss_avg': loss_avg, 'acc_avg': acc_avg, 'predictions': pred, 'truth': y_test, 'loss': np.array(loss_per_class)}

''

### Converting the outputs to a dataframe

In [21]:
# from_dict builds one column per key of `score`; transposing turns each key
# into a row, with the entries of its inner dict as columns.
df = pd.DataFrame.from_dict(score).T

# Saving the dataframe as a JSON file next to the model.
# NOTE(review): intended to be added to MLflow as an artifact, but nothing in
# this cell logs it — confirm where that happens.
df.to_json(dir + 'model/test_results.json')

''

# ssh tunneling:

- Step 1 (before running the code): Connecting to remote server through ssh tunneling
        
        ssh -L 5000:localhost:5432 <username>@<remote-server-address>

- Step 2 (after running the code): Connecting to remote postgres server
        
        mlflow ui --backend-store-uri postgresql://<postgres-username>:<pass>@localhost:5000/<database> --port 6789             
        