In [1]:
import pandas as pd
import numpy as np
import os
from importlib import reload

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import train_test_split
import skimage
from skimage import io
from skimage.transform import resize

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

# local imports
import model as _model
import utils as _tools

In [2]:
"""
control the generation of training and dev datasets
"""

# Build the splits; the third returned frame is discarded here.
# `state` is presumably the random seed for the sampling -- TODO confirm in utils.
train, dev, _ = _tools.make_trainning_data(
    sample=50000,
    return_frames=True,
    state=1729,
)

# Same report for each split: blank line, heading, then the normalized
# Cardiomegaly value counts with NaN kept as its own category.
for heading, frame in (
    ("Trainning label distribution", train),
    ("Development label distribution", dev),
):
    print()
    print(heading)
    print(frame['Cardiomegaly'].value_counts(normalize=True, dropna=False))

sampling 50000 records
train.shape: (37500, 19)
dev.shape: (12500, 19)
valid.shape: (234, 19)
saved: ./train.csv
saved: ./dev.csv
saved: ./valid.csv

Trainning label distribution
 NaN    0.792187
 1.0    0.121787
 0.0    0.049280
-1.0    0.036747
Name: Cardiomegaly, dtype: float64

Development label distribution
 NaN    0.79312
 1.0    0.12280
 0.0    0.04952
-1.0    0.03456
Name: Cardiomegaly, dtype: float64


In [None]:
"""
build and train the model
"""

# Re-import the local `model` module so edits made to it since the last
# import take effect without restarting the kernel.
reload(_model)

# build the models
# NOTE(review): use_cpu=False presumably selects GPU execution -- confirm in
# TransferModel.
resnet = _model.TransferModel(use_cpu=False)

# train + evaluate the model
# `train()` is TransferModel's own method; judging by the cell output it runs
# the epoch loop and reports training and validation loss/accuracy per epoch.
resnet.train()


-------------------------------
Cardiomegaly Model epoch 1/20


  _warn_prf(average, modifier, msg_start, len(result))


Trainning loss: 0.6191 accuracy: 82.78 %
Validation loss: 0.5925 accuracy: 84.79 %

-------------------------------
Cardiomegaly Model epoch 2/20
Trainning loss: 0.5727 accuracy: 83.39 %
Validation loss: 0.5686 accuracy: 85.43 %

-------------------------------
Cardiomegaly Model epoch 3/20
Trainning loss: 0.5421 accuracy: 83.24 %
Validation loss: 0.5456 accuracy: 81.68 %

-------------------------------
Cardiomegaly Model epoch 4/20
Trainning loss: 0.5223 accuracy: 83.50 %
Validation loss: 0.5323 accuracy: 82.06 %

-------------------------------
Cardiomegaly Model epoch 5/20
Trainning loss: 0.5019 accuracy: 83.61 %
Validation loss: 0.5260 accuracy: 83.24 %

-------------------------------
Cardiomegaly Model epoch 6/20
Trainning loss: 0.4867 accuracy: 83.92 %
Validation loss: 0.5187 accuracy: 82.23 %

-------------------------------
Cardiomegaly Model epoch 7/20
Trainning loss: 0.4731 accuracy: 84.61 %
Validation loss: 0.5158 accuracy: 82.43 %

-------------------------------
Cardiome

In [None]:
# Score the dev split with the best model and persist the per-sample results.
# `results` is used below as a pandas-style frame with `y_true` / `y_pred`
# columns (its exact schema is defined by evaluate_model, not visible here).
results = resnet.evaluate_model(resnet.best_model,
                                resnet.dataloader_dev,
                                resnet.dev_map)
print(results.shape)

# get distributions of true labels
print()
print(results['y_true'].value_counts(normalize=True))

# get distributions of pred labels
print()
print(results['y_pred'].value_counts(normalize=True))

# to_csv does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
outpath = "results/dev_results.csv"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
results.to_csv(outpath, index=False)

In [None]:
# Preview the first rows of `results`; as the last expression of the cell it
# is rendered as the cell output.
results.head()

In [None]:
# Histogram of the `y_prob` column of `results`: 30 bins, black bar edges,
# rendered at 150 dpi.
matplotlib.rcParams.update({'figure.dpi': 150})
prob_axes = results['y_prob'].hist(bins=30, edgecolor='black')
prob_axes.set_title('Distribution of Propensities')

In [None]:
# Plot the per-epoch loss histories recorded on `resnet` and save the figure.
# x axis: 0-based epoch index, one point per entry in the training history.
_time = list(range(len(resnet.train_loss_history)))

matplotlib.rcParams['figure.dpi'] = 150

# Use an explicit figure/axes instead of pyplot's implicit "current figure",
# so the curves can never land on (and be saved with) a figure left open by
# an earlier cell.
fig, ax = plt.subplots()
ax.plot(_time, resnet.train_loss_history, c='black', label="Trainning")
# dotted line: the dev-set history (legend label kept as "Testing")
ax.plot(_time, resnet.dev_loss_history, c='black', ls=":", label="Testing")
ax.set_title("Training Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Cross Entropy Loss")
ax.legend()

# savefig fails if the parent directory is missing; create it if needed.
outpath = "results/training_loss.png"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
fig.savefig(outpath, bbox_inches='tight')

In [None]:
# Plot the per-epoch accuracy histories recorded on `resnet` and save the figure.
# x axis: 0-based epoch index, one point per entry in the training history.
_time = list(range(len(resnet.train_acc_history)))

matplotlib.rcParams['figure.dpi'] = 150

# Use an explicit figure/axes instead of pyplot's implicit "current figure",
# so the curves can never land on (and be saved with) a figure left open by
# an earlier cell.
fig, ax = plt.subplots()
ax.plot(_time, resnet.train_acc_history, c='black', label="Trainning")
# dotted line: the dev-set history (legend label kept as "Testing")
ax.plot(_time, resnet.dev_acc_history, c='black', ls=":", label="Testing")
# Title spelling fixed ("Trainning" -> "Training") to match the loss figure.
ax.set_title("Training Accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()

# savefig fails if the parent directory is missing; create it if needed.
outpath = "results/training_accuracy.png"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
fig.savefig(outpath, bbox_inches='tight')

In [None]:
# Re-import the local `utils` module so edits made to it since the last
# import take effect without restarting the kernel.
reload(_tools)

# Compute classification metrics from `results` and show them as the cell output.
# NOTE(review): `results` is whatever the most recently executed evaluation
# cell bound it to -- the dev-set cell earlier in this notebook, or the
# valid-set cell further down if that one has already been run.
res = _tools.get_classification_metrics(results)
res

In [None]:
"""
save the model
"""

# Checkpoint path is derived from the condition name stored on the model wrapper.
outpath = f"models/{resnet.condition}_resnet18.pth"

# torch.save fails if the parent directory is missing; create it if needed.
os.makedirs(os.path.dirname(outpath), exist_ok=True)

# Only the weights (state_dict) are written, not the full module object.
# NOTE(review): this saves `resnet.model`, while the evaluation cells score
# `resnet.best_model` -- confirm both hold the same weights, otherwise the
# checkpoint on disk is not the model whose metrics are reported.
torch.save(resnet.model.state_dict(), outpath)
print(f"saved: {outpath}")

In [None]:
# get results on valid set
# NOTE(review): this rebinds `results` (and `res` below), replacing the
# dev-set values that earlier cells stored under the same names; re-running
# those earlier cells afterwards will operate on the valid-set frame.
results = resnet.evaluate_model(resnet.best_model,
                                resnet.dataloader_valid,
                                resnet.valid_map)
print(results.shape)

# get distributions of true labels
print()
print(results['y_true'].value_counts(normalize=True))

# get distributions of pred labels
print()
print(results['y_pred'].value_counts(normalize=True))

# to_csv does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
outpath = "results/validation_results.csv"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
results.to_csv(outpath, index=False)

# classification metrics for the valid set, displayed as the cell output
print()
res = _tools.get_classification_metrics(results)
res

In [None]:
# Shell escape: stage every change under the current directory.
# NOTE(review): this also stages files written by the cells above (CSV
# exports, PNG figures, the .pth checkpoint) unless .gitignore excludes them.
!git add .

In [None]:
# Shell escape: commit whatever is currently staged, using a fixed message.
!git commit -m "model updates"