In [1]:
import pandas as pd
import numpy as np
import os
from importlib import reload

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import train_test_split
import skimage
from skimage import io
from skimage.transform import resize

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

# local imports
import model as _model
import utils as _tools

In [2]:
"""
Control the generation of the training and dev datasets.
"""

# Draw the splits from a 10000-record sample. Only the first two returned
# frames are kept; the third is bound to `_` and not used in this cell.
# `state` is presumably the random seed for the sampling -- TODO confirm in utils.py.
train, dev, _ = _tools.make_trainning_data(
    sample=10000,
    return_frames=True,
    state=1729,
)

# Print the normalized Cardiomegaly label frequencies for each split.
# dropna=False keeps the NaN (unlabelled) share visible in the report.
for heading, frame in (
    ("Trainning label distribution", train),
    ("Development label distribution", dev),
):
    print()
    print(heading)
    print(frame['Cardiomegaly'].value_counts(normalize=True, dropna=False))

sampling 10000 records
train.shape: (7500, 19)
dev.shape: (2500, 19)
valid.shape: (234, 19)
saved: ./train.csv
saved: ./dev.csv
saved: ./valid.csv

Trainning label distribution
 NaN    0.790000
 1.0    0.125067
 0.0    0.049600
-1.0    0.035333
Name: Cardiomegaly, dtype: float64

Development label distribution
 NaN    0.8052
 1.0    0.1148
 0.0    0.0480
-1.0    0.0320
Name: Cardiomegaly, dtype: float64


In [None]:
"""
build and train the model
"""

# Re-import the local `model` module so that edits made to it after the
# kernel started are picked up without a restart.
reload(_model)

# Build the model wrapper; it is constructed with no arguments here, so its
# configuration comes from whatever defaults model.py defines.
resnet = _model.TransferModel()

# Run training. Per the recorded output this prints per-epoch training and
# validation loss/accuracy for the "Cardiomegaly Model" over 10 epochs.
# NOTE(review): later cells read resnet.best_model and the *_history
# attributes, so they presumably get populated by this call -- confirm in model.py.
resnet.train()


-------------------------------
Cardiomegaly Model epoch 1/10
Trainning loss: 0.6594 accuracy: 82.91 %


  _warn_prf(average, modifier, msg_start, len(result))


Validation loss: 0.6373 accuracy: 85.08 %

-------------------------------
Cardiomegaly Model epoch 2/10
Trainning loss: 0.6288 accuracy: 82.96 %
Validation loss: 0.6234 accuracy: 83.92 %

-------------------------------
Cardiomegaly Model epoch 3/10
Trainning loss: 0.6079 accuracy: 83.96 %
Validation loss: 0.6170 accuracy: 83.48 %

-------------------------------
Cardiomegaly Model epoch 4/10
Trainning loss: 0.5916 accuracy: 83.80 %
Validation loss: 0.6097 accuracy: 84.76 %

-------------------------------
Cardiomegaly Model epoch 5/10
Trainning loss: 0.5779 accuracy: 84.41 %
Validation loss: 0.6041 accuracy: 84.20 %

-------------------------------
Cardiomegaly Model epoch 6/10
Trainning loss: 0.5581 accuracy: 85.24 %
Validation loss: 0.6003 accuracy: 83.44 %

-------------------------------
Cardiomegaly Model epoch 7/10
Trainning loss: 0.5432 accuracy: 85.28 %
Validation loss: 0.5953 accuracy: 85.00 %

-------------------------------
Cardiomegaly Model epoch 8/10
Trainning loss: 0.5

In [None]:
# Score the development split with resnet.best_model.
# `results` is used below as a DataFrame with (at least) the columns
# 'y_true' and 'y_pred'; a later cell also reads 'y_prob'.
results = resnet.evaluate_model(
    resnet.best_model,
    resnet.dataloader_dev,
    resnet.dev_map,
)
print(results.shape)

# Share of each true label in the dev set.
print()
print(results['y_true'].value_counts(normalize=True))

# Share of each predicted label in the dev set.
print()
print(results['y_pred'].value_counts(normalize=True))

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (otherwise a fresh checkout fails).
outpath = "results/dev_results.csv"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
results.to_csv(outpath, index=False)

In [None]:
# Preview the first rows of `results` (the dev-set predictions when the
# notebook is executed top to bottom).
results.head()

In [None]:
# Render figures at 150 dpi. This changes the global matplotlib setting,
# so it also applies to every figure created after this cell.
matplotlib.rcParams.update({'figure.dpi': 150})

# Histogram of the 'y_prob' column in 30 bins. Series.hist returns the Axes
# it drew on, so the title is set on that Axes directly.
prob_axes = results['y_prob'].hist(bins=30, edgecolor='black')
prob_axes.set_title('Distribution of Propensities')

In [None]:
# x-axis: one tick per recorded epoch, taken from the training history.
# Both curves are plotted against it, so the dev history is assumed to
# have the same length.
epoch_index = list(range(len(resnet.train_loss_history)))

matplotlib.rcParams.update({'figure.dpi': 150})

# Solid line = training loss, dotted line = dev-set loss (labelled "Testing").
fig, ax = plt.subplots()
ax.plot(epoch_index, resnet.train_loss_history, c='black', label="Trainning")
ax.plot(epoch_index, resnet.dev_loss_history, c='black', ls=":", label="Testing")
ax.set_title("Trainning Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Cross Entropy Loss")
ax.legend()

In [None]:
# x-axis: one tick per recorded epoch, taken from the training history.
# Both curves are plotted against it, so the dev history is assumed to
# have the same length.
epoch_index = list(range(len(resnet.train_acc_history)))

matplotlib.rcParams.update({'figure.dpi': 150})

# Solid line = training accuracy, dotted line = dev-set accuracy (labelled "Testing").
fig, ax = plt.subplots()
ax.plot(epoch_index, resnet.train_acc_history, c='black', label="Trainning")
ax.plot(epoch_index, resnet.dev_acc_history, c='black', ls=":", label="Testing")
ax.set_title("Trainning Accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()

In [None]:
# Re-import the local `utils` module so edits to the metric helpers are
# picked up without restarting the kernel.
reload(_tools)

# Compute classification metrics from the current `results` frame and show
# them as the cell output.
# NOTE(review): `results` holds the dev-set predictions only if the cells
# are run in order; the validation cell further down rebinds it.
res = _tools.get_classification_metrics(results)
res

In [None]:
"""
save the model
"""

# Checkpoint path is keyed by the condition the wrapper was trained for.
outpath = f"models/{resnet.condition}_resnet18.pth"

# torch.save does not create missing parent directories; create the folder
# first so this cell also works on a fresh checkout.
os.makedirs(os.path.dirname(outpath), exist_ok=True)

# Persist only the weights (state_dict), not the whole wrapper object.
# NOTE(review): the evaluation cells score resnet.best_model, while this
# saves resnet.model -- confirm both refer to the same weights, otherwise
# the saved file is not the model whose metrics are reported.
torch.save(resnet.model.state_dict(), outpath)
print(f"saved: {outpath}")

In [None]:
# Score the validation split with resnet.best_model.
# NOTE: this rebinds `results` (and `res` below), which until now held the
# dev-set output; re-running the earlier dev cells after this one will
# therefore operate on validation data.
results = resnet.evaluate_model(
    resnet.best_model,
    resnet.dataloader_valid,
    resnet.valid_map,
)
print(results.shape)

# Share of each true label in the validation set.
print()
print(results['y_true'].value_counts(normalize=True))

# Share of each predicted label in the validation set.
print()
print(results['y_pred'].value_counts(normalize=True))

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (otherwise a fresh checkout fails).
outpath = "results/validation_results.csv"
os.makedirs(os.path.dirname(outpath), exist_ok=True)
results.to_csv(outpath, index=False)

# Classification metrics for the validation predictions, shown as the
# cell output.
print()
res = _tools.get_classification_metrics(results)
res

In [None]:
# Shell escape: stage every change under the current directory.
# NOTE(review): earlier cells write ./train.csv, ./dev.csv, ./valid.csv,
# results/*.csv and models/*.pth, so these get staged too unless they are
# git-ignored -- confirm that committing data and weights is intended.
!git add .

In [None]:
# Shell escape: commit whatever is currently staged with a fixed message.
!git commit -m "model updates"