In [1]:
import pandas as pd
import numpy as np
import os
from importlib import reload

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import train_test_split
import skimage
from skimage import io
from skimage.transform import resize

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy

# local imports
import model as _model
import utils as _tools

In [2]:
"""
control the generation of trainning and dev datasets
"""

train, dev, _ = _tools.make_trainning_data(sample=100, 
                                           return_frames=True, 
                                           state=1729)

print()
print("Trainning label distribution")
print(train['Cardiomegaly'].value_counts(normalize=True, dropna=False))

print()
print("Development label distribution")
print(dev['Cardiomegaly'].value_counts(normalize=True, dropna=False))

sampling 100 records
train.shape: (75, 19)
dev.shape: (25, 19)
valid.shape: (234, 19)
saved: ./train.csv
saved: ./dev.csv
saved: ./valid.csv

Trainning label distribution
 NaN    0.853333
 1.0    0.093333
-1.0    0.040000
 0.0    0.013333
Name: Cardiomegaly, dtype: float64

Development label distribution
NaN    0.76
1.0    0.16
0.0    0.08
Name: Cardiomegaly, dtype: float64


In [3]:
"""
build and train the model
"""

reload(_model)

# build the models
resnet = _model.TransferModel(use_cpu=False)

# train + evaluate the model
resnet.train()


-------------------------------
Cardiomegaly Model epoch 1/2
Trainning loss: 0.5838 accuracy: 86.67 %
Validation loss: 0.6703 accuracy: 80.00 %

-------------------------------
Cardiomegaly Model epoch 2/2
Trainning loss: 0.6356 accuracy: 85.33 %
Validation loss: 0.6771 accuracy: 88.00 %

Best dev Acc: 0.880000


In [4]:
# get results on dev set
results = resnet.evaluate_model(resnet.best_model, 
                                resnet.dataloader_dev, 
                                resnet.dev_map)
print(results.shape)

# get distributions of true labels
print()
print(results['y_true'].value_counts(normalize=True))

# get distributions of pred labels
print()
print(results['y_pred'].value_counts(normalize=True))

outpath = f"results/dev_results.csv"
results.to_csv(outpath, index=False)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

In [None]:
results.head()

In [None]:
matplotlib.rcParams['figure.dpi'] = 150
results['y_prob'].hist(edgecolor='black', bins=30)
plt.title('Distribution of Propensities')

In [None]:
_time = list(range(len(resnet.train_loss_history)))

matplotlib.rcParams['figure.dpi'] = 150
plt.plot(_time, resnet.train_loss_history, c='black', label="Trainning")
plt.plot(_time, resnet.dev_loss_history, c='black', ls=":", label="Testing")
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Cross Entropy Loss")
plt.legend()
outpath = f"results/training_loss.png"
plt.savefig(outpath, bbox_inches='tight')

In [None]:
_time = list(range(len(resnet.train_acc_history)))

matplotlib.rcParams['figure.dpi'] = 150
plt.plot(_time, resnet.train_acc_history,  c='black', label="Trainning")
plt.plot(_time, resnet.dev_acc_history, c='black', ls=":", label="Testing")
plt.title("Trainning Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
outpath = f"results/training_accuracy.png"
plt.savefig(outpath, bbox_inches='tight')

In [None]:
reload(_tools)

res = _tools.get_classification_metrics(results)
res

In [None]:
"""
save the model
"""

outpath = f"models/{resnet.condition}_resnet18.pth"
torch.save(resnet.model.state_dict(), outpath)
print(f"saved: {outpath}")

In [None]:
# get results on valid set
results = resnet.evaluate_model(resnet.best_model, 
                                resnet.dataloader_valid, 
                                resnet.valid_map)
print(results.shape)

# get distributions of true labels
print()
print(results['y_true'].value_counts(normalize=True))

# get distributions of pred labels
print()
print(results['y_pred'].value_counts(normalize=True))

outpath = f"results/validation_results.csv"
results.to_csv(outpath, index=False)

print()
res = _tools.get_classification_metrics(results)
res

In [None]:
!git add .

In [None]:
!git commit -m "model updates"