In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim

from code.mydatasets import load_seizure_dataset
from code.utils import train, evaluate
from code.plots import plot_learning_curves, plot_confusion_matrix
from code.mymodels import MyMLP, MyCNN, MyRNN

In [None]:
# Set a correct path to the seizure data file you downloaded
# (all paths are relative to the notebook's working directory)
PATH_TRAIN_FILE = "data/seizure/seizure_train.csv"
PATH_VALID_FILE = "data/seizure/seizure_validation.csv"
PATH_TEST_FILE = "data/seizure/seizure_test.csv"

# Path for saving model
# NOTE: a later cell in this notebook rebinds PATH_OUTPUT to a mortality
# directory; re-run this cell before saving seizure checkpoints again.
PATH_OUTPUT = "output/seizure/"
# exist_ok=True makes this cell safe to re-run when the directory already exists
os.makedirs(PATH_OUTPUT, exist_ok=True)

In [None]:
# Some parameters
MODEL_TYPE = 'MLP'  # TODO: Change this to 'MLP', 'CNN', or 'RNN' according to your task
NUM_EPOCHS = 1
BATCH_SIZE = 32
USE_CUDA = False  # Set 'True' if you want to use GPU
NUM_WORKERS = 0  # Number of worker subprocesses used by DataLoader (0 = load in the main process). You can adjust this according to your machine spec.


In [None]:
# Use the GPU only when it was requested (USE_CUDA) AND is actually available.
device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu")
# Fixed seed so weight initialisation and shuffling are repeatable.
torch.manual_seed(1)
# Compare the device *type*: a torch.device object compared against the plain
# string "cuda" is not a reliable check, which left these flags unset on GPU.
if device.type == "cuda":
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
# One dataset per split; MODEL_TYPE is forwarded to the loader helper.
train_dataset = load_seizure_dataset(PATH_TRAIN_FILE, MODEL_TYPE)
valid_dataset = load_seizure_dataset(PATH_VALID_FILE, MODEL_TYPE)
test_dataset = load_seizure_dataset(PATH_TEST_FILE, MODEL_TYPE)

# Only the training split is shuffled; validation and test keep file order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)


In [None]:
# Instantiate the network that matches MODEL_TYPE and pick the file name its
# best checkpoint will be saved under (joined with PATH_OUTPUT when saving).
if MODEL_TYPE == 'MLP':
    model = MyMLP()
    save_file = 'MyMLP.pth'
elif MODEL_TYPE == 'CNN':
    model = MyCNN()
    save_file = 'MyCNN.pth'
elif MODEL_TYPE == 'RNN':
    model = MyRNN()
    save_file = 'MyRNN.pth'
else:
    # Fail fast on an unknown MODEL_TYPE instead of continuing without a model.
    raise AssertionError("Wrong Model Type!")


In [None]:
# Honour USE_CUDA here as well: previously this cell silently moved everything
# to the GPU whenever one was present, contradicting the configuration cell.
device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu")

# Move the model to its device before building the optimizer over its parameters.
model.to(device)

criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = optim.Adam(model.parameters())  # Adam with library-default settings

# Bookkeeping for model selection and for the learning-curve plot.
best_val_acc = 0.0
train_losses, train_accuracies = [], []
valid_losses, valid_accuracies = [], []

In [None]:
# Train for NUM_EPOCHS epochs, recording per-epoch metrics and saving the model
# to disk each time the validation accuracy improves on the best seen so far.
for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(model, device, train_loader, criterion, optimizer, epoch)
    valid_loss, valid_accuracy, valid_results = evaluate(model, device, valid_loader, criterion)

    # Histories consumed later by plot_learning_curves.
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    train_accuracies.append(train_accuracy)
    valid_accuracies.append(valid_accuracy)

    is_best = valid_accuracy > best_val_acc  # let's keep the model that has the best accuracy, but you can also use another metric.
    if is_best:
        best_val_acc = valid_accuracy
        # Saves the whole model object (not just its state_dict).
        # NOTE(review): with a strict ">" and best_val_acc starting at 0.0,
        # nothing is saved if validation accuracy never rises above 0.
        torch.save(model, os.path.join(PATH_OUTPUT, save_file))


In [None]:
# Plot the per-epoch loss/accuracy histories collected during training.
plot_learning_curves(train_losses, valid_losses, train_accuracies, valid_accuracies)

# Reload the checkpoint written during training and score it on the test split.
# NOTE(review): the checkpoint is a fully pickled model object; newer PyTorch
# releases may need an explicit weights_only=False here - confirm against the
# installed version.
best_model = torch.load(os.path.join(PATH_OUTPUT, save_file))
test_loss, test_accuracy, test_results = evaluate(best_model, device, test_loader, criterion)

# Display names for the five classes, passed to the confusion-matrix plot.
class_names = ['Seizure', 'TumorArea', 'HealthyArea', 'EyesClosed', 'EyesOpen']
plot_confusion_matrix(test_results, class_names)


In [2]:
import numpy as np
import pandas as pd
from scipy import sparse
import torch
from torch.utils.data import TensorDataset, Dataset
import torch.nn as nn

In [None]:
# Load the raw seizure training CSV for manual inspection.
df = pd.read_csv(PATH_TRAIN_FILE)
df.head()

In [None]:
# Shift the label column down by one.
# NOTE(review): presumably labels are 1-based in the file - confirm.
# This cell mutates df in place, so running it twice shifts the labels twice.
df.y = df.y.apply(lambda x: x-1)

In [None]:
# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [None]:
class MyMLP(nn.Module):
    """Multi-layer perceptron with a single sigmoid hidden layer.

    Architecture: Linear(178 -> 16) -> Sigmoid -> Linear(16 -> 5).
    The output is raw, un-normalised scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # One hidden layer with 16 nodes
        self.hidden = nn.Linear(178, 16, bias=True)
        self.sigmoid = nn.Sigmoid()
        self.output = nn.Linear(16, 5, bias=True)

    def forward(self, x):
        """Run the forward pass.

        :param x: tensor whose last dimension has size 178.
        :return: tensor with the same leading dimensions and a last dimension of 5.
        """
        h1_linear = self.hidden(x)
        # Fixed: this previously read the undefined name `linear` (NameError).
        h1_activation = self.sigmoid(h1_linear)
        return self.output(h1_activation)

In [None]:
# Wrap the numpy arrays as tensors; unsqueeze(2) appends a trailing size-1 axis.
# NOTE(review): both tensors are cast to float64. nn.Linear weights default to
# float32 and nn.CrossEntropyLoss expects integer class-index targets, so these
# dtypes presumably need to be .float() / .long() before real training - confirm.
train_x = torch.from_numpy(X).double().unsqueeze(2)
train_y = torch.from_numpy(y).double()
dataset = TensorDataset(train_x, train_y)

In [None]:
# Inspect the resulting feature-tensor shape.
train_x.shape

In [3]:
# Throw-away instance used only for inspecting the architecture.
testModel = MyMLP()

In [None]:
# Shuffled loader over the hand-built dataset, reusing the global batch settings.
ds_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

In [None]:
# Pull a single batch from the loader to inspect its shape and labels.
dataiter = iter(ds_loader)
# Use the built-in next(): the iterator's .next() method is not part of the
# Python iterator protocol and is missing from newer PyTorch releases.
X_samples, y_samples = next(dataiter)

print(X_samples.shape)
print(y_samples)

In [4]:
from torchsummary import summary

In [7]:
# Print a per-layer summary of testModel for a single input of shape (1, 178).
summary(testModel, (1,178))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 16]           2,864
           Sigmoid-2                [-1, 1, 16]               0
            Linear-3                 [-1, 1, 5]              85
Total params: 2,949
Trainable params: 2,949
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01
----------------------------------------------------------------


In [None]:
# Honour USE_CUDA, consistent with the configuration cell, instead of always
# picking the GPU when one is present.
device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu")
# NOTE: this rebinds the global `model` to a fresh, untrained MyRNN.
model = MyRNN().to(device)

In [None]:
from ptflops import get_model_complexity_info



# Ask ptflops for the complexity of `model` on a (1, 178) input. as_strings=True
# requests string results, which is what the concatenations below rely on.
flops, params = get_model_complexity_info(model, ( 1, 178), as_strings=True, print_per_layer_stat=True)
print('Flops:  ' + flops)
print('Params: ' + params)

In [None]:
import os
import pickle
import pandas as pd
from scipy.sparse import csr_matrix

##### DO NOT MODIFY OR REMOVE THIS VALUE #####
checksum = '169a9820bbc999009327026c9d76bcf1'
##### DO NOT MODIFY OR REMOVE THIS VALUE #####

# Mortality data locations. Every other path in this notebook is written
# relative to the project root ("data/..."), so these now use the same root;
# previously three of the four pointed one directory up ("../data/...").
# NOTE: PATH_OUTPUT rebinds the seizure output directory defined earlier.
PATH_TRAIN = "data/mortality/train/"
PATH_VALIDATION = "data/mortality/validation/"
PATH_TEST = "data/mortality/test/"
PATH_OUTPUT = "data/mortality/processed/"


In [None]:
def convert_icd9(icd9_object):
    """Reduce an ICD-9 code to its leading category characters.

    The value is converted with str(); only the part before the first '.'
    is considered. Codes starting with 'E' or 'e' keep the first 4 characters,
    all other codes keep the first 3.

    :param icd9_object: ICD-9 code (any object; str() is applied to it).
    :return: the truncated code as a string.
    """
    # TODO: Extract the the first 3 or 4 alphanumeric digits prior to the decimal point from a given ICD-9 code.
    # TODO: Read the homework description carefully.
    code = str(icd9_object)
    before_point = code.split('.')[0]
    keep = 4 if code.lower().startswith('e') else 3
    return before_point[:keep]



In [None]:
# Small hand-made sample of V-codes, numeric codes and E-codes for checking convert_icd9.
example = pd.DataFrame(['V1046', 'V090', '135', '1890', '19889', 'E9352', 'E935'], columns=['ICD9_CODE'])

In [None]:
# NOTE: rebinds `example` from a DataFrame to the converted Series, so this
# cell cannot be re-run without first re-running the cell above.
example = example.ICD9_CODE.apply(convert_icd9)

In [None]:
def build_codemap(df_icd9, transform):
    """
    Assign a consecutive integer ID to every distinct transformed ICD-9 code.

    :param df_icd9: DataFrame with an 'ICD9_CODE' column.
    :param transform: callable applied to each code, e.g. convert_icd9.
    :return: Dict of code map {main-digits of ICD9: unique feature ID}
    """
    # TODO: We build a code map using ONLY train data. Think about how to construct validation/test sets using this.
    distinct_codes = df_icd9['ICD9_CODE'].apply(transform).unique()

    # IDs follow the order in which the codes come back from unique().
    codemap = {}
    for feature_id, code in enumerate(distinct_codes):
        codemap[str(code)] = feature_id
    return codemap

In [None]:
def create_dataset(path, codemap, transform):
    """
    Template for building the visit-sequence dataset (not yet implemented).

    Currently only reads MORTALITY.csv from `path` and returns fixed
    placeholder values; `codemap` and `transform` are not used yet.

    :param path: path to the directory contains raw files.
    :param codemap: 3-digit ICD-9 code feature map
    :param transform: e.g. convert_icd9
    :return: List(patient IDs), List(labels), Visit sequence data as a List of List of List.
    """
    # TODO: 1. Load data from the three csv files
    # TODO: Loading the mortality file is shown as an example below. Load two other files also.
    mortality_csv = os.path.join(path, "MORTALITY.csv")
    df_mortality = pd.read_csv(mortality_csv)

    # TODO: 2. Convert diagnosis code in to unique feature ID.
    # TODO: HINT - use 'transform(convert_icd9)' you implemented and 'codemap'.

    # TODO: 3. Group the diagnosis codes for the same visit.

    # TODO: 4. Group the visits for the same patient.

    # TODO: 5. Make a visit sequence dataset as a List of patient Lists of visit Lists
    # TODO: Visits for each patient must be sorted in chronological order.

    # TODO: 6. Make patient-id List and label List also.
    # TODO: The order of patients in the three List output must be consistent.

    # Placeholder output until the steps above are implemented.
    seq_data = [
        [[0, 1], [2]],
        [[1, 3, 4], [2, 5]],
        [[3], [5]],
    ]
    labels = [1, 0, 1]
    patient_ids = [0, 1, 2]
    return patient_ids, labels, seq_data

In [None]:
# Load the three raw training tables from PATH_TRAIN.
df_mortality = pd.read_csv(os.path.join(PATH_TRAIN, "MORTALITY.csv"))
df_diagnosis = pd.read_csv(os.path.join(PATH_TRAIN, "DIAGNOSES_ICD.csv"))
df_admissions = pd.read_csv(os.path.join(PATH_TRAIN, "ADMISSIONS.csv"))

In [None]:
df_diagnosis.head()

In [None]:
# Code -> feature-ID map built from the training diagnoses.
codemap = build_codemap(df_diagnosis, convert_icd9)

In [None]:
# Overwrite each ICD-9 code with its integer feature ID from codemap.
# NOTE: this mutates df_diagnosis in place and is not idempotent - after it
# runs, ICD9_CODE holds feature IDs, so re-running this cell (or calling
# convert_icd9 on the column again) no longer operates on raw codes.
df_diagnosis.ICD9_CODE = df_diagnosis.ICD9_CODE.apply(convert_icd9).apply(lambda x: codemap[x])

In [None]:
df_diagnosis.head(1)

In [None]:
# Spot-check the admission rows for one specific HADM_ID.
df_admissions[df_admissions.HADM_ID == 172335]

In [None]:
df_admissions.head()

In [None]:
# Attach each diagnosis to the admission time of its visit, then order the
# rows by patient and admission time. HADM_ID is only needed as a join key.
diag_admiss = (
    pd.merge(
        left=df_diagnosis.loc[:, ['SUBJECT_ID', 'HADM_ID', 'ICD9_CODE']],
        right=df_admissions.loc[:, ['SUBJECT_ID', 'HADM_ID', 'ADMITTIME']],
        how='inner',
        on=['SUBJECT_ID', 'HADM_ID'],
    )
    .drop('HADM_ID', axis=1)
    .sort_values(['SUBJECT_ID', 'ADMITTIME'])
)

In [None]:
# First collect the codes of each (patient, admission time) pair into a list,
# then collect those per-visit lists per patient; display the patient index.
(
    diag_admiss
    .groupby(['SUBJECT_ID', 'ADMITTIME'])['ICD9_CODE']
    .apply(list)
    .reset_index()
    .groupby('SUBJECT_ID')['ICD9_CODE']
    .apply(list)
    .index
)

In [None]:
#PATH_TRAIN_SEQS = os.path.join(project_root, "data/mortality/processed/mortality.seqs.train")
# Load the preprocessed visit sequences and code map. The files are opened in
# `with` blocks so the handles are closed deterministically (they previously leaked).
# NOTE: pickle.load can execute arbitrary code - only load files you produced yourself.
# NOTE: this rebinds the global `codemap`.
with open("data/mortality/processed/mortality.seqs.train", 'rb') as seqs_file:
    train_seqs = pickle.load(seqs_file)
with open("data/mortality/processed/mortality.codemap.train", 'rb') as codemap_file:
    codemap = pickle.load(codemap_file)

In [None]:
# Number of distinct codes in the code map.
len(sorted(codemap.keys()))

In [None]:
# Accumulator for the per-patient sparse matrices.
data = []

In [None]:
# Build one sparse multi-hot matrix per patient: one row per visit, one column
# per feature ID, 1 where the code occurs in that visit.
# `data` is reset here so that re-running this cell does not append every
# patient a second time.
# NOTE(review): 911 is presumably the code-map size - confirm.
data = []
for visits in train_seqs:
    multi_hot = np.zeros((len(visits), 911))
    for row, codes in enumerate(visits):
        for code in codes:
            multi_hot[row, int(code)] = 1
    data.append(csr_matrix(multi_hot))

In [None]:
# First patient's sparse matrix (same object as data[0], not a copy).
test = data[0]

In [None]:
train_seqs[0]

In [None]:
# NOTE: rebinds `test` from the sparse matrix above to a merged DataFrame.
# NOTE(review): df_diagnosis.ICD9_CODE was overwritten with feature IDs in an
# earlier cell, so convert_icd9 below presumably runs on IDs, not raw codes - confirm.
test = pd.merge(df_diagnosis,df_mortality, on='SUBJECT_ID', how='outer')#.dropna(subset=['ICD9_CODE']).loc[:, ['SUBJECT_ID', 'MORTALITY']].drop_duplicates()
test.ICD9_CODE = test.ICD9_CODE.apply(convert_icd9)

In [None]:
# Rows whose converted code equals the string '656'.
test[test.ICD9_CODE=='656']

In [None]:
# Number of rows in the mortality table.
len(df_mortality.MORTALITY)

In [None]:
# Experiment: resize a patient's sparse matrix to a fixed (3, 911) shape.
# `test` was rebound to a pandas DataFrame in an earlier cell, and a DataFrame
# has no .resize(), so running the notebook top to bottom failed here. Work on
# a copy of data[0] so the dataset itself is not modified in place.
resized_example = data[0].copy()
resized_example.resize((3, 911))
resized_example

In [None]:
# Scratch experiments with in-place ndarray.resize.
t = np.array([[1,3,4],[4,5,6]])

In [None]:
# Resize the 2x3 array in place to 3x3.
t.resize(3,3)

In [None]:
# NOTE: rebinds `t` to a list of (array, label) pairs with differently shaped arrays.
t = [(np.array([1,2,3]), 1), (np.array([[2,3,4],[2,3,4]]), 0)]

In [None]:
# Resize every array in the list in place to 3x3.
for x in t:
    x[0].resize(3,3)

In [None]:
# Sort the pairs in place by the number of rows of each array.
t.sort(key = lambda x: x[0].shape[0])

In [None]:
t