# ICU survivor prediction
# Machine Learning, Exercise 2
王敏行 id:2018012386 wangmx18@mails.tsinghua.edu.cn


### Exp.4 MLP

In [4]:
import pandas as pd
import numpy as np

# Read the ICU feature tables and their label tables (train split first, then test).
tr = pd.read_csv('data1forEx1to4/train1_icu_data.csv')
Y_tr = pd.read_csv('data1forEx1to4/train1_icu_label.csv')
ts = pd.read_csv('data1forEx1to4/test1_icu_data.csv')
Y_ts = pd.read_csv('data1forEx1to4/test1_icu_label.csv')

# Convert the DataFrames to NumPy arrays; label tables are flattened to 1-D vectors.
x_tr = np.array(tr)
y_tr = np.ravel(np.array(Y_tr))
x_ts = np.array(ts)
y_ts = np.ravel(np.array(Y_ts))

In [5]:
# The code framework was provided by the TA of the NMDA course.
# Reference implementation: https://github.com/ddbourgin/numpy-ml/blob/master/numpy_ml/neural_nets/
# Minxing Wang @ Dec 2021
import numpy as np
import matplotlib.pyplot as plt

class Network(object):
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``. The intermediate activations of the most recent
    forward computation are kept in ``self.var`` (keys ``'z1'``, ``'h'``,
    ``'c'`` and ``'pred'``).
    """

    def __init__(self, hidden_size, input_size = 108, output_size = 2, std = 1e-4):
        """Initialise weights from N(0, std^2) and biases to zero.

        Args:
            hidden_size: number of hidden units.
            input_size: number of input features.
            output_size: number of output classes.
            std: scale applied to the standard-normal weight samples.
        """
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size) * std
        self.params['W2'] = np.random.randn(hidden_size, output_size) * std
        # Biases are kept 1-D so they broadcast over the batch axis when added
        # to the 2-D activations in the forward computation.
        self.params['b1'] = np.zeros(hidden_size)
        self.params['b2'] = np.zeros(output_size)

    def relu(self, x):
        """Element-wise max(x, 0)."""
        return np.maximum(x, 0.0)

    def softmax(self, x):
        """Softmax along the last axis.

        The per-row maximum is subtracted before exponentiating. This leaves
        the result mathematically unchanged but keeps ``np.exp`` from
        overflowing (which would yield ``inf / inf = nan``) on large logits.
        """
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def cross_entropy(self, y_pred, y):
        """Summed (not averaged) cross-entropy between ``y_pred`` and one-hot ``y``."""
        # Machine epsilon prevents taking the log of 0.
        eps = np.finfo(float).eps
        return -np.sum(y * np.log(y_pred + eps))

    def _forward(self, X):
        """Run the network on ``X``, cache activations in ``self.var``, return class probabilities."""
        self.var = {}
        self.var['z1'] = np.dot(X, self.params['W1']) + self.params['b1']
        self.var['h'] = self.relu(self.var['z1'])
        self.var['c'] = np.dot(self.var['h'], self.params['W2']) + self.params['b2']
        self.var['pred'] = self.softmax(self.var['c'])
        return self.var['pred']

    def _one_hot(self, y, n, num_classes):
        """Return an ``(n, num_classes)`` one-hot matrix for the first ``n`` labels in ``y``."""
        labels = np.asarray(y).ravel()[:n].astype(int)
        onehot = np.zeros((n, num_classes))
        onehot[np.arange(n), labels] = 1
        return onehot

    def forward_pass(self, X, y = None, wd_decay = 0.0):
        """Predict classes, or compute the regularised loss when labels are given.

        Args:
            X: input array with one sample per row.
            y: integer class labels, or ``None``.
            wd_decay: L2 penalty coefficient applied to every parameter
                (weights and biases).

        Returns:
            If ``y`` is ``None``: the index of the largest probability in each
            row. Otherwise: the mean cross-entropy over the batch plus
            ``0.5 * wd_decay * sum(param ** 2)`` over all parameters.
        """
        probs = self._forward(X)

        if y is None:
            # Index of the maximum probability in each row.
            return np.argmax(probs, axis=1)

        n = X.shape[0]
        y_true_onehot = self._one_hot(y, n, probs.shape[1])
        loss = self.cross_entropy(probs, y_true_onehot) / n
        for para in self.params:
            loss += 0.5 * wd_decay * ((self.params[para] ** 2).sum())
        return loss

    def back_prop(self, X, y, wd_decay = 0.0):
        """Analytic gradient of the ``forward_pass`` loss w.r.t. every parameter.

        Args:
            X: input array with one sample per row.
            y: integer class labels.
            wd_decay: L2 penalty coefficient (same meaning as in ``forward_pass``).

        Returns:
            Dict keyed like ``self.params``; every gradient has exactly the
            shape of its parameter, so parameter updates never change shapes
            and the result is directly comparable with ``numerical_gradient``.
        """
        probs = self._forward(X)
        hidden = self.var['h']
        n = X.shape[0]
        y_true_onehot = self._one_hot(y, n, probs.shape[1])

        # Gradient of softmax + cross-entropy with respect to the logits.
        dy = probs - y_true_onehot

        grads = {}
        # Bias gradients are summed WITHOUT keepdims so they stay 1-D like the
        # biases themselves.
        grads['b2'] = np.sum(dy, axis=0) / n + wd_decay * self.params['b2']
        grads['W2'] = np.dot(hidden.T, dy) / n + wd_decay * self.params['W2']
        # Back-propagate through the ReLU: only active units pass gradient.
        dz1 = np.dot(dy, self.params['W2'].T) * (hidden > 0)
        grads['b1'] = np.sum(dz1, axis=0) / n + wd_decay * self.params['b1']
        grads['W1'] = np.dot(X.T, dz1) / n + wd_decay * self.params['W1']
        return grads

    def numerical_gradient(self, X, y, wd_decay = 0.0, delta = 1e-6):
        """Central-difference estimate of the loss gradient, for gradient checking.

        Each parameter entry is perturbed by ``+delta`` and ``-delta`` in turn
        and restored afterwards, so ``self.params`` is unchanged on return.

        Returns:
            Dict keyed like ``self.params`` with arrays of matching shapes.
        """
        grads = {}

        for param_name in self.params:
            param = self.params[param_name]
            grads[param_name] = np.zeros(param.shape)
            for idx in np.ndindex(param.shape):
                original_value = param[idx]
                param[idx] = original_value + delta
                loss_up = self.forward_pass(X, y, wd_decay=wd_decay)
                param[idx] = original_value - delta
                loss_low = self.forward_pass(X, y, wd_decay=wd_decay)
                grads[param_name][idx] = (loss_up - loss_low) / delta / 2
                param[idx] = original_value
        return grads

    def get_acc(self, X, y):
        """Fraction of rows in ``X`` whose predicted class equals ``y``."""
        pred = self.forward_pass(X)
        return np.mean(pred == y)

    def train(self, X, y, X_val, y_val,
                learning_rate=0, lr_decay=1,
                momentum=0, do_early_stopping=False, stopping_patience=0,
                wd_decay=0, num_iters=10,
                batch_size=4, verbose=False, print_every=10):
        """Train with mini-batch gradient descent.

        Args:
            X, y: training inputs and integer labels.
            X_val, y_val: held-out inputs and labels used for monitoring.
            learning_rate: base step size.
            lr_decay: the step size used in epoch ``e`` is
                ``learning_rate / (1 + lr_decay * e)``.
            momentum: blending factor in [0, 1) of the running update
                direction; 0 gives plain gradient descent.
            do_early_stopping: stop when the validation loss stops improving.
            stopping_patience: minimum required drop in mean validation loss
                between two consecutive 50-iteration windows.
            wd_decay: L2 penalty coefficient used for the gradients.
            num_iters: number of mini-batch updates.
            batch_size: samples per mini-batch.
            verbose: print losses every ``print_every`` iterations.
            print_every: logging interval in iterations.

        Returns:
            Dict with ``'loss_history'`` and ``'val_loss_history'`` (one entry
            per iteration, both computed without the L2 penalty) and
            ``'acc_history'`` / ``'val_acc_history'`` (one entry per epoch;
            the training accuracy is measured on the current mini-batch).
        """
        num_train = X.shape[0]
        # Integer division: a trailing partial batch is not visited.
        iterations_per_epoch = max(num_train // batch_size, 1)

        loss_history = []
        acc_history = []
        val_acc_history = []
        val_loss_history = []

        # Running blend of gradients. Carrying the blended value forward
        # (rather than only the previous raw gradient) is what lets momentum
        # accumulate over many steps.
        velocity = {name: np.zeros_like(value) for name, value in self.params.items()}

        # The sample order is shuffled once and reused for every epoch.
        train_array = np.arange(num_train)
        np.random.shuffle(train_array)

        # Early stopping compares two adjacent windows of this many iterations.
        n_compare = 50
        n1 = 2 * n_compare + 1
        n2 = n_compare + 1

        for it in range(num_iters):
            batch_num = it % iterations_per_epoch
            epoch = it // iterations_per_epoch
            # Learning rate shrinks with the epoch index (lr_decay=0 disables it).
            learning_rate_tmp = learning_rate / (1 + lr_decay * epoch)

            start = batch_num * batch_size
            batch_idx = train_array[start:start + batch_size]
            X_batch = X[batch_idx]
            y_batch = y[batch_idx]
            grad_tmp = self.back_prop(X_batch, y_batch, wd_decay)

            for param_name in self.params:
                velocity[param_name] = (momentum * velocity[param_name]
                                        + (1 - momentum) * grad_tmp[param_name])
                self.params[param_name] = (self.params[param_name]
                                           - learning_rate_tmp * velocity[param_name])

            loss = self.forward_pass(X_batch, y_batch)
            val_loss = self.forward_pass(X_val, y_val)
            loss_history.append(loss)
            val_loss_history.append(val_loss)

            if verbose and it % print_every == 0:
                print('iteration %d / %d: training loss %f val loss: %f' % (it, num_iters, loss, val_loss))

            if batch_num == 0:
                # First iteration of an epoch: record accuracies.
                acc_history.append(self.get_acc(X_batch, y_batch))
                val_acc_history.append(self.get_acc(X_val, y_val))

            # The iteration guard is checked first so the window means are
            # never taken over empty slices (which would warn and give nan).
            if do_early_stopping and it > n_compare * 3:
                older = np.mean(val_loss_history[-n1:-n2])
                recent = np.mean(val_loss_history[-n2:-1])
                if older - recent < stopping_patience:
                    print('iteration %d / %d: training loss %f val loss: %f' % (it, num_iters, loss, val_loss))
                    print('ITERATION STOPPED, it = %d' % (it))
                    break

        return {
          'loss_history': loss_history,
          'val_loss_history': val_loss_history,
          'acc_history': acc_history,
          'val_acc_history': val_acc_history,
        }

In [8]:
# Width of the single hidden layer.
hidden_size = 10

net = Network(hidden_size=hidden_size)

# Train for 200 mini-batch updates; the test split is passed in the
# validation-set positions, so the reported "val loss" is measured on it.
stats = net.train(
    x_tr, y_tr, x_ts, y_ts,
    num_iters=200,
    batch_size=100,
    learning_rate=0.5,
    lr_decay=0.5,
    momentum=0,
    wd_decay=0.02,
    do_early_stopping=False,
    verbose=True,
    print_every=10,
)

iteration 0 / 200: training loss 0.687501 val loss: 0.692321
iteration 10 / 200: training loss 0.692359 val loss: 0.694276
iteration 20 / 200: training loss 0.695147 val loss: 0.693849
iteration 30 / 200: training loss 0.684529 val loss: 0.694607
iteration 40 / 200: training loss 0.697467 val loss: 0.694567
iteration 50 / 200: training loss 0.691635 val loss: 0.693200
iteration 60 / 200: training loss 0.692490 val loss: 0.693478
iteration 70 / 200: training loss 0.695560 val loss: 0.694063
iteration 80 / 200: training loss 0.686981 val loss: 0.693777
iteration 90 / 200: training loss 0.696314 val loss: 0.694042
iteration 100 / 200: training loss 0.693209 val loss: 0.693149
iteration 110 / 200: training loss 0.692532 val loss: 0.693419
iteration 120 / 200: training loss 0.695551 val loss: 0.694059
iteration 130 / 200: training loss 0.688880 val loss: 0.693403
iteration 140 / 200: training loss 0.695583 val loss: 0.693748
iteration 150 / 200: training loss 0.694048 val loss: 0.693194
ite

In [9]:
# Called without labels, forward_pass returns the predicted class index for each row of x_tr.
pred = net.forward_pass(x_tr)