In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns

import collections

In [2]:
planets_train = pd.read_csv("C:/Users/user/.jupyter/주피터 파일/data/planets_train.csv")
planets_train.drop(['Unnamed: 0'], axis = 1, inplace = True)

planets_test = pd.read_csv("C:/Users/user/.jupyter/주피터 파일/data/planets_test.csv")
planets_test.drop(['Unnamed: 0'], axis = 1, inplace = True)

In [3]:
train_x = planets_train.iloc[:, 1:6]
train_y = planets_train.iloc[:, 0]

test_x = planets_test.iloc[:, 1:6]
test_y = planets_test.iloc[:, 0]

In [4]:
train_y = train_y.where((train_y == 'Radial Velocity') | (train_y == 'Transit'), 'Others')
test_y = test_y.where((test_y == 'Radial Velocity') | (test_y == 'Transit'), 'Others')

In [5]:
train_x[['orbital_period', 'distance', 'mass']] = np.log(train_x[['orbital_period', 'distance', 'mass']])
test_x[['orbital_period', 'distance', 'mass']] = np.log(test_x[['orbital_period', 'distance', 'mass']])

In [6]:
train_x = train_x.interpolate()
test_x = test_x.interpolate()

In [7]:
from sklearn.preprocessing import RobustScaler, LabelEncoder

train_x = RobustScaler().fit_transform(train_x)
test_x = RobustScaler().fit_transform(test_x)

In [8]:
from imblearn.over_sampling import SMOTE

x_train, y_train = SMOTE(random_state=0).fit_resample(train_x, train_y)
x_test, y_test = SMOTE(random_state=0).fit_resample(test_x, test_y)

In [9]:
y_train = LabelEncoder().fit_transform(y_train)
y_test = LabelEncoder().fit_transform(y_test)

In [10]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.2, shuffle = True)

In [11]:
y_train_oh = pd.get_dummies(y_train).values
y_val_oh = pd.get_dummies(y_val).values
y_test_oh = pd.get_dummies(y_test).values

In [None]:
class Linear() :
    
    def __init__(self, n, m, init_method='he') :
        self.p_ = {}

        if init_method == 'xavier' : 
            self.p_['W'], self.p_['b'] = \
                self.init_xavier_weights(n, m)

        elif init_method == 'he' :
            self.p_['W'], self.p_['b'] = \
                self.init_he_weights(n, m) 

        self.dp_ = {}
        self.dp_['W'], self.dp_['b'] = (None, None)
        self.X = None
        
    def init_xavier_weights(self, n, m) :
        W = np.sqrt(1./n)*np.random.randn(n, m)
        b = np.zeros(m)

        return (W, b)

    def init_he_weights(self, n, m) :
        W = np.sqrt(2./n) * np.random.randn(n, m)
        b = np.zeros(m)

        return (W, b)
    
    def forward(self, X) :
        W,b = self.load()
        self.X = X 
        Y = np.dot(X, W) + b

        return Y
    
    def backward(self, dY) :
        W,b = self.load()
        X = self.X
        self.dp_['W'] = np.dot(X.T, dY)
        self.dp_['b'] = np.sum(dY, axis = 0)
        dX = np.dot(dY, W.T)
        
        return dX

    def load(self) :
        
        return (self.p_['W'], self.p_['b'])

In [None]:
class ReLU() :
    
    def __init__(self) :
        self.p_ = {}
        self.mask = None

    def forward(self, X) :
        Y = X.copy()
        mask = (X<0)
        self.mask = mask
        Y[mask] = 0.

        return Y

    def backward(self, dY) :
        mask = self.mask
        dX = dY.copy()
        dX[mask] = 0.

        return dX

In [None]:
eps = 1e-8
class CrossEntropyWithSoftmax() :
    
    def __init__(self,reduction=True) :
        self.p_ = {}
        self.T = None
        self.Y = None
        self.reduction = reduction

    def forward(self,T,X) :
        self.T = T
        expX = np.exp(X-np.max(X))
        Y = expX/(eps+np.sum(expX,axis=1,keepdims=True))
        self.Y = Y
        loss = T*np.log(Y)
        
        if self.reduction :
            loss = -np.sum(loss)

        else :
            loss = -np.sum(loss,axis=1)

        return (loss, Y)

    def backward(self,dY) :
        T = self.T; Y = self.Y
        
        return Y - T

In [1]:
def accuracy(y_batch,t_batch) :
    ylb = np.argmax(y_batch,axis=1)
    tlb = np.argmax(t_batch,axis=1)
    
    return np.mean(ylb == tlb) * 100


def numerical_gradient(f, x):     
    
    h = 1e-4                               #유한차분법을 이용해서 수치적으로 도함수를 구함
    grad = np.zeros_like(x)                #(배열의 iterator를 이용했기 때문에 x가 몇 차원 벡터인가에 관계없이 작동)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = float(tmp_val) + h; fxh1 = f(x) # f(x+h)
        x[idx] = tmp_val - h; fxh2 = f(x) # f(x-h)

        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val
        it.iternext() 
    
    return grad

In [1]:
class FNN() :
    
    def __init__(self):
        self.p_ = collections.OrderedDict()
        self.p_['linear1'] = Linear(5, 64, 'he')
        self.p_['relu1'] = ReLU()
        self.p_['linear2'] = Linear(64, 256, 'he')
        self.p_['relu2'] = ReLU()
        self.p_['linear3'] = Linear(256, 3, 'he')
        #self.p['softmax'] = CrossEntropyWithSoftmax()
        
    def forward(self,x) :
        p_ = self.p_
        
        for layer in p_.keys() :
            x = p_[layer].forward(x)
    
        return x

    def backward(self,dy) :
        p_ = self.p_

        for layer in reversed(p_.keys()) :
            dy = p_[layer].backward(dy)

        return dy

ModuleNotFoundError: No module named 'Layers'

In [13]:
fnn = FNN()
loss_layer = CrossEntropyWithSoftmax()

alpha = 0.001

batch_size = 16

epoch_tnum = 50

N = len(x_train)

iter_tnum = N / batch_size

In [14]:
def forward(fnn, loss_layer, data_x, data_t) :
    data_x = np.array(data_x); data_t = np.array(data_t)
    
    x_batch = data_x.copy()
    x_batch = x_batch.reshape([len(data_x),-1])

    t_batch = data_t

    y_batch = fnn.forward(x_batch) 
    loss, _ = loss_layer.forward(t_batch,y_batch)
    loss = loss/len(x_batch)

    acc = accuracy(y_batch,t_batch)

    return (loss,acc)

def backward(fnn, loss_layer) :
    dY = loss_layer.backward(1)
    fnn.backward(dY)

In [16]:
for epoch in range(epoch_tnum) :
    
    for j in range(int(iter_tnum)) :
        
        iter_n = batch_size*j
        x_batch = x_train[iter_n:(iter_n+batch_size)] 
        t_batch = y_train_oh[iter_n:(iter_n+batch_size)]
        
        loss, _ = forward(fnn, loss_layer, x_batch, t_batch)
        
        backward(fnn, loss_layer)
        
        for layer in fnn.p_.keys() :
            layer_ = fnn.p_[layer]
            p_ = layer_.p_ 
        
            for w in p_.keys() :
                dw = layer_.dp_[w]
                p_[w] -= alpha*dw

    
    train_loss,train_acc = forward(fnn,loss_layer,x_train,y_train_oh)
    validate_loss,validate_acc = forward(fnn,loss_layer,x_val,y_val_oh)
    
    print('*** %2dth epoch'%epoch)
    print('train set -> loss: %f, acc: %5.2f%%'%(train_loss,train_acc))
    print('validation set -> loss: %f, acc: %5.2f%%'%(validate_loss,validate_acc))
        
    #if np.mean(pre_validate_loss) < validate_loss :
    #    break

    #pre_validate_loss[:-1] = pre_validate_loss[1:]
    #pre_validate_loss[-1] = validate_loss


***  0th epoch
train set -> loss: 0.629320, acc: 72.99%
validation set -> loss: 0.706350, acc: 66.26%
***  1th epoch
train set -> loss: 0.593873, acc: 75.13%
validation set -> loss: 0.664343, acc: 69.51%
***  2th epoch
train set -> loss: 0.569120, acc: 75.94%
validation set -> loss: 0.636578, acc: 71.14%
***  3th epoch
train set -> loss: 0.550342, acc: 76.96%
validation set -> loss: 0.616621, acc: 71.95%
***  4th epoch
train set -> loss: 0.535476, acc: 78.19%
validation set -> loss: 0.601654, acc: 72.76%
***  5th epoch
train set -> loss: 0.523079, acc: 79.00%
validation set -> loss: 0.589907, acc: 73.98%
***  6th epoch
train set -> loss: 0.512147, acc: 79.20%
validation set -> loss: 0.579874, acc: 74.39%
***  7th epoch
train set -> loss: 0.502560, acc: 79.20%
validation set -> loss: 0.571044, acc: 75.20%
***  8th epoch
train set -> loss: 0.494095, acc: 79.41%
validation set -> loss: 0.563748, acc: 76.02%
***  9th epoch
train set -> loss: 0.486481, acc: 79.92%
validation set -> loss: 0.

In [17]:
test_loss,test_acc = forward(fnn,loss_layer,x_test,y_test_oh)
print('test set -> loss: %f, acc: %5.2f%%'%(test_loss,test_acc))

test set -> loss: 0.525676, acc: 74.02%


In [18]:
for i in range(50, 58) :
    
    x = x_test[i].reshape([1,-1])
    t = y_test_oh[i]

    z = fnn.forward(x)
    _,y = loss_layer.forward(t,z)

    t = np.argmax(t)
    y = np.argmax(y)

    print('answer: %d, prediction: %d'%(t,y))

answer: 2, prediction: 2
answer: 1, prediction: 0
answer: 1, prediction: 1
answer: 1, prediction: 1
answer: 2, prediction: 2
answer: 2, prediction: 2
answer: 0, prediction: 2
answer: 2, prediction: 2


In [19]:
for epoch in range(1) : 
    for j in range(1) :
        iter_n = batch_size*j
        x_batch = x_train[iter_n:(iter_n+batch_size)] 
        t_batch = y_train_oh[iter_n:(iter_n+batch_size)]

        loss,_ = forward(fnn,loss_layer,x_batch,t_batch)
        backward(fnn,loss_layer)

        def target(w) :
            x_batch_ = np.array(x_batch)
            x_batch_ = x_batch_.reshape([len(x_batch_),-1])
            y_batch = fnn.forward(x_batch_)
            loss_val,_ = loss_layer.forward(t_batch,y_batch)
        
            return loss_val

        for layer in fnn.p_.keys() :
            layer_ = fnn.p_[layer]
            p_ = layer_.p_ 

            for w in p_.keys() :
                dw = layer_.dp_[w]
                
                dw_app = numerical_gradient(target,p_[w])
                tmp = np.abs( dw - dw_app )
                print(w) 
                print(p_[w]); print(p_[w].shape);
                print('diff')
                print(dw); print(dw.shape); 
                print('diff_app')
                print(dw_app); print(dw_app.shape)
                print('error')
                print(np.mean(tmp))
                
                #p_[w] -= alpha*dw

W
[[-1.14286183  0.47759247  0.47892181 -0.11407374  0.13394792 -0.96848702
   0.01158519 -0.53124578 -0.34806247  1.26316137 -0.57969254  0.36869112
   0.3540832  -0.32359511 -0.21321298 -0.55950046 -1.04224788  0.1404621
   1.14630409 -0.01752242  0.04685329  0.18829884  0.3298602   0.23483144
  -0.32889477 -1.01635779 -0.4318072  -0.414882   -0.07916116  1.45498178
  -1.42322893  0.20651289 -0.27815457  0.14238042 -1.10723537  0.01001169
   0.72933776 -0.3139411  -0.77465671  0.4292563   0.42965672  0.59181088
   0.39234634 -1.02024976  0.9623399   0.3454533  -0.16173625  0.33619844
  -0.058863   -0.05621226  0.20105221  1.15947972 -0.27586188  0.78582953
  -0.01766978  0.79801928  0.36085131 -0.98592974  0.60913166 -0.16830428
  -0.0566107   0.51938761 -1.54080924  0.84327492]
 [-1.1055245   0.39855328 -1.32365481  0.00762268 -0.70097784 -0.68644126
  -0.24839226  0.47432676 -0.47823091 -0.30520717  0.19615425  0.57111237
  -0.49616314  0.11145063  0.97728917  0.18617801  0.1024807

W
[[ 0.04711017  0.13844636  0.10437315 ...  0.04151415  0.07220098
   0.3020839 ]
 [ 0.16272305  0.04626466  0.02998346 ... -0.15593835  0.23449851
   0.15869633]
 [-0.03487639 -0.38647396 -0.06591295 ... -0.11888907  0.05664081
  -0.09267381]
 ...
 [ 0.32380254  0.39318719 -0.21049839 ...  0.1040542   0.01637583
  -0.24716519]
 [ 0.06530581 -0.03157949  0.17238253 ...  0.05687277  0.10312576
  -0.01195861]
 [-0.17438764 -0.16905879  0.1373826  ... -0.19381999 -0.47388617
  -0.01580177]]
(64, 256)
diff
[[ 0.03046881 -0.08404363  0.05672207 ...  0.00906107  0.00377781
   0.00350961]
 [ 0.00246203  0.04486659 -0.069783   ...  0.05069412  0.02080836
   0.00312758]
 [ 0.14022693 -0.02443852  0.01117303 ...  0.08942886 -0.01407325
   0.00056559]
 ...
 [ 0.08720878 -0.23154723  0.08409016 ...  0.03252919  0.06728105
   0.00426753]
 [-0.08091674  0.31932445 -0.12393659 ...  0.15039323  0.01161424
   0.0079311 ]
 [-0.06560188  0.00133013 -0.00126692 ...  0.06527448 -0.03000989
   0.0129747 ]]

 [ 9.72931843e-01 -6.52676921e-01 -3.20272385e-01]]
(256, 3)
diff_app
[[ 8.55461328e-02 -7.84526271e-02 -7.06827078e-03]
 [-2.85297214e-01  5.71418985e-01 -2.86022783e-01]
 [-1.43399796e-01  1.35772248e-01  7.64944437e-03]
 [ 6.04693735e-02  2.42066743e-01 -3.02539901e-01]
 [ 3.51649282e-01 -3.20052413e-01 -3.16106127e-02]
 [ 4.99886896e-01 -4.56826155e-01 -4.30634434e-02]
 [-1.16361442e-01  3.04786757e-01 -1.88412919e-01]
 [ 3.11329154e-01  2.94660706e-01 -6.05998905e-01]
 [-4.45184411e-01  4.44856737e-01  3.26785790e-04]
 [ 5.99517122e-01 -3.02148532e-01 -2.97371486e-01]
 [ 8.00172785e-01 -1.96113757e-01 -6.04073099e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 4.53953872e-01 -4.50910260e-01 -3.04704900e-03]
 [ 3.06379032e-01  1.22206916e-01 -4.28592933e-01]
 [ 4.85653061e-02 -3.81492486e-02 -1.04175927e-02]
 [-3.10022550e-02  3.01325093e-02  8.68430203e-04]
 [ 1.24973455e-01 -1.22566532e-01 -2.40614088e-03]
 [-1.26155501e+00  1.40739606e+00 -1.45771514e-01]
 [ 1.2644521