# 任务说明

## 2.  非线性多分类器

鸢尾花数据集`iris.csv`含有150条记录，每条记录包含萼片长度`sepal length`、萼片宽度`sepal width`、
花瓣长度`petal length`和花瓣宽度`petal width`四个数值型特征，以及它的所属类别`class`（可能为`Iris-setosa`,`Iris-versicolor`,`Iris-virginica`三者之一）。

**任务：请利用该数据集训练出一个良好的非线性分类器。**

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the iris dataset from a CSV file; the path is relative to the
# current working directory.
path = 'Dataset/iris.csv'
data = pd.read_csv(path)
# Preview the first rows (displayed as the notebook cell output).
data.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Data preprocessing: split the table into a feature matrix and one-hot targets.
feature_frame = data.drop(columns=['class'])
X = np.array(feature_frame)
labels = data['class']
# One column per distinct class (in order of first appearance); a 1 marks
# the class the sample belongs to, every other column holds 0.
class_names = labels.unique()
y = np.array([[int(each == name) for name in class_names] for each in labels])

In [4]:
# Define the two activation functions used by the network
class Sigmoid(object):
    '''
    Logistic sigmoid activation, applied element-wise.
    '''
    @classmethod
    def forward(cls, x):
        '''
        Return 1 / (1 + exp(-x)) for a scalar or array x.

        Only exp(-|x|) is evaluated, which never exceeds 1, so very
        negative inputs do not overflow the exponential.
        '''
        x = np.asarray(x)
        e = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e);  x < 0: e / (1 + e). Both equal the sigmoid.
        return np.where(x >= 0, 1.0, e) / (1.0 + e)

    @classmethod
    def backward(cls, y):
        '''
        Return the derivative of the sigmoid expressed through its
        output y = forward(x), i.e. y * (1 - y).
        '''
        return (1 - y) * y

class Softmax(object):
    '''
    Softmax activation turning a vector of class scores into probabilities.
    '''
    @classmethod
    def forward(cls, x):
        '''
        Return exp(x) / sum(exp(x)).

        The sum runs over all elements of x, so x is expected to hold the
        scores of a single sample. The largest score is subtracted before
        exponentiating; this leaves the result unchanged mathematically
        but keeps exp() from overflowing to inf (and the ratio from
        becoming nan) when the scores are large.
        '''
        x = np.asarray(x, dtype=float)
        if x.size == 0:
            # Nothing to normalise; np.max would raise on an empty array.
            return x.copy()
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps)

    @classmethod
    def backward(cls, y):
        '''
        Return the Jacobian of the softmax for a 1-D output vector y:
        diag(y) - y y^T.
        '''
        return np.diag(y) - np.outer(y, y)

In [5]:
# define the model
class NonlinearClassifier(object):
    '''
    Two-layer neural network for multiclass classification.

    input -> FC -> sigmoid hidden layer -> FC -> class scores -> softmax -> probabilities

    m: the number of input features.
    hidden: the number of units in the hidden layer.
    n: the number of classes.

    w1,b1: weight and bias between the input and the hidden layer
    w2,b2: weight and bias between the hidden layer and the class scores
    '''

    # Lower bound applied to probabilities before taking their log, so a
    # probability of exactly 0 cannot turn the loss into nan (0 * -inf).
    _EPS = 1e-12

    def __init__(self, m, hidden, n):
        '''
        Randomly initialize the parameters with uniform samples from [0, 1).
        '''
        self.w1 = np.random.random([m, hidden])
        self.b1 = np.random.random(hidden)
        self.w2 = np.random.random([hidden, n])
        self.b2 = np.random.random(n)

    def forward(self, x):
        '''
        Forward propagation of one sample x through all the layers.

        Returns (z1, a1, z2, output): the hidden pre-activation, the hidden
        activation, the class scores and the predicted class probabilities.
        '''
        z1 = x.dot(self.w1) + self.b1
        a1 = Sigmoid.forward(z1)
        z2 = a1.dot(self.w2) + self.b2
        output = Softmax.forward(z2)
        return z1, a1, z2, output

    def loss(self, output, y):
        '''
        Cross entropy loss of a single sample.

        output: predicted class probabilities.
        y: one-hot target vector of the same length.
        '''
        output = np.asarray(output, dtype=float)
        y = np.asarray(y)
        # Flooring the probabilities keeps log() finite; values above the
        # floor are left untouched.
        return -np.sum(y * np.log(np.maximum(output, self._EPS)))

    def train(self, X, y, lr=0.003, iteration=1000, print_every=None):
        '''
        Fit the parameters with batch gradient descent.

        X: samples, one per row.
        y: one-hot targets, one row per sample.
        lr: learning rate.
        iteration: number of passes over the whole dataset.
        print_every: print the summed loss every this many iterations;
            None or 0 disables printing.

        The gradients of all samples are summed and applied once per
        iteration. In the code below dx means dloss/dx.
        '''
        for ii in range(iteration):
            delta_w1 = np.zeros_like(self.w1)
            delta_b1 = np.zeros_like(self.b1)
            delta_w2 = np.zeros_like(self.w2)
            delta_b2 = np.zeros_like(self.b2)
            loss = 0

            for i, x in enumerate(X):
                target = y[i]
                z1, a1, z2, output = self.forward(x)
                loss += self.loss(output, target)

                # Softmax followed by cross entropy: dloss/dz2 = p - y.
                dz2 = output - target
                delta_w2 += np.outer(a1, dz2)
                delta_b2 += dz2

                # Back-propagate through the second FC layer and the sigmoid.
                da1 = self.w2.dot(dz2)
                dz1 = Sigmoid.backward(a1) * da1
                delta_w1 += np.outer(x, dz1)
                delta_b1 += dz1

            # The loss is reported before this iteration's update is applied.
            if print_every and ii % print_every == 0:
                print('loss:', loss)

            self.w1 -= lr * delta_w1
            self.b1 -= lr * delta_b1
            self.w2 -= lr * delta_w2
            self.b2 -= lr * delta_b2

    def predict(self, X):
        '''
        Return one one-hot row per sample of X, with the 1 placed at the
        class of highest predicted probability.
        '''
        prediction = []
        for x in X:
            output = self.forward(x)[-1]
            winner = np.argmax(output)
            prediction.append([int(j == winner) for j in range(output.shape[0])])
        return np.array(prediction)

In [6]:
def score(prediction, y):
    '''
    Return the accuracy of prediction against y: the fraction of samples
    whose predicted label vector matches the target in every component.

    prediction: predicted label vectors, one row per sample.
    y: target label vectors, one row per sample.

    Returns 0.0 when y contains no samples, instead of dividing by zero.
    '''
    total = len(y)
    if total == 0:
        return 0.0
    # Indexing by position (rather than zip) keeps the IndexError when
    # prediction has fewer rows than y.
    correct = sum(
        1 for i in range(total)
        if np.all(np.asarray(y[i]) == np.asarray(prediction[i]))
    )
    return correct / total

In [7]:
# Size the network from the data (input width = feature count, output width
# = number of one-hot columns) and fit it on the full dataset.
hidden_dim = 5
model = NonlinearClassifier(m=X.shape[1], hidden=hidden_dim, n=y.shape[1])
model.train(X, y, lr=0.001, iteration=3000, print_every=500)

loss: 167.0489792333817
loss: 68.43315497830923
loss: 50.834311845734575
loss: 45.65566343282708
loss: 41.772677788353135
loss: 39.06841936661903


In [8]:
# Evaluate on the same samples the model was trained on, so the printed
# value is a training-set score rather than a held-out estimate.
prediction = model.predict(X)
# score() returns the fraction of exactly matching label rows (accuracy),
# despite the variable being named `precision`.
precision = score(prediction,y)
print(precision)

0.96
