# 监督学习

与或非在数学中的表示：
- 与： A∧B
- 或： A∨B
- 非： ¬A

## 神经网络迷你项目
### 创建一个感知器（perceptron）
参考文章：http://blog.csdn.net/Dream_angel_Z/article/details/48915561

In [None]:
# ----------
# 
# In this exercise, you will add in code that decides whether a perceptron will fire based
# on the threshold. Your code will go in the body of Perceptron.activate().
#
# ----------
import numpy as np

class Perceptron:
    """
    This class models an artificial neuron with step activation function.

    Attributes:
        weights: one weight per input, stored exactly as passed in.
        threshold: value the weighted input sum must exceed for the neuron
            to fire.
        result: output (0 or 1) of the most recent activate() call; it only
            exists once activate() has been called.
    """
    def __init__(self, weights = np.array([1]), threshold = 0):
        """
        Initialize weights and threshold based on input arguments. Note that no
        type-checking is being performed here for simplicity.

        The default weights array is shared between instances; that is safe
        because this class never modifies the weights in place.
        """
        self.weights = weights
        self.threshold = threshold

    def activate(self,inputs):
        """
        Takes in @param inputs, a list of numbers equal to length of weights.
        @return the output of a threshold perceptron with given inputs based on
        perceptron weights and threshold: 0 when the weighted sum is less than
        or equal to the threshold, 1 otherwise.

        Side effects: prints the weighted sum and stores the output in
        self.result.
        """

        # The strength with which the perceptron fires: the weighted sum of
        # the inputs.
        strength = np.dot(self.weights, inputs)
        # Formatted into a single string so the trace line is identical under
        # the Python 2 print statement and the Python 3 print function.
        print('strength %s' % (strength,))

        # A sum exactly on the threshold does not fire.
        if strength <= self.threshold:
            self.result = 0
        else:
            self.result = 1
        return self.result


def test():
    """
    Check Perceptron.activate with weighted sums below, above and exactly on
    the threshold. An AssertionError is raised on the first mismatch.
    """
    unit = Perceptron(np.array([1, 2]), 0.)

    # (inputs, expected output)
    cases = (
        (np.array([ 1, -1]), 0),  # weighted sum below the threshold
        (np.array([-1,  1]), 1),  # weighted sum above the threshold
        (np.array([ 2, -1]), 0),  # weighted sum exactly on the threshold
    )
    for inputs, expected in cases:
        assert unit.activate(inputs) == expected


if __name__ == "__main__":
    test()

### 感知器更新规则

In [10]:
# ----------
#
# In this exercise, you will update the perceptron class so that it can update
# its weights.
#
# Finish writing the update() method so that it updates the weights according
# to the perceptron update rule. Updates should be performed online, revising
# the weights after each data point.
#
# YOUR CODE WILL GO IN THE update() METHOD.
# ----------

import numpy as np

class Perceptron:
    """
    This class models an artificial neuron with step activation function
    whose weights can be trained online with the perceptron update rule.
    """
    def __init__(self, weights = np.array([1]), threshold = 0):
        """
        Initialize weights and threshold based on input arguments. Note that no
        type-checking is being performed here for simplicity.

        @param weights: array or plain sequence of numbers, one per input.
        @param threshold: value the weighted sum must exceed to fire.
        """
        # np.array(..., dtype=float) always makes a float copy, so the
        # in-place updates in update() never touch the caller's array or the
        # shared default argument. It also accepts plain lists.
        self.weights = np.array(weights, dtype=float)
        self.threshold = threshold


    def activate(self, values):
        """
        Takes in @param values, a list of numbers equal to length of weights.
        @return the output of a threshold perceptron with given inputs based on
        perceptron weights and threshold: 1 when the weighted sum is strictly
        greater than the threshold, 0 otherwise.
        """
        # First calculate the strength with which the perceptron fires
        strength = np.dot(values, self.weights)
        # Then return 0 or 1 depending on strength compared to threshold
        return int(strength > self.threshold)


    def update(self, values, train, eta=.1):
        """
        Takes in a 2D array @param values consisting of a LIST of inputs and a
        1D array @param train, consisting of a corresponding list of expected
        outputs. Updates internal weights according to the perceptron training
        rule using these values and an optional learning rate, @param eta.

        Updates are performed online: the weights are revised after every data
        point. The prediction and weight change for each point, and the final
        weights, are printed as a trace.
        """
        # The trace lines are formatted into a single string so the output is
        # identical under Python 2 and Python 3.
        for index, inputs in enumerate(values):
            inputs = np.asarray(inputs)
            # The neuron's prediction for this data point.
            prediction = self.activate(inputs)
            print('prediction value： %s' % (prediction,))
            # Prediction error: expected value - predicted value.
            error = train[index] - prediction
            # Perceptron rule: error * learning rate * input value.
            weight_update = error * eta * inputs
            print('weight update： %s' % (weight_update,))
            self.weights += weight_update
        print('final weight value： %s' % (self.weights,))

def test():
    """
    Train a few perceptrons on tiny data sets and compare the resulting
    weights with precomputed values. An AssertionError is raised on the first
    mismatch.
    """
    def weights_match(actual, expected, tol = 1e-6):
        # The summed absolute difference of the two vectors must be below tol.
        total_gap = sum(abs(actual - expected))
        return total_gap < tol

    # (initial weights, training inputs, expected outputs, weights afterwards)
    scenarios = [
        ([1, 1, 1], [[2, 0, -3]], [1], [1.2, 1, 0.7]),
        ([1, 2, 3], [[3, 2, 1], [4, 0, -1]], [0, 0], [0.7, 1.8, 2.9]),
        ([3, 0, 2], [[2, -2, 4], [-1, -3, 2], [0, 2, 1]], [0, 1, 0],
         [2.7, -0.3, 1.7]),
    ]
    for initial, inputs, targets, expected in scenarios:
        unit = Perceptron(np.array(initial), 0)
        unit.update(np.array(inputs), np.array(targets))
        assert weights_match(unit.weights, np.array(expected))


if __name__ == "__main__":
    test()

prediction value： 0
weight update： [ 0.2  0.  -0.3]
final weight value： [ 1.2  1.   0.7]
prediction value： 1
weight update： [-0.3 -0.2 -0.1]
prediction value： 0
weight update： [ 0.  0. -0.]
final weight value： [ 0.7  1.8  2.9]
prediction value： 1
weight update： [-0.2  0.2 -0.4]
prediction value： 0
weight update： [-0.1 -0.3  0.2]
prediction value： 1
weight update： [-0.  -0.2 -0.1]
final weight value： [ 2.7 -0.3  1.7]


In [11]:
# Dot product of the same vector [1, 2, 3] with two different vectors,
# printed one result per line.
for other_vector in ([1, 1, -5], [3, -4, 2]):
    print(np.dot([1, 2, 3], other_vector))

-12
1


In [12]:
# The left vector [-12, 1] matches the two values printed by the previous cell.
left_vector = [-12, 1]
right_vector = [2, -1]
print(np.dot(left_vector, right_vector))

-25


### 创建 XOR 网络
关于 XOR 的资料可以参考：
- [WHY  DO  NEURONS  MAKE  NETWORKS](http://toritris.weebly.com/perceptron-5-xor-how--why-neurons-work-together.html)

A xor B = (A∨B)∧¬(A∧B)

在画直线进行切分时，斜率以及截距可以按照如下方式计算：![img](http://wx4.sinaimg.cn/large/69d4185bly1fnqcizkr2kj208y07mdfv.jpg)

In [3]:
# ----------
#
# In this exercise, you will create a network of perceptrons that can represent
# the XOR function, using a network structure like those shown in the previous
# quizzes.
#
# You will need to do two things:
# First, create a network of perceptrons with the correct weights
# Second, define a procedure EvalNetwork() which takes in a list of inputs and
# outputs the value of this network.
#
# ----------

import numpy as np

class Perceptron:
    """
    A single artificial neuron whose output is a hard 0/1 step.
    """

    def __init__(self, weights = np.array([1]), threshold = 0):
        """
        Remember the supplied weights and firing threshold exactly as given.
        The arguments are neither validated nor copied.
        """
        self.threshold = threshold
        self.weights = weights


    def activate(self, values):
        """
        Takes in @param values, a list of numbers with the same length as the
        weights.
        @return 1 when the weighted sum of the values is strictly greater than
        the threshold, otherwise 0.
        """
        # Weighted sum of the inputs.
        weighted_sum = np.dot(values, self.weights)

        # Step activation: compare against the threshold, then convert the
        # comparison result to a plain int.
        fired = weighted_sum > self.threshold
        return int(fired)

            
# Part 1: Build the two-layer perceptron network used for XOR.
Network = [
    # First layer, fed directly by the two inputs. The first unit fires only
    # when both inputs are 1 (0.6 + 0.6 > 1); the second fires when at least
    # one input is 1 (1.1 > 1).
    [Perceptron([0.6, 0.6], 1), Perceptron([1.1, 1.1], 1)],
    # Output unit: fires when the second unit is on and the first is off
    # (1.1 > 1, whereas -2 + 1.1 is not).
    [Perceptron([-2, 1.1], 1)],
]

# Part 2: Define a procedure to compute the output of the network, given inputs
def EvalNetwork(inputValues, Network):
    """
    Evaluate a feed-forward perceptron network on one set of inputs.

    Takes in @param inputValues, a list of input values for the first layer,
    and @param Network, a list of layers where each layer is a list of
    perceptron objects exposing activate(values). Any number of layers and any
    number of units per layer is supported; every unit in a layer receives the
    full list of outputs of the previous layer.

    @return the output of the first unit of the last layer, a single number.
    The network is expected to end in a single output unit.
    @raise IndexError if Network has no layers or its last layer is empty.
    """
    if len(Network) == 0:
        raise IndexError("Network must contain at least one layer")

    # Propagate the signal layer by layer. Thresholding is delegated to each
    # unit's activate() so the firing rule lives in one place.
    signal = inputValues
    for layer in Network:
        signal = [unit.activate(signal) for unit in layer]

    # Be sure the output value is a single number.
    return signal[0]


def test():
    """
    Print the network's output for every XOR input pair next to the expected
    value. Nothing is asserted; the printed lines are meant to be checked by
    eye.
    """
    # (label showing the expected result, input pair)
    cases = [
        ("0 XOR 0 = 0?:", [0, 0]),
        ("0 XOR 1 = 1?:", [0, 1]),
        ("1 XOR 0 = 1?:", [1, 0]),
        ("1 XOR 1 = 0?:", [1, 1]),
    ]
    for label, inputs in cases:
        # Formatted into a single string so the line is identical under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (label, EvalNetwork(np.array(inputs), Network)))


if __name__ == "__main__":
    test()

0 XOR 0 = 0?: 0
0 XOR 1 = 1?: 1
1 XOR 0 = 1?: 1
1 XOR 1 = 0?: 0


### 激活函数沙盒

In [None]:
# ----------
# 
# Python Neural Networks code originally by Szabo Roland and used with
# permission
#
# Modifications, comments, and exercise breakdowns by Mitchell Owen,
# (c) Udacity
#
# Retrieved originally from http://rolisz.ro/2013/04/18/neural-networks-in-python/
#
#
# Neural Network Sandbox
#
# Define an activation function activate(), which takes in a number and
# returns a number.
# Using test run you can see the performance of a neural network running with
# that activation function, where the inputs are 8x8 images of digits (0-9) and
# the outputs are digit predictions made by the network.
#
# ----------

import numpy as np


def activate(strength):
    """
    Activation function under test in the sandbox: squares its input.

    Try out different functions here. Input strength will be a number, with
    another number as output.
    """
    return np.power(strength, 2)

def activation_derivative(activate, strength):
    """
    Numerically approximate the derivative of the function `activate` at
    `strength`, using a symmetric difference with a half-width of 1e-5.
    """
    step = 1e-5
    ahead = activate(strength + step)
    behind = activate(strength - step)
    return (ahead - behind) / (2 * step)

运行代码可得到如下结果：
```
Random snapshot from epoch 201:
Our weights [[ -9.15412386e+16  -3.17340236e+09  -6.52835751e+10  -8.44266623e+10
   -9.87309840e+07  -7.23846086e+08  -2.22877650e+09  -4.51944048e+05
   -1.46888966e+06  -2.41556726e+08]
 [ -4.22017886e+16  -1.46298277e+09  -3.00966392e+10  -3.89218696e+10
   -4.55163616e+07  -3.33703148e+08  -1.02749707e+09  -2.08352617e+05
   -6.77178688e+05  -1.11361022e+08]
 [ -1.73857613e+14  -6.02701206e+06  -1.23988343e+08  -1.60345416e+08
   -1.87512511e+05  -1.37474828e+06  -4.23295317e+06  -8.58415825e+02
   -2.78982301e+03  -4.58771183e+05]
 [ -3.19174104e+15  -1.10646073e+08  -2.27622292e+09  -2.94367923e+09
   -3.44242349e+06  -2.52381252e+07  -7.77100847e+07  -1.57576991e+04
   -5.12154032e+04  -8.42228633e+06]
 [ -2.79121062e+14  -9.67611388e+06  -1.99058054e+08  -2.57427800e+08
   -3.01043350e+05  -2.20710009e+06  -6.79582737e+06  -1.37803435e+03
   -4.47892975e+03  -7.36537457e+05]
 [ -1.02696000e+14  -3.56009745e+06  -7.32387082e+07  -9.47144770e+07
   -1.10761869e+05  -8.12050123e+05  -2.50036413e+06  -5.07175025e+02
   -1.64796543e+03  -2.70991825e+05]
 [ -1.09952001e+18  -3.81163665e+10  -7.84133996e+11  -1.01406542e+12
   -1.18587747e+09  -8.69425921e+09  -2.67702775e+10  -5.42839053e+06
   -1.76431264e+07  -2.90138585e+09]
 [ -6.63976933e+14  -2.30176692e+07  -4.73521973e+08  -6.12372709e+08
   -7.16126474e+05  -5.25027961e+06  -1.61660054e+07  -3.27807808e+03
   -1.06541895e+04  -1.75208567e+06]
 [ -1.88069903e+18  -6.51970066e+10  -1.34123984e+12  -1.73453128e+12
   -2.02841111e+09  -1.48712936e+10  -4.57898305e+10  -9.28511459e+06
   -3.01780871e+07  -4.96274147e+09]
 [ -1.47496930e+15  -5.11318302e+07  -1.05188951e+09  -1.36033483e+09
   -1.59081495e+06  -1.16630579e+07  -3.59114316e+07  -7.28177579e+03
   -2.36676408e+04  -3.89211187e+06]
 [ -7.79936521e+15  -2.70375673e+08  -5.56219745e+09  -7.19319929e+09
   -8.41193539e+06  -6.16720950e+07  -1.89893016e+08  -3.85056572e+04
   -1.25150242e+05  -2.05807696e+07]
 [ -4.09342625e+18  -1.41904225e+11  -2.91926900e+12  -3.77528555e+12
   -4.41492825e+09  -3.23680410e+10  -9.96636310e+10  -2.02094702e+07
   -6.56839679e+07  -1.08016306e+10]
 [ -1.68293077e+18  -5.83410991e+10  -1.20019937e+12  -1.55213355e+12
   -1.81510992e+09  -1.33074762e+10  -4.09747192e+10  -8.30872159e+06
   -2.70046566e+07  -4.44087554e+09]
 [ -9.82386268e+16  -3.40557649e+09  -7.00598863e+10  -9.06035301e+10
   -1.05954392e+08  -7.76804494e+08  -2.39183942e+09  -4.85009743e+05
   -1.57635722e+06  -2.59229626e+08]
 [ -3.05815878e+15  -1.06015261e+08  -2.18095736e+09  -2.82047897e+09
   -3.29834962e+06  -2.41818475e+07  -7.44577254e+07  -1.50982674e+04
   -4.90719651e+04  -8.06979294e+06]
 [ -1.08049513e+15  -3.74568428e+07  -7.70566207e+08  -9.96519153e+08
   -1.16535842e+06  -8.54382317e+06  -2.63070743e+07  -5.33423413e+03
   -1.73378516e+04  -2.85118335e+06]]

 are being modified with deltas [[-0. -0. -0. -0. -0. -0. -0. -0. -0. -0.]]

 using the results matrix [[  9.73359541e+37   3.79693929e+36   4.15680126e+30   1.23646194e+32
    4.00387542e+31   2.72682070e+29   7.20902374e+39   1.26295109e+31
    6.53768438e+40   2.46017869e+31   1.21468016e+34   3.87694376e+41
    3.19507500e+40   1.82431892e+37   3.73760357e+33   7.76605073e+31]]

Confusion matrix: rows indicate true labels, columns indicate predictions.
[[54  0  0  0  0  0  0  0  0  0]
 [41  0  0  0  0  0  0  0  0  0]
 [44  0  0  0  0  0  0  0  0  0]
 [43  0  0  0  0  0  0  0  0  0]
 [44  0  0  0  0  0  0  0  0  0]
 [43  0  0  0  0  0  0  0  0  0]
 [47  0  0  0  0  0  0  0  0  0]
 [42  0  0  0  0  0  0  0  0  0]
 [45  0  0  0  0  0  0  0  0  0]
 [47  0  0  0  0  0  0  0  0  0]]

Classification report for above confusion matrix:
             precision    recall  f1-score   support

          0       0.12      1.00      0.21        54
          1       0.00      0.00      0.00        41
          2       0.00      0.00      0.00        44
          3       0.00      0.00      0.00        43
          4       0.00      0.00      0.00        44
          5       0.00      0.00      0.00        43
          6       0.00      0.00      0.00        47
          7       0.00      0.00      0.00        42
          8       0.00      0.00      0.00        45
          9       0.00      0.00      0.00        47

avg / total       0.01      0.12      0.03       450
```

### Sigmoid 编程练习

In [9]:
# ----------
# 
# As with the previous perceptron exercises, you will complete some of the core
# methods of a sigmoid unit class.
#
# There are two functions for you to finish:
# First, in activate(), write the sigmoid activation function.
# Second, in update(), write the gradient descent update rule. Updates should be
#   performed online, revising the weights after each data point.
# 
# ----------

import numpy as np


class Sigmoid:
    """
    This class models an artificial neuron with sigmoid activation function.
    """

    def __init__(self, weights = np.array([1])):
        """
        Initialize weights based on input arguments. Note that no type-checking
        is being performed here for simplicity of code.
        """
        self.weights = weights

        # NOTE: These two attributes are not needed for this programming quiz,
        # but they are useful when building a network out of sigmoid units.
        self.last_input = 0 # strength of last input
        self.delta      = 0 # error signal

    def activate(self, values):
        """
        Takes in @param values, a list of numbers equal to length of weights.
        @return the output of a sigmoid unit with given inputs based on unit
        weights.

        Side effect: stores the weighted input sum in self.last_input.
        """
        # First calculate the strength of the input signal.
        strength = np.dot(values, self.weights)
        self.last_input = strength

        # Then squash it with the logistic function to get the output signal.
        return self.logistic(strength)

    def logistic(self, strength):
        """
        @return the logistic function 1 / (1 + e^-strength) of @param strength.
        """
        return 1.0/(1 + np.exp(-strength))

    def update(self, values, train, eta=.1):
        """
        Takes in a 2D array @param values consisting of a LIST of inputs and a
        1D array @param train, consisting of a corresponding list of expected
        outputs. Updates internal weights according to gradient descent using
        these values and an optional learning rate, @param eta.

        Updates are performed online: the weights are revised after every data
        point. The inputs and expected outputs are printed once as a trace.
        """
        # Formatted into a single string so the trace line is identical under
        # the Python 2 print statement and the Python 3 print function.
        print('%s %s' % (values, train))
        for X, y_true in zip(values, train):
            # Output signal for this data point.
            y_pred = self.activate(X)

            # Derivative of the logistic function at the input strength:
            # d/dx logistic(x) = logistic(x) * (1 - logistic(x)).
            # y_pred already is logistic(self.last_input), so reuse it.
            dx = y_pred * (1 - y_pred)

            # Gradient descent step: learning rate * signal error *
            # function slope * input value.
            self.weights = self.weights + eta * (y_true - y_pred) * dx * X

def test():
    """
    Check Sigmoid.activate and Sigmoid.update against precomputed values. An
    AssertionError is raised on the first mismatch.
    """
    def weights_match(actual, expected, tol = 1e-5):
        # The summed absolute difference of the two vectors must be below tol.
        total_gap = sum(abs(actual - expected))
        return total_gap < tol

    first_unit = Sigmoid(weights=[3,-2,1])
    output_gap = abs(first_unit.activate(np.array([1,2,3])) - 0.880797)
    assert output_gap < 1e-5

    # One online update on a single data point.
    first_unit.update(np.array([[1,2,3]]), np.array([0]))
    expected_first = np.array([2.990752, -2.018496, 0.972257])
    assert weights_match(first_unit.weights, expected_first)

    # Two consecutive online updates.
    second_unit = Sigmoid(weights=[0,3,-1])
    second_unit.update(np.array([[-3,-1,2],[2,1,2]]), np.array([1,0]))
    expected_second = np.array([-0.030739, 2.984961, -1.027437])
    assert weights_match(second_unit.weights, expected_second)


if __name__ == "__main__":
    test()

[[1 2 3]] [0]
[[-3 -1  2]
 [ 2  1  2]] [1 0]


### 神经网络模式识别

如下图：![img](https://ws1.sinaimg.cn/large/69d4185bly1fnqdn6khlkj20bx09ht8w.jpg)

每一层都在识别前一层所拥有的模式。
- The red spots are the low-level patterns (edges, diagonal lines, curves, etc) that are detected in the pattern of activation of the retina.  
- The green spots are the cells that pick up certain patterns of red spots.  For example; corner + curve + corner = wheel arch.
- The blue spots are the cells that pick up certain patterns in the green spots.  For example; wheel arch + door + bonnet + ....  = car.

如下图的细分图：![img](https://ws1.sinaimg.cn/large/69d4185bly1fnqdovgh8wj207u098dfu.jpg)

复杂的神经网络也是类似的处理，如下四层神经网络进行模式识别：![img](https://ws1.sinaimg.cn/large/69d4185bly1fnqeqczvquj20l40ekdh0.jpg)

## 基于实例的学习

### 几个概念

- 欧氏距离
- 曼哈顿距离


- 急切式学习（eager learning）
- 懒惰式学习（lazy learning）

## 集成学习：Bagging 与 Boosting（B&B）

## 朴素贝叶斯项目2

### 最大可能性假设

In [4]:
sample_memo = '''
Milt, we're gonna need to go ahead and move you downstairs into storage B. We have some new people coming in, 
and we need all the space we can get. So if you could just go ahead and pack up your stuff and move it down there, that would be terrific, OK?
Oh, and remember: next Friday... is Hawaiian shirt day. So, you know, if you want to, go ahead and wear a Hawaiian shirt and jeans.
Oh, oh, and I almost forgot. Ahh, I'm also gonna need you to go ahead and come in on Sunday, too...
Hello Peter, whats happening? Ummm, I'm gonna need you to go ahead and come in tomorrow. 
So if you could be here around 9 that would be great, mmmk... oh oh! and I almost forgot ahh, 
I'm also gonna need you to go ahead and come in on Sunday too, kay. We ahh lost some people this week and ah, 
we sorta need to play catch up.
'''

#
#   Maximum Likelihood Hypothesis
#
#
#   In this quiz we will find the maximum likelihood word based on the preceding word
#
#   Fill in the NextWordProbability procedure so that it takes in sample text and a word,
#   and returns a dictionary with keys the set of words that come after, whose values are
#   the number of times the key comes after that word.
#   
#   Just use .split() to split the sample_memo text into words separated by spaces.

def NextWordProbability(sampletext,word):
    """
    Count which words directly follow a given word in a text.

    @param sampletext: the text to scan. It is split on any run of whitespace,
        so words at the start or end of a line are not glued to a newline.
    @param word: the word whose followers are counted (exact, case-sensitive
        match; punctuation attached to a word is part of the word).
    @return a dictionary whose keys are the words that come right after
        `word` and whose values are the number of times each one does. The
        dictionary is empty when `word` never occurs or only occurs as the
        very last word.
    """
    words = sampletext.split()
    result = {}

    # Pairing every word with its successor stops one short of the end, so an
    # occurrence of `word` as the final token has no follower and is skipped
    # instead of raising IndexError.
    for current, following in zip(words, words[1:]):
        if current == word:
            result[following] = result.get(following, 0) + 1

    return result

# Print the follower counts in sample_memo for a handful of words, one
# dictionary per line.
for query in ("gonna", "ahead", "could", "be"):
    print(NextWordProbability(sample_memo, query))

{'need': 4}
{'and': 6}
{'be': 1, 'just': 1}
{'terrific,': 1, 'here': 1, 'great,': 1}
