In [1]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset

In [5]:
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
# 训练数据集y是列向量


(209, 64, 64, 3)
(209,)
(1, 209)


In [11]:
# 将训练集初始化标准化
# 图片情况，num_px num_px 3
#num_px = train_set_x_orig[1]  # 获取图片大小
#rgb = train_set_x_orig[3]     # 获取图片颜色深度
train_set_x = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T  #用reshape将数据集初始化成每一张图片，占据一列，一共209列，T的作用是转置
test_set_x  = test_set_x_orig.reshape(test_set_x_orig.shape[0],-1).T # 同样的方法初始化测试集输入
print("训练集的X.shape(维度)为" + str(train_set_x.shape))
print("测试集的X.shape(维度)为" + str(test_set_x.shape))

# 处理数据集的原理是：将数据集平均分配到209行，也就是209长图片，于是按照行的原则，平均分配，正好每一行是一个图片的所有数据。再用T进行转置
# 最终变成列向量

训练集的X.shape(维度)为(12288, 209)
测试集的X.shape(维度)为(12288, 50)


# 接下来就是对训练集进行标准化
对于图片而言，一般以255作为标准化的参数。机器学习中一个常见的预处理步骤是对数据集进行集中和标准化，这意味着从每个示例中减去整个numpy数组的平均值，然后用整个numpy数组的标准差除以每个示例。但是对于图像数据集来说，将数据集的每一行除以255(像素通道的最大值)会更简单、更方便，而且效果几乎一样。

In [13]:
train_set_x = train_set_x / 255
test_set_x = test_set_x /255

# 对数据进行完预处理后，开始创建自己的模型
+ 首先，需要完成对A，一次cost的计算以及总cost的计算
+ 其次，需要完成正向传播的w和b的计算
+ 最后，需要完成反向传播的dw和db的计算，以计算梯度下降的比率

#### 计算A，需要完成的是
$$z^{(i)} = w^T x^{(i)} + b \tag{1}$$
$$\hat{y}^{(i)} = a^{(i)} = sigmoid(z^{(i)})\tag{2}$$
#### 计算cost，需要计算的是
$$ \mathcal{L}(a^{(i)}, y^{(i)}) =  - y^{(i)}  \log(a^{(i)}) - (1-y^{(i)} )  \log(1-a^{(i)})\tag{3}$$
#### 计算总平均cost，需要计算的是
$$ J = \frac{1}{m} \sum_{i=1}^m \mathcal{L}(a^{(i)}, y^{(i)})\tag{6}$$

首先需要实现sigmoid()函数，是一个单调递增函数，在0的位置取得0.5的值，取值范围在0-1之间

In [17]:
# z是一个标量
def sigmoid(z):
    return 1/(1+np.exp(-z))# 求e次方

In [19]:
print(sigmoid(0))

0.5


In [25]:

# GRADED FUNCTION: initialize_with_zeros

def initialize_with_zeros(dim):
    """
    This function creates a vector of zeros of shape (dim, 1) for w and initializes b to 0.
    
    Argument:
    dim -- size of the w vector we want (or number of parameters in this case)
    
    Returns:
    w -- initialized vector of shape (dim, 1)
    b -- initialized scalar (corresponds to the bias)
    """
    
    ### START CODE HERE ### (≈ 1 line of code)
    w = np.zeros(shape=(dim, 1))
    b = 0
    ### END CODE HERE ###

    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    
    return w, b

由逻辑回归的公式可知，计算的W是一个列向量，其转置是一个行向量。为了保证乘积是一个数值，于是就有其行向量的shape是(12288, 1)

W.shape : (12288,1)

b: 实数

In [21]:
def propagate(w, b, X, Y):
    # 正向计算cost
    m = X.shape[1] # 获取数据集的大小
    A = sigmoid(np.dot(w.T,X) + b)
    # cost = -Y*np.log(A)-(1-Y)*np.log(1-A)   这是错误的计算方法，因为此时的A，X中都包含了数据集的所有结果，所以，这样运行得到的，是整个
    # cost的值，而不是单个的，所以，对于整合cost的计算，要按照下面的公式
    cost = -(1/m)*np.sum(Y*np.log(A)+ (1-Y)*np.log(1-A))
    
    # 反向计算梯度
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    
    #整合梯度函数
    grads = {
        "dw":dw,
        "db":db
    }
    cost = np.squeeze(cost)
    
    return cost,grads

正向计算完cost和反向计算完梯度，需要按照梯度，来计算新的w和b

In [22]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """
    This function optimizes w and b by running a gradient descent algorithm
    
    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps
    
    Returns:
    params -- dictionary containing the weights w and bias b
    grads -- dictionary containing the gradients of the weights and bias with respect to the cost function
    costs -- list of all the costs computed during the optimization, this will be used to plot the learning curve.
    
    Tips:
    You basically need to write down two steps and iterate through them:
        1) Calculate the cost and the gradient for the current parameters. Use propagate().
        2) Update the parameters using gradient descent rule for w and b.
    """
    costs = []
    for i in range(num_iterations):
        cost,grads = propagate(w,b,X,Y)
        dw = grads["dw"]
        db = grads["db"]
        
        # 计算新的wb
        w = w - learning_rate*dw
        b = b - learning_rate*db
        
        # 每一百次记录一下cost
        if i % 100 == 0:
            costs.append(cost)
        
        # 每一百输出一次
        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" % (i, cost))
    params = {
        "w":w,
        "b":b
    }
    
    grads = {
        "dw":dw,
        "db":db
    }
    return params,grads,costs

计算完了相应的数据后，现在可以开始预测

In [24]:
def predict(w, b, X):
    '''
    Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)
    
    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, number of examples)
    
    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''
    m = X.shape[1] # 获取测试集维度
    Y_prediction = np.zeros(1,m) # 格式化
    w = w.reshape(X.shape(0),1) # 格式化
    A = sigmoid(np.dot(w.T,X)+b)
    for i in range(A.shape[1]):
        Y_prediction[0,i] = 1 if A[0,i]>0.5 else 0
    return Y_prediction

所有的需要用到的函数都创建完毕了，现在就需要建立我们的模型了

In [27]:
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Builds the logistic regression model by calling the function you've implemented previously
    
    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iterations -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations
    
    Returns:
    d -- dictionary containing information about the model.
    """
    """
    函数列表
    sigmoid(z)    # 预测函数
    initialize_with_zeros(dim) # 初始化wb,dim表示维度
    propagate(w,b,X,Y) # 计算梯度以及花费
    optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False) # 训练模型，计算新的w，新的b，以及cost，以及梯度
    predict(w,b,X)   # 对于给定的w,b，X预测结果
    """
    
    # 初始化w，b
    w, b = initialize_with_zeros(X_train.shape[0])
    params,grads,costs = optimize(w,b,X_train,Y_train,num_iterations,learning_rate,print_cost)
    w = params["w"]
    b = params["b"]
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    
    print("训练的准确度: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100)) # mean函数求的是均值
    print("测试集的准确度: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100)) # abs函数求的是绝对值
    d = {"costs": costs,
     "Y_prediction_test": Y_prediction_test, 
     "Y_prediction_train" : Y_prediction_train, 
     "w" : w, 
     "b" : b,
     "learning_rate" : learning_rate,
     "num_iterations": num_iterations}

    return d

In [28]:
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)

Cost after iteration 0: 0.693147
Cost after iteration 100: 0.584508
Cost after iteration 200: 0.466949
Cost after iteration 300: 0.376007
Cost after iteration 400: 0.331463
Cost after iteration 500: 0.303273
Cost after iteration 600: 0.279880
Cost after iteration 700: 0.260042
Cost after iteration 800: 0.242941
Cost after iteration 900: 0.228004
Cost after iteration 1000: 0.214820
Cost after iteration 1100: 0.203078
Cost after iteration 1200: 0.192544
Cost after iteration 1300: 0.183033
Cost after iteration 1400: 0.174399
Cost after iteration 1500: 0.166521
Cost after iteration 1600: 0.159305
Cost after iteration 1700: 0.152667
Cost after iteration 1800: 0.146542
Cost after iteration 1900: 0.140872


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()