In [4]:
import math
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import tensorflow as tf
from PIL import Image
from scipy import ndimage
from tensorflow.python.framework import ops
#from cnn_utils import *

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1)

In [7]:
def load_train_dataset(path='fer2013_train.csv'):
    """
    Load the training split from a CSV file.

    The file must contain an 'emotion' column (class label) and a 'pixels'
    column (space-separated pixel values, 48*48 = 2304 values per row).

    input: path - CSV file to read (defaults to 'fer2013_train.csv')
    output: train_set_x_orig - float32 array of shape (m, 48, 48, 1)
            train_set_y_orig - label array of shape (m, 1)

    """
    train_dataset = pd.read_csv(path)

    # Each 'pixels' cell is one flattened image stored as text; parse every
    # row into a float32 vector and stack the rows into a 2-D array.
    train_set_x_orig = np.array(
        [np.array(row.split(" "), 'float32') for row in train_dataset['pixels']]
    )
    train_set_y_orig = np.array(train_dataset['emotion'])

    # Restore the image layout (height, width, channels) and make the labels
    # a column vector.
    train_set_x_orig = train_set_x_orig.reshape(-1, 48, 48, 1)
    train_set_y_orig = train_set_y_orig.reshape(-1, 1)
    return train_set_x_orig, train_set_y_orig

In [8]:
def convert_to_one_hot(Y, C):
    """
    One-hot encode integer class labels.

    input: Y - integer label array (any shape; it is flattened first)
           C - number of classes
    output: array of shape (C, number_of_labels); column i is the one-hot
            vector for the i-th label
    """
    flat_labels = Y.reshape(-1)
    identity = np.eye(C)
    # Row k of the identity matrix is the one-hot vector for class k; pick one
    # row per label, then transpose so classes run along the first axis.
    one_hot_rows = identity[flat_labels]
    return one_hot_rows.T

In [9]:
def load_test_dataset(path='fer2013_test.csv'):
    """
    Load the test split from a CSV file.

    The file must contain an 'emotion' column (class label) and a 'pixels'
    column (space-separated pixel values, 48*48 = 2304 values per row).

    input: path - CSV file to read (defaults to 'fer2013_test.csv')
    output: test_set_x_orig - float32 array of shape (m, 48, 48, 1)
            test_set_y_orig - label array of shape (m, 1)

    """
    test_dataset = pd.read_csv(path)

    # Each 'pixels' cell is one flattened image stored as text; parse every
    # row into a float32 vector and stack the rows into a 2-D array.
    test_set_x_orig = np.array(
        [np.array(row.split(" "), 'float32') for row in test_dataset['pixels']]
    )
    test_set_y_orig = np.array(test_dataset['emotion'])

    # Restore the image layout (height, width, channels) and make the labels
    # a column vector.
    test_set_x_orig = test_set_x_orig.reshape(-1, 48, 48, 1)
    test_set_y_orig = test_set_y_orig.reshape(-1, 1)
    return test_set_x_orig, test_set_y_orig

In [10]:
# Read both CSV splits into memory. Per the loader functions, images come back
# as float32 arrays of shape (m, 48, 48, 1) and labels as arrays of shape (m, 1).
X_train_orig, Y_train_orig = load_train_dataset()
X_test_orig, Y_test_orig = load_test_dataset()

In [11]:
# Divide the raw pixel values by 255 (presumably 8-bit intensities, which
# would put every value in [0, 1] -- confirm against the CSV contents).
X_train = X_train_orig / 255.
X_test = X_test_orig / 255.

# One-hot encode the labels for 3 classes. convert_to_one_hot returns
# (classes, examples), so transpose to get one row per example.
Y_train = convert_to_one_hot(Y_train_orig, 3).T
Y_test = convert_to_one_hot(Y_test_orig, 3).T

# Report dataset sizes and array shapes as a sanity check.
print("number of training examples = {}".format(X_train.shape[0]))
print("number of test examples = {}".format(X_test.shape[0]))
print("X_train shape: {}".format(X_train.shape))
print("Y_train shape: {}".format(Y_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("Y_test shape: {}".format(Y_test.shape))
conv_layers = {}

number of training examples = 28709
number of test examples = 7178
X_train shape: (28709, 48, 48, 1)
Y_train shape: (28709, 3)
X_test shape: (7178, 48, 48, 1)
Y_test shape: (7178, 3)


In [12]:
def create_placeholders(n_H0, n_W0, n_C0, n_y):
    """
    Create the input and label placeholders for the session.

    Arguments:
        n_H0 - height of an input image
        n_W0 - width of an input image
        n_C0 - number of input channels
        n_y  - number of classes

    Returns:
        X - float32 placeholder for the input images, shape [None, n_H0, n_W0, n_C0]
        Y - float32 placeholder for the labels, shape [None, n_y]
    """
    # The leading None leaves the batch dimension unspecified.
    image_shape = [None, n_H0, n_W0, n_C0]
    label_shape = [None, n_y]

    X = tf.placeholder(tf.float32, image_shape)
    Y = tf.placeholder(tf.float32, label_shape)
    return X, Y

In [13]:
# Build placeholders for 48x48 single-channel images and 3 classes, then show
# the resulting symbolic tensors.
X, Y = create_placeholders(n_H0=48, n_W0=48, n_C0=1, n_y=3)
print("X = {}".format(X))
print("Y = {}".format(Y))

X = Tensor("Placeholder:0", shape=(?, 48, 48, 1), dtype=float32)
Y = Tensor("Placeholder_1:0", shape=(?, 3), dtype=float32)


In [14]:
def init_parameters():
    """
    Create the convolution filter variables. The shapes are hard-coded:
    W1 : [3, 3, 1, 16]
    W2 : [3, 3, 16, 32]
    W3 : [3, 3, 32, 64]

    Each variable uses a Xavier initializer created with seed=0.

    Returns:
        dict mapping "W1", "W2" and "W3" to the corresponding variables
    """
    tf.set_random_seed(1)

    # (name, [filter_height, filter_width, in_channels, out_channels])
    filter_specs = (
        ("W1", [3, 3, 1, 16]),
        ("W2", [3, 3, 16, 32]),
        ("W3", [3, 3, 32, 64]),
    )

    parameters = {}
    for var_name, var_shape in filter_specs:
        parameters[var_name] = tf.get_variable(
            var_name,
            var_shape,
            initializer=tf.contrib.layers.xavier_initializer(seed=0),
        )

    return parameters

In [15]:
def _conv_relu_pool(inputs, kernel, keep_prob):
    """Apply one CONV2D -> RELU -> MAXPOOL -> DROPOUT stage and return its output."""
    # Stride-1 convolution with 'SAME' padding.
    conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')
    activated = tf.nn.relu(conv)
    # 2x2 max-pooling window, stride 2, 'SAME' padding.
    pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.dropout(pooled, keep_prob)


def conv_forward(X, parameters, conv_keep_prob=0.8, fc_keep_prob=0.7, num_classes=3):
    """
    Build the forward pass of the model:
    CONV1 -> MAXPOOL -> CONV2 -> MAXPOOL -> CONV3 -> MAXPOOL -> FLATTEN
    -> FC(100) -> FC(40) -> FC(num_classes)

    Dropout follows every pooling layer and each of the two hidden
    fully-connected layers.

    Arguments:
    X -- placeholder holding the input images
    parameters -- dict holding the convolution filters "W1", "W2" and "W3"
    conv_keep_prob -- dropout keep probability after each pooling layer.
                      Pass 1.0 to disable that dropout (e.g. when evaluating).
    fc_keep_prob -- dropout keep probability after each hidden fully-connected
                    layer. Pass 1.0 to disable that dropout.
    num_classes -- number of units in the output layer

    Returns:
    layer6 -- output of the last fully-connected layer. It has no activation,
              so these are unnormalised logits; conv_cost applies the softmax.
    """

    # Three identical convolutional stages that differ only in their filter.
    Q1 = _conv_relu_pool(X, parameters['W1'], conv_keep_prob)
    Q2 = _conv_relu_pool(Q1, parameters['W2'], conv_keep_prob)
    Q3 = _conv_relu_pool(Q2, parameters['W3'], conv_keep_prob)

    # Flatten the feature maps to one vector per example.
    Q = tf.contrib.layers.flatten(Q3)

    # Two hidden fully-connected layers with ReLU, each followed by dropout.
    layer4 = tf.contrib.layers.fully_connected(Q, 100, activation_fn=tf.nn.relu)
    layer4 = tf.nn.dropout(layer4, fc_keep_prob)
    layer5 = tf.contrib.layers.fully_connected(layer4, 40, activation_fn=tf.nn.relu)
    layer5 = tf.nn.dropout(layer5, fc_keep_prob)

    # Output layer: linear, no activation.
    layer6 = tf.contrib.layers.fully_connected(layer5, num_classes, activation_fn=None)

    return layer6

In [16]:
def conv_cost(layer6,Y):
    """
    Compute the cost.

    Arguments:
        layer6 - output of the model's last layer, passed to the loss as logits.
                 Presumably shaped (batch, n_y) to match Y -- confirm against caller.
        Y - placeholder holding the true labels, passed to the loss as labels

    Returns:
        cost - softmax cross-entropy between labels and logits, averaged over
               all examples
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=layer6, labels=Y)
    cost = tf.reduce_mean(per_example_loss)
    return cost

In [17]:
def createbatch(X, Y, size = 64, seed = 0):
    """
    Shuffle (X, Y) in unison and cut the result into mini-batches.

    Arguments:
    X -- input data, shape (m, H, W, C)
    Y -- labels, shape (m, n_y)
    size -- integer, number of examples per mini-batch
    seed -- value used to seed NumPy's global random generator before shuffling

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples. Every batch
                    holds `size` examples, except possibly the last one, which
                    holds the remainder when m is not a multiple of size.
    """
    m = X.shape[0]                  # number of examples
    np.random.seed(seed)

    # Step 1: draw one random ordering and apply it to both X and Y so every
    # example stays paired with its label.
    order = list(np.random.permutation(m))
    shuffled_X = X[order, :, :, :]
    shuffled_Y = Y[order, :]

    # Step 2: slice out all the full-size batches.
    full_batches = math.floor(m / size)
    starts = [k * size for k in range(full_batches)]
    mini_batches = [
        (shuffled_X[lo : lo + size, :, :, :], shuffled_Y[lo : lo + size, :])
        for lo in starts
    ]

    # Step 3: keep whatever is left over as one final, smaller batch.
    if m % size != 0:
        tail = full_batches * size
        mini_batches.append((shuffled_X[tail : m, :, :, :], shuffled_Y[tail : m, :]))

    return mini_batches

In [18]:
def CNN(X_train_data, Y_train_data, X_test_data, Y_test_data, l_rate=0.001, 
         nums=100,mini_batch=32,display_cost=False,display_pic=True):
    """
    Build and train the three-convolution-layer network with TensorFlow, then
    report its accuracy on the training and test data.

    CONV1 -> MAXPOOL -> CONV2 -> MAXPOOL -> CONV3 -> MAXPOOL -> FLATTEN -> FC -> FC -> output

    Arguments:
        X_train_data - training images, shape (m, n_H0, n_W0, n_C0)
        Y_train_data - one-hot training labels, shape (m, n_y)
        X_test_data - test images, fed to the same image placeholder
        Y_test_data - one-hot test labels, fed to the same label placeholder
        l_rate - learning rate for the Adam optimizer
        nums - number of epochs (full passes over the training data)
        mini_batch - mini-batch size
        display_cost - whether to print the mean cost every 5 epochs
        display_pic - whether to plot the cost curve after training

    Returns:
        train_accuracy - accuracy on the training data
        test_accuracy - accuracy on the test data
        parameters - dict of convolution filter variables as returned by
                     init_parameters (graph variables, not evaluated values)
    """
    ops.reset_default_graph()  # start from an empty graph so re-running does not clash with earlier variables
    tf.set_random_seed(2)      # graph-level TensorFlow seed
    seed = 3                   # seed handed to createbatch, bumped every epoch
    (m , n_H0, n_W0, n_C0) = X_train_data.shape
    n_y = Y_train_data.shape[1]
    costs = []

    # Create the placeholders.
    X , Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    # Create the filter variables.
    parameters = init_parameters()

    # Forward propagation.
    layer6 = conv_forward(X,parameters)

    # Cost.
    cost = conv_cost(layer6,Y)

    # Backpropagation is handled by the framework; only an optimizer is needed.
    conv_back = tf.train.AdamOptimizer(learning_rate=l_rate).minimize(cost)

    # Op that initializes every variable in the graph.
    init = tf.global_variables_initializer()
    os.environ['CUDA_VISIBLE_DEVICES'] = "1,0"

    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(nums):
            # A different seed each epoch gives a different shuffle each epoch.
            seed = seed + 1
            minibatches = createbatch(X_train_data,Y_train_data,mini_batch,seed)
            # Average over the batches actually produced. createbatch appends a
            # trailing partial batch when m is not a multiple of mini_batch, so
            # int(m / mini_batch) would under-count (and is 0 when m < mini_batch).
            num_minibatches = len(minibatches)
            minibatch_cost = 0

            for (minibatch_X, minibatch_Y) in minibatches:
                # One optimizer step on this batch; also fetch its cost.
                _ , tempt_cost = sess.run([conv_back,cost],feed_dict={X:minibatch_X, Y:minibatch_Y})

                # Accumulate the mean cost over the epoch.
                minibatch_cost += tempt_cost / num_minibatches

            # Print the cost every 5 epochs if requested.
            if display_cost and epoch % 5 == 0:
                print("当前是第 " + str(epoch) + " 代，成本值为：" + str(minibatch_cost))

            # Record the cost of every epoch.
            costs.append(minibatch_cost)

        # Training finished: plot the cost curve. One point is recorded per
        # epoch, regardless of what the x-axis label text says.
        if display_pic:
            plt.plot(np.squeeze(costs))
            plt.xlabel('迭代次数（*10）')
            plt.ylabel('代价函数值')           
            plt.title("学习速率 =" + str(l_rate))
            plt.show()

        # Predicted class = index of the largest logit; compare with the label.
        pre_num = tf.argmax(layer6,1)
        current = tf.equal(pre_num , tf.argmax(Y,1))

        # Accuracy = fraction of correct predictions.
        acc = tf.reduce_mean(tf.cast(current,"float"))
        # NOTE: this prints the symbolic tensor, not a numeric accuracy.
        print("corrent_prediction accuracy= " + str(acc))

        # NOTE(review): conv_forward is called with its default arguments, so
        # its dropout layers are still active while these accuracies are computed.
        acc_train = acc.eval({X: X_train_data, Y: Y_train_data})
        acc_test = acc.eval({X: X_test_data, Y: Y_test_data})

        print("训练集准确度：" + str(acc_train))
        print("测试集准确度：" + str(acc_test)) 

        return (acc_train,acc_test,parameters)


In [19]:
# Train for 200 epochs. CNN's epoch-count parameter is named `nums`.
_, _, parameters = CNN(X_train, Y_train, X_test, Y_test, nums=200)


ImportError: cannot import name 'call_logit_fn' from 'tensorflow_estimator.python.estimator.estimator_lib' (G:\Anaconda3\lib\site-packages\tensorflow_estimator\python\estimator\estimator_lib.py)