代码1：核支持向量机手写字符识别

In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

digits=load_digits()
X_train,X_test,y_train,y_test=train_test_split(digits.data,digits.target,
test_size=0.25,random_state=33)
print("X train shape:{},X test shape:{}".format(X_train.shape, X_test.shape))
#X train shape:(1347, 64),X test shape:(450, 64),
# Normalization
ss=StandardScaler()
X_train=ss.fit_transform(X_train)
X_test=ss.fit_transform(X_test)
#Linear SVC
lsvc=LinearSVC(C=1, penalty="l2",loss='hinge')
lsvc.fit(X_train,y_train)
y_predict=lsvc.predict(X_test)
print('Accuracy of Linear SVC:',lsvc.score(X_test,y_test))
print(classification_report(y_test,y_predict,target_names
=digits.target_names.astype(str)))
#Polynomial Kernel
lsvc1=SVC(C=1,kernel='poly',degree=2)
#loss='squared_hinge'
lsvc1.fit(X_train,y_train)
y_predict=lsvc1.predict(X_test)
print('Accuracy of SVC of poly kernel:',lsvc1.score(X_test,y_test))
print(classification_report(y_test,y_predict,target_names
=digits.target_names.astype(str)))
#Gaussian Kernel
lsvc2=SVC(C=1,kernel='rbf',gamma=0.01)
lsvc2.fit(X_train,y_train)
y_predict=lsvc2.predict(X_test)
print('Accuracy of SVC of rbf kernel:',lsvc2.score(X_test,y_test))
print(classification_report(y_test,y_predict,target_names
      =digits.target_names.astype(str)))
#Accuracy of Linear SVC: 0.94
#Accuracy of SVC of poly kernel: 0.9777777777777777
#Accuracy of SVC of rbf kernel: 0.9844444444444445

代码2： 身高-体重数据的线性回归、岭回归、LASSO回归和核岭回归

In [None]:
import numpy as np
# 使用sk-learn的线性回归模型
def linear_regression_sk(X, y):
    from sklearn.linear_model import LinearRegression
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    print('线性回归解：', 'b：', lin_reg.intercept_, 'w：', lin_reg.coef_)
# 岭回归，l2正则化
def ridge_regression_analysis(X, y):
    from sklearn.linear_model import Ridge
    ridge_reg = Ridge(alpha=1, solver='cholesky')
    ridge_reg.fit(X, y)
    print('岭回归解：', 'b：', ridge_reg.intercept_, 'w：', ridge_reg.coef_)
# Lasso 回归，l1正则化
def lasso_regression_analysis(X, y):
    from sklearn.linear_model import Lasso
    lasso_reg = Lasso(alpha=0.1)
    lasso_reg.fit(X, y)
    print('LASSO回归解：', 'b：', lasso_reg.intercept_, 'w：', lasso_reg.coef_)
# 高斯核岭回归
def kernel_regression_analysis(X, y):
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.model_selection import GridSearchCV
    import matplotlib.pyplot as plt
    #网格搜索和交叉验证确定最优超参数
    kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=20), cv=5,
                      param_grid={'alpha': [1e0, 0.1, 1e-2, 1e-3],
                                  'gamma': np.logspace(-2, 2, 5)})
    kr.fit(X, y)
    print('核岭回归误差:',np.linalg.norm(y-kr.predict(X)))
    X_plot = np.linspace(150, 190, 100)[:, None]
    y_kr = kr.predict(X_plot)
    plt.scatter(X, y, c='k', label='data', zorder=1)
    plt.plot(X_plot, y_kr, c='g',label='KRR')

if __name__ == '__main__':
    #身高体重数据
    data=np.array([[152,51],[156,53],[160,54],[164,55],
              [168,57],[172,62],[176,62],[180,65],
              [184,69],[188,72]])
    print(data.shape)
    X,y=data[:,0].reshape(-1,1),data[:,1]
    linear_regression_sk(X, y)
    ridge_regression_analysis(X, y)
    lasso_regression_analysis(X, y)
    kernel_regression_analysis(X, y)

代码3：鸢尾花数据集的感知机、逻辑回归和Softmax回归分类

In [None]:
import numpy as np
from sklearn import datasets
#鸢尾花数据集
iris = datasets.load_iris()
#print(iris.keys())
n_samples, n_features = iris.data.shape
print((n_samples, n_features))
#print(iris.data[0])
print(iris.target_names)
print("feature_names:",iris.feature_names)

# 感知机分类
def perceptron_classify(iris):
    from sklearn.linear_model import Perceptron
    X = iris["data"][:, 3:]  #花瓣宽度petal width
    y = (iris["target"] == 2).astype(np.int)
    perceptron_reg = Perceptron()
    perceptron_reg.fit(X, y)
    print('感知机解：', 'b：', perceptron_reg.intercept_, 'w：', perceptron_reg.coef_)
    print('perceptron accuracy:=',perceptron_reg.score(X, y))
# logistic回归分类
def logistic_classify(iris):
    from sklearn.linear_model import LogisticRegression
    X = iris["data"][:, 3:]  #花瓣宽度petal width
    y = (iris["target"] == 2).astype(np.int)
    log_reg = LogisticRegression()
    log_reg.fit(X, y)
    print('logistic回归解：', 'b：', log_reg.intercept_, 'w：', log_reg.coef_)
    print('logistic accuracy:=',log_reg.score(X, y))
# softmax回归多分类
def softmax_classify(iris):
    from sklearn.linear_model import LogisticRegression
    X = iris["data"][:, (2, 3)]  #花瓣长度petal length,花瓣宽度petal width
    y = iris["target"]
    softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs", C=10)
    softmax_reg.fit(X, y)
    print('softmax回归解：', 'b：', softmax_reg.intercept_, 'w：=\n', softmax_reg.coef_)
    print('softmax accuracy:=',softmax_reg.score(X, y))
    predict = softmax_reg.predict([[5, 2]])
    predict_pro = softmax_reg.predict_proba([[5, 2]])
    print('softmax回归预测为：', predict, '各类概率为', predict_pro)

if __name__ == '__main__':
    perceptron_classify(iris)
    logistic_classify(iris)
    softmax_classify(iris)
    
#logistic回归解： b： [-7.1947083] w： [[4.3330846]]
#softmax回归解：b： [ 18.87514796   6.3844344  -25.25958236]
#w=[[-4.58614563 -2.24129385],[0.16068263 -2.15860167],[4.425463  4.39989552]]

代码4： 二维卷积

In [1]:
#2D Convolution
import numpy as np
import scipy.signal
x=np.random.randint(1,5,size=(5,5))
h=np.array([[-1,-1,-1],[-1,8,-1],[-1,-1,-1]])
y=scipy.signal.convolve(x,h,mode='full', method='auto')
print("x=\n"+str(x))
print("h=\n"+str(h))
print("Convolution x*h=\n"+str(y))

x=
[[4 1 2 4 3]
 [1 3 3 2 3]
 [3 4 2 3 1]
 [1 2 2 2 3]
 [2 2 4 4 4]]
h=
[[-1 -1 -1]
 [-1  8 -1]
 [-1 -1 -1]]
Convolution x*h=
[[ -4  -5  -7  -7  -9  -7  -3]
 [ -5  27  -5   3  19  15  -6]
 [ -8  -7   4   3  -5  11  -7]
 [ -5  13  15  -5   6  -5  -7]
 [ -6  -5  -4  -7  -7  10  -8]
 [ -3  11   5  20  17  23  -7]
 [ -2  -4  -8 -10 -12  -8  -4]]


MNIST数据集

In [None]:
#MNIST Dataset
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(X_test.shape)   # (10000, 28, 28)
print(y_test.shape)   # (10000,)
print(type(X_train))  # <class 'numpy.ndarray'>
plt.imshow(X_train[0], cmap="Greys")  # The first image
plt.show()

代码5：3层感知机识别手写字符

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.layers import Dropout
from keras import backend as K
from keras.datasets import mnist

def mnist_data_split():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    img_rows, img_cols = 28, 28
    num_classes = 10
    x_train = x_train.reshape(60000, 784)
    x_test = x_test.reshape(10000, 784)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print("x_train shape: ", x_train.shape)
    print("train samples: ", x_train.shape[0])
    print("test samples: ", x_test.shape[0])
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    y_train = y_train.astype('int32')
    y_test = y_test.astype('int32')
    print("X train shape:{},X test shape:{},y train shape:{},y test shape:{}"
          .format(x_train.shape, x_test.shape, y_train.shape, y_test.shape))
    return  x_train, x_test, y_train, y_test
x_train, x_test, y_train, y_test = mnist_data_split()
print ("inputshape is :{}".format(x_train.shape[1]))

def train_mnist_nnet_baseline( x_train, x_test, y_train, y_test):
    num_class = 10
    epochs = 32
    batch_size = 128
    input_shape = (784, )
    model = Sequential()
    model.add(Dense(64, input_shape=input_shape, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(num_class, activation="softmax"))
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",metrics=['accuracy'])
    history_callback = model.fit(x_train, y_train,verbose=0,batch_size
              =batch_size,validation_data=(x_test, y_test),epochs=epochs)
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss: {}".format(score[0]))
    print("Test accuracy: {}".format(score[1]))
    print(model.summary())
    return history_callback

%time callback = train_mnist_nnet_baseline(x_train, x_test, y_train, y_test)
#Test loss: 0.13420844078063965
#Test accuracy: 0.9732000231742859
#Total params: 52,650

代码6：7层卷积神经网络的手写字符识别

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation,Flatten
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D,MaxPooling2D,ZeroPadding2D,GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("X_train original shape", X_train.shape)
print("y_train original shape", y_train.shape)
print("X_test original shape", X_test.shape)
print("y_test original shape", y_test.shape)
#Reshape data: 3D-->>4D  (batch, height, width, channels)
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train/=255
X_test/=255
X_train.shape
#One-hot编码
number_of_classes = 10
Y_train = np_utils.to_categorical(y_train, number_of_classes)
Y_test = np_utils.to_categorical(y_test, number_of_classes)
#CNN模型架构
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(28,28,1)))
model.add(Activation('relu'))
BatchNormalization(axis=-1)
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
BatchNormalization(axis=-1)
model.add(Conv2D(64,(3, 3)))
model.add(Activation('relu'))
BatchNormalization(axis=-1)
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
BatchNormalization()
model.add(Dense(512))
model.add(Activation('relu'))
BatchNormalization()
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),metrics=['accuracy'])
gen = ImageDataGenerator(rotation_range=8,width_shift_range=0.08,
       shear_range=0.3,height_shift_range=0.08, zoom_range=0.08)
test_gen = ImageDataGenerator()
train_generator = gen.flow(X_train,Y_train,batch_size=64)
test_generator = test_gen.flow(X_test, Y_test, batch_size=64)

model.fit_generator(train_generator,steps_per_epoch=60000//64, epochs=2,
                    validation_data=test_generator,validation_steps=10000//64)
score = model.evaluate(X_test, Y_test)
print("Test accuracy: ", score[1])
#X_train original shape (60000, 28, 28),X_test original shape (10000, 28, 28)
#Total params: 594,922, Test accuracy:  0.9932000041007996