<a href="https://colab.research.google.com/github/jmq19950824/Deep-Learning/blob/master/shd1994_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# Training hyper-parameters for the MNIST convolutional network below.
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add an explicit single-channel axis in the position the active Keras
# backend reports: before the spatial axes for 'channels_first', after them otherwise.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Cast to float32 and divide by 255 (presumably mapping 8-bit pixels into [0, 1]).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two convolution layers, max-pooling and dropout, then a dense head ending
# in a softmax over `num_classes` outputs.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# NOTE: the test split doubles as the validation set during training, so the
# final evaluation below is on the same data that was monitored while fitting.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
# `score` holds the loss first and the accuracy metric second.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


In [0]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from pandas import read_csv

# Load the data and convert the categorical columns to integer codes.
dataset = read_csv('BK.csv', delimiter=';')

# Column name -> (category labels, integer codes); labels and codes are
# matched by position. Note 'education' is coded unknown=0, primary=1,
# secondary=2, tertiary=3.
category_codes = {
    'job': (['admin.', 'unknown', 'unemployed', 'management',
             'housemaid', 'entrepreneur', 'student', 'blue-collar',
             'self-employed', 'retired', 'technician', 'services'],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
    'marital': (['married', 'single', 'divorced'], [0, 1, 2]),
    'education': (['unknown', 'secondary', 'primary', 'tertiary'], [0, 2, 1, 3]),
    'default': (['no', 'yes'], [0, 1]),
    'housing': (['no', 'yes'], [0, 1]),
    'loan': (['no', 'yes'], [0, 1]),
    'contact': (['cellular', 'unknown', 'telephone'], [0, 1, 2]),
    'poutcome': (['unknown', 'other', 'success', 'failure'], [0, 1, 2, 3]),
    'month': (['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
    'y': (['no', 'yes'], [0, 1]),
}
for column, (labels, codes) in category_codes.items():
    dataset[column] = dataset[column].replace(to_replace=labels, value=codes)

# Split inputs and output: the first 16 columns are the features and
# column 16 is the target (presumably the encoded 'y' column).
array = dataset.values
x = array[:, 0:16]
Y = array[:, 16]

# Seed NumPy's global random number generator.
seed = 7
np.random.seed(seed)


# Model-building function
def create_model(units_list=(16,), optimizer='adam', init='normal'):
    """Build and compile a fully connected binary classifier for 16 input features.

    Args:
        units_list: Sizes of the hidden layers, in order. The first entry is
            the layer that also declares the 16-feature input; every further
            entry adds another ReLU hidden layer. (Earlier versions of this
            function silently ignored all entries after the first.)
        optimizer: Optimizer passed straight to ``model.compile``.
        init: Kernel initializer used for every ``Dense`` layer.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    model = Sequential()

    # First hidden layer, which also fixes the input dimension.
    model.add(Dense(units=units_list[0], activation='relu', input_dim=16, kernel_initializer=init))

    # Optional additional hidden layers, matching the other create_model
    # definitions in this notebook.
    for units in units_list[1:]:
        model.add(Dense(units=units, activation='relu', kernel_initializer=init))

    # Single sigmoid unit for the binary target.
    model.add(Dense(units=1, activation='sigmoid', kernel_initializer=init))

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model


# Wrap the builder as a scikit-learn estimator and score it with shuffled
# 10-fold cross-validation on the unscaled features.
model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=20)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, x, Y, cv=kfold)
# Mean fold score as a percentage, followed by the standard deviation across folds.
print('Accuracy: %.2f%% (%.2f)' % (results.mean() * 100, results.std()))

In [0]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from pandas import read_csv

# Load the data and convert the categorical columns to integer codes.
dataset = read_csv('BK.csv', delimiter=';')

# (column, category labels, integer codes) triples; labels and codes are
# matched by position.
encoding_table = [
    ('job',
     ['admin.', 'unknown', 'unemployed', 'management',
      'housemaid', 'entrepreneur', 'student', 'blue-collar',
      'self-employed', 'retired', 'technician', 'services'],
     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
    ('marital', ['married', 'single', 'divorced'], [0, 1, 2]),
    ('education', ['unknown', 'secondary', 'primary', 'tertiary'], [0, 2, 1, 3]),
    ('default', ['no', 'yes'], [0, 1]),
    ('housing', ['no', 'yes'], [0, 1]),
    ('loan', ['no', 'yes'], [0, 1]),
    ('contact', ['cellular', 'unknown', 'telephone'], [0, 1, 2]),
    ('poutcome', ['unknown', 'other', 'success', 'failure'], [0, 1, 2, 3]),
    ('month',
     ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
      'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
    ('y', ['no', 'yes'], [0, 1]),
]
for column, labels, codes in encoding_table:
    dataset[column] = dataset[column].replace(to_replace=labels, value=codes)


# Keep the features and target as pandas objects: the first 16 columns are
# the inputs, column 16 is the target.
X = dataset.iloc[:, 0:16]
Y = dataset.iloc[:, 16]

# Seed NumPy's global random number generator.
seed = 7
np.random.seed(seed)


# Model-building function
def create_model(units_list=[30,8], optimizer='adam', init='normal'):
    """Build and compile a multi-layer binary classifier for 16 input features.

    ``units_list`` gives the hidden-layer widths in order, ``optimizer`` is
    handed to ``compile`` and ``init`` is the kernel initializer of every
    ``Dense`` layer. Returns the compiled ``Sequential`` model.
    """
    net = Sequential()

    # The first hidden layer also declares the input dimension.
    net.add(Dense(units=units_list[0], activation='relu', input_dim=16, kernel_initializer=init))

    # Remaining hidden layers, one per extra entry of units_list.
    for width in units_list[1:]:
        net.add(Dense(units=width, activation='relu', kernel_initializer=init))

    # Sigmoid output unit for the binary target.
    net.add(Dense(units=1, activation='sigmoid', kernel_initializer=init))

    # Compile the model
    net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net


# Fit once on the full data set and keep the in-sample class probabilities
# in `pred` (displayed by the next cell).
model = KerasClassifier(build_fn=create_model, epochs=20, batch_size=10)
model.fit(X,Y)
pred=model.predict_proba(X)

# Separately estimate accuracy with shuffled 10-fold cross-validation.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model,X, Y, cv=kfold)
# Mean fold score as a percentage, followed by the standard deviation across folds.
print('Accuracy: %.2f%% (%.2f)' % (results.mean() * 100, results.std()))


In [0]:
# Display the class probabilities stored in `pred` by the previous cell.
pred

In [0]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from pandas import read_csv

# Load the data and convert the categorical columns to integer codes.
dataset = read_csv('BK.csv', delimiter=';')

# Column name -> (category labels, integer codes); labels and codes are
# matched by position.
label_encodings = {
    'job': (['admin.', 'unknown', 'unemployed', 'management',
             'housemaid', 'entrepreneur', 'student', 'blue-collar',
             'self-employed', 'retired', 'technician', 'services'],
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
    'marital': (['married', 'single', 'divorced'], [0, 1, 2]),
    'education': (['unknown', 'secondary', 'primary', 'tertiary'], [0, 2, 1, 3]),
    'default': (['no', 'yes'], [0, 1]),
    'housing': (['no', 'yes'], [0, 1]),
    'loan': (['no', 'yes'], [0, 1]),
    'contact': (['cellular', 'unknown', 'telephone'], [0, 1, 2]),
    'poutcome': (['unknown', 'other', 'success', 'failure'], [0, 1, 2, 3]),
    'month': (['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
              [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
    'y': (['no', 'yes'], [0, 1]),
}
for column in label_encodings:
    labels, codes = label_encodings[column]
    dataset[column] = dataset[column].replace(to_replace=labels, value=codes)

# Split inputs and output: the first 16 columns are the features,
# column 16 is the target.
array = dataset.values
x = array[:, 0:16]
Y = array[:, 16]

# Seed NumPy's global random number generator.
seed = 7
np.random.seed(seed)


# Model-building function
def create_model(units_list=[30,8], optimizer='adam', init='normal'):
    """Return a compiled dense network for binary classification on 16 features.

    Args:
        units_list: Hidden-layer widths; the first entry belongs to the layer
            that declares the input dimension.
        optimizer: Optimizer given to ``compile``.
        init: Kernel initializer shared by all ``Dense`` layers.
    """
    classifier = Sequential()

    first_width = units_list[0]
    extra_widths = units_list[1:]

    # Input + first hidden layer, then any further hidden layers.
    layer_stack = [Dense(units=first_width, activation='relu', input_dim=16, kernel_initializer=init)]
    for width in extra_widths:
        layer_stack.append(Dense(units=width, activation='relu', kernel_initializer=init))

    # Sigmoid output unit for the binary target.
    layer_stack.append(Dense(units=1, activation='sigmoid', kernel_initializer=init))

    for layer in layer_stack:
        classifier.add(layer)

    # Compile the model
    classifier.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return classifier
    
# Standardise the feature array built in THIS cell (`x`). The previous code
# scaled `X`, a name this cell never defines, so it only ran when another
# cell had left an `X` behind -- and then used whatever data that was.
new_X = StandardScaler().fit_transform(x)
model = KerasClassifier(build_fn=create_model, epochs=20, batch_size=10)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, new_X, Y, cv=kfold)
# Mean fold score as a percentage, followed by the standard deviation across folds.
print('Accuracy: %.2f%% (%.2f)' % (results.mean() * 100, results.std()))

# Hyper-parameter search over the hidden-layer layout.
param_grid = {}
param_grid['units_list'] = [[16], [30], [16, 8], [30, 8]]
# No `cv` is given, so scikit-learn's default split is used (3-fold in older
# releases, 5-fold in newer ones -- check the installed version).
grid = GridSearchCV(estimator=model, param_grid=param_grid)
results = grid.fit(new_X, Y)

# Class probabilities from the refitted best estimator, on the training data.
pred1=results.predict_proba(new_X)

# Report the best configuration, then every configuration tried.
print('Best: %f using %s' % (results.best_score_, results.best_params_))
means = results.cv_results_['mean_test_score']
stds = results.cv_results_['std_test_score']
params = results.cv_results_['params']

for mean, std, param in zip(means, stds, params):
    print('%f (%f) with: %r' % (mean, std, param))

In [0]:
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.models import model_from_json


# Load the data
dataset = datasets.load_iris()

x = dataset.data
Y = dataset.target

# Convert labels to categorical one-hot encoding
Y_labels = to_categorical(Y, num_classes=3)

# Set the random seed
seed = 7
np.random.seed(seed)
# Model-building function
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    """Build and compile a small dense softmax classifier (4 inputs, 3 classes).

    ``optimizer`` is passed to ``compile``; ``init`` is the kernel
    initializer of every layer. Returns the compiled ``Sequential`` model.
    """
    net = Sequential()
    # Input layer and first hidden layer.
    net.add(Dense(units=4, activation='relu', input_dim=4, kernel_initializer=init))
    # Second hidden layer, then the 3-way softmax output.
    for width, activation in ((6, 'relu'), (3, 'softmax')):
        net.add(Dense(units=width, activation=activation, kernel_initializer=init))

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return net

# Build and train the model
model = create_model()
model.fit(x, Y_labels, epochs=200, batch_size=5, verbose=2)

# Evaluate on the training data; scores[1] is the accuracy metric.
scores = model.evaluate(x, Y_labels, verbose=1)
print('%s: %.2f%%' % (model.metrics_names[1], scores[1] * 100))

# Save the model architecture as a JSON file
model_json = model.to_json()
with open('../input/model.json', 'w') as file:
    file.write(model_json)

# Save the model weights
model.save_weights('../input/model.json.h5')


# Read the architecture back from JSON
with open('../input/model.json', 'r') as file:
    model_json = file.read()

# Rebuild the model and restore its weights
new_model = model_from_json(model_json)
new_model.load_weights('../input/model.json.h5')

# The JSON only carries the architecture, so the reloaded model is compiled again here.
new_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Evaluate the model loaded from JSON. The metric name is read from
# `new_model` itself (it was previously taken from the original `model`).
scores = new_model.evaluate(x, Y_labels, verbose=0)
print('%s: %.2f%%' % (new_model.metrics_names[1], scores[1] * 100))

In [0]:
# Apply Dropout to the hidden layers
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold


# Load the data
dataset = datasets.load_iris()

x = dataset.data
Y = dataset.target

# Set the random seed
seed = 7
np.random.seed(seed)

# Model-building function
def create_model(init='glorot_uniform'):
    """Build and compile the iris classifier with Dropout after each hidden layer.

    Both hidden layers carry a max-norm kernel constraint of 3 and are
    followed by ``Dropout(rate=0.2)``. ``init`` is the kernel initializer of
    every ``Dense`` layer. Returns the compiled ``Sequential`` model.
    """
    net = Sequential()
    layer_stack = [
        Dense(units=4, activation='relu', input_dim=4, kernel_initializer=init, kernel_constraint=maxnorm(3)),
        Dropout(rate=0.2),
        Dense(units=6, activation='relu', kernel_initializer=init, kernel_constraint=maxnorm(3)),
        Dropout(rate=0.2),
        Dense(units=3, activation='softmax', kernel_initializer=init),
    ]
    for layer in layer_stack:
        net.add(layer)

    # Define the optimizer: plain SGD with momentum and no decay.
    optimizer = SGD(lr=0.01, momentum=0.8, decay=0.0, nesterov=False)

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return net

# Score the Dropout model with shuffled 10-fold cross-validation.
model = KerasClassifier(build_fn=create_model, epochs=200, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, x, Y, cv=kfold)
# Mean fold score as a percentage, followed by the standard deviation across folds.
print('Accuracy: %.2f%% (%.2f)' % (results.mean()*100, results.std()))

In [0]:
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD


# Load the data
dataset = datasets.load_iris()

x = dataset.data
Y = dataset.target

# Set the random seed
seed = 7
np.random.seed(seed)

# Model-building function
def create_model(init='glorot_uniform'):
    """Build and compile the iris classifier trained with decaying-rate SGD.

    ``init`` is the kernel initializer of every ``Dense`` layer. The
    optimizer is SGD with learning rate 0.1, momentum 0.9 and a ``decay``
    argument of 0.005. Returns the compiled ``Sequential`` model.
    """
    net = Sequential()
    net.add(Dense(units=4, activation='relu', input_dim=4, kernel_initializer=init))
    net.add(Dense(units=6, activation='relu', kernel_initializer=init))
    net.add(Dense(units=3, activation='softmax', kernel_initializer=init))

    # Optimizer settings
    sgd_settings = dict(lr=0.1, momentum=0.9, decay=0.005, nesterov=False)
    sgd = SGD(**sgd_settings)

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return net

# Train on the full data set, then predict on the same samples; the
# predictions are the cell's displayed output.
epochs = 200
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=5, verbose=1)
model.fit(x, Y)
model.predict(x)

In [0]:
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler
from math import pow, floor


# Load the data
dataset = datasets.load_iris()

x = dataset.data
Y = dataset.target

# Set the random seed
seed = 7
np.random.seed(seed)

# Compute the learning rate
def step_decay(epoch):
    """Return the step-decayed learning rate for the given epoch index.

    The rate starts at 0.1 and is halved once every 10 epochs:
    ``init_lrate * drop ** floor((1 + epoch) / epochs_drop)``.

    Args:
        epoch: Epoch index (an integer; the scheduler callback is expected
            to pass a 0-based index -- confirm against the Keras version in use).

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    init_lrate = 0.1
    drop = 0.5
    epochs_drop = 10
    # floor() has to wrap the whole quotient. Flooring only (1 + epoch) and
    # dividing afterwards gave a fractional exponent, so the rate shrank a
    # little every epoch instead of dropping in steps.
    lrate = init_lrate * pow(drop, floor((1 + epoch) / epochs_drop))
    return lrate

# Model-building function
def create_model(init='glorot_uniform'):
    """Build and compile the iris classifier used with a learning-rate schedule.

    ``init`` is the kernel initializer of every ``Dense`` layer. The SGD
    optimizer starts at learning rate 0.1 with momentum 0.9 and its own
    ``decay`` set to 0.0. Returns the compiled ``Sequential`` model.
    """
    net = Sequential()
    for layer in (
        Dense(units=4, activation='relu', input_dim=4, kernel_initializer=init),
        Dense(units=6, activation='relu', kernel_initializer=init),
        Dense(units=3, activation='softmax', kernel_initializer=init),
    ):
        net.add(layer)

    # Optimizer settings
    initial_rate, momentum_term, builtin_decay = 0.1, 0.9, 0.0
    sgd = SGD(lr=initial_rate, momentum=momentum_term, decay=builtin_decay, nesterov=False)

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return net


# Hand `step_decay` to a LearningRateScheduler callback and pass that
# callback to the wrapper alongside the other fit settings.
lrate = LearningRateScheduler(step_decay)
epochs = 200
model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=5, verbose=1, callbacks=[lrate])
# Train on the full data set, then predict on the same samples (displayed as cell output).
model.fit(x, Y)
model.predict(x)

In [0]:

import numpy as np
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.optimizers import SGD
from keras.constraints import maxnorm
from keras.utils import np_utils
from keras import backend
# Switch the Keras backend to channels-first image layout before the data is loaded.
# NOTE(review): this is a global backend setting, so it presumably also
# affects cells executed after this one in the same session -- confirm.
backend.set_image_data_format('channels_first')

# Set the random seed
seed = 7
np.random.seed(seed=seed)

# Load the data
(X_train, y_train), (X_validation, y_validation) = cifar10.load_data()

# Convert to float32 and divide by 255.0 (presumably scaling pixels into the 0-1 range)
X_train = X_train.astype('float32')
X_validation = X_validation.astype('float32')
X_train = X_train / 255.0
X_validation = X_validation / 255.0

# One-hot encode the labels; the number of classes is read off the encoded width.
y_train = np_utils.to_categorical(y_train)
y_validation = np_utils.to_categorical(y_validation)
num_classes = y_train.shape[1]

def create_model(epochs=25):
    """Build and compile the CIFAR-10 convolutional network.

    Three stages of (conv, dropout, conv, max-pool) with 32, 64 and 128
    filters are followed by two dense layers (1024 and 512 units) and a
    10-way softmax. Every conv/dense kernel except the output layer's has a
    max-norm constraint of 3.

    Args:
        epochs: Number of training epochs; only used to derive the SGD
            ``decay`` value as ``0.01 / epochs``.

    Returns:
        The compiled ``Sequential`` model.
    """
    net = Sequential()
    for stage, filters in enumerate((32, 64, 128)):
        # Only the very first convolution declares the input shape.
        input_kwargs = {'input_shape': (3, 32, 32)} if stage == 0 else {}
        net.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                       kernel_constraint=maxnorm(3), **input_kwargs))
        net.add(Dropout(0.2))
        net.add(Conv2D(filters, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
        net.add(MaxPooling2D(pool_size=(2, 2)))

    # Dense head: dropout precedes each dense layer and the output layer.
    net.add(Flatten())
    net.add(Dropout(0.2))
    for width in (1024, 512):
        net.add(Dense(width, activation='relu', kernel_constraint=maxnorm(3)))
        net.add(Dropout(0.2))
    net.add(Dense(10, activation='softmax'))

    initial_rate = 0.01
    sgd = SGD(lr=initial_rate, momentum=0.9, decay=initial_rate / epochs, nesterov=False)
    net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return net

# Train for `epochs` epochs (the same value also sets the optimizer decay
# inside create_model), then evaluate on the held-out split.
epochs = 25
model = create_model(epochs)
model.fit(x=X_train, y=y_train, epochs=epochs, batch_size=32, verbose=2)
scores = model.evaluate(x=X_validation, y=y_validation, verbose=0)
# scores[1] is the accuracy metric; the trailing figure is the author's recorded result.
print('Accuracy: %.2f%%' % (scores[1] * 100))  #77.91%

In [0]:
import keras
import numpy as np
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dropout, Activation
from keras.layers import Dense,Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.initializers import RandomNormal
from keras import optimizers
from keras.callbacks import LearningRateScheduler

# Settings for the CIFAR-10 network built in this cell.
batch_size = 128
epochs = 200
iterations = 391  # not referenced by the code visible in this cell
num_classes = 10
dropout = 0.5  # rate used by the Dropout layers in build_model


def normalize_preprocessing(x_train, x_validation):
    """Return float32 copies of both image arrays, standardised per channel.

    For each of the first three indices along axis 3, the fixed mean is
    subtracted and the result divided by the fixed standard deviation. The
    inputs are not modified, because ``astype`` produces new arrays.

    Args:
        x_train: Array indexed as ``[:, :, :, channel]``.
        x_validation: Array with the same layout.

    Returns:
        ``(x_train, x_validation)`` as standardised float32 arrays.
    """
    channel_means = [125.307, 122.95, 113.865]
    channel_stds = [62.9932, 62.0887, 66.7048]

    train_f32 = x_train.astype('float32')
    validation_f32 = x_validation.astype('float32')

    for channel, (mu, sigma) in enumerate(zip(channel_means, channel_stds)):
        for images in (train_f32, validation_f32):
            images[:, :, :, channel] = (images[:, :, :, channel] - mu) / sigma

    return train_f32, validation_f32


def scheduler(epoch):
    """Return the learning rate for ``epoch`` from a fixed piecewise schedule.

    Epochs up to and including 60 use 0.05, up to 120 use 0.01, up to 160
    use 0.002, and anything later uses 0.0004.
    """
    # (last epoch of the stage, learning rate), checked in ascending order.
    stages = ((60, 0.05), (120, 0.01), (160, 0.002))
    for last_epoch, rate in stages:
        if epoch <= last_epoch:
            return rate
    return 0.0004


def build_model():
    """Build and compile the CIFAR-10 convolutional network of this cell.

    Three groups of one wide convolution followed by two 1x1 convolutions;
    the first two groups end in max-pooling and dropout, the last feeds a
    global average pooling layer and a 10-way softmax. Reads the
    module-level ``x_train`` (for the input shape) and ``dropout`` (rate).

    Returns:
        The compiled ``Sequential`` model.
    """
    def conv(filters, kernel, stddev=0.05, **extra):
        # Every convolution shares padding, L2 weight penalty and ReLU;
        # only the initializer spread (and the input shape) vary.
        return Conv2D(filters, kernel, padding='same',
                      kernel_regularizer=keras.regularizers.l2(0.0001),
                      kernel_initializer=RandomNormal(stddev=stddev),
                      activation='relu', **extra)

    net = Sequential()

    # Group 1: 5x5 convolution on the input, then two 1x1 convolutions.
    net.add(conv(192, (5, 5), stddev=0.01, input_shape=x_train.shape[1:]))
    net.add(conv(160, (1, 1)))
    net.add(conv(96, (1, 1)))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    net.add(Dropout(dropout))

    # Group 2: 5x5 convolution, then two 1x1 convolutions.
    net.add(conv(192, (5, 5)))
    net.add(conv(192, (1, 1)))
    net.add(conv(192, (1, 1)))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    net.add(Dropout(dropout))

    # Group 3: 3x3 convolution, then 1x1 convolutions down to 10 feature maps.
    net.add(conv(192, (3, 3)))
    net.add(conv(192, (1, 1)))
    net.add(conv(10, (1, 1)))

    net.add(GlobalAveragePooling2D())
    net.add(Dense(units=10,activation='softmax'))

    sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
    net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return net


# Entry point of the cell: load CIFAR-10, preprocess, build and train.
if __name__ == '__main__':
    np.random.seed(seed=7)
    # load data
    (x_train, y_train), (x_validation, y_validation) = cifar10.load_data()
    # One-hot encode the labels.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_validation = keras.utils.to_categorical(y_validation, num_classes)

    # NOTE(review): normalize_preprocessing indexes channels on the last
    # axis, so this presumably relies on channels-last data -- confirm the
    # backend image format when this cell runs.
    x_train, x_validation = normalize_preprocessing(x_train, x_validation)

    # build network
    model = build_model()
    print(model.summary())

    # set callback
    change_lr = LearningRateScheduler(scheduler)
    cbks = [change_lr]

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=cbks,
              validation_data=(x_validation, y_validation), verbose=2)    # validation accuracy was 88.94% (author's recorded result)

In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Load the full bank-marketing data set (semicolon-separated).
bankfulldata=pd.read_csv('bkf.csv',sep=';')
bankfulldata.shape
bankfulldata.columns
bankfulldata.iloc[1:5,]

# Show the distinct values of every categorical column before encoding.
print(np.unique(bankfulldata['job']))
print(np.unique(bankfulldata['marital']))
print(np.unique(bankfulldata['education']))
print(np.unique(bankfulldata['default']))
print(np.unique(bankfulldata['housing']))
print(np.unique(bankfulldata['loan']))
print(np.unique(bankfulldata['contact']))
print(np.unique(bankfulldata['month']))
print(np.unique(bankfulldata['poutcome']))
print(np.unique(bankfulldata['y']))

# Data preprocessing: replace each category label with an integer code
# (labels and codes are matched by position).
bankfulldata['job']=bankfulldata['job'].replace(['admin.','unknown','unemployed','management','housemaid','entrepreneur',
'student','blue-collar','self-employed','retired','technician','services'],[0,1,2,3,4,5,6,7,8,9,10,11])
bankfulldata['marital']=bankfulldata['marital'].replace(['married','single','divorced'],[0,1,2])
bankfulldata['education']=bankfulldata['education'].replace(['unknown','secondary','primary','tertiary'],[0,2,1,3])
bankfulldata['default']=bankfulldata['default'].replace(['no','yes'],[0,1])
bankfulldata['housing']=bankfulldata['housing'].replace(['no','yes'],[0,1])
bankfulldata['loan']=bankfulldata['loan'].replace(['no','yes'],[0,1])
bankfulldata['contact']=bankfulldata['contact'].replace(['cellular','unknown','telephone'],[0,1,2])
bankfulldata['month']=bankfulldata['month'].replace(['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
[1,2,3,4,5,6,7,8,9,10,11,12])
bankfulldata['poutcome']=bankfulldata['poutcome'].replace(['unknown','other','success','failure'],[0,1,2,3])
bankfulldata['y']=bankfulldata['y'].replace(['no','yes'],[0,1])

# Split the data: the first 16 columns are the features, column 16 is the target.
X=bankfulldata.iloc[:,0:16]
Y=bankfulldata.iloc[:,16]

# np.random.seed() returns None, so assigning its result left `random_state`
# as None and every estimator given random_state=random_state ran unseeded.
# Keep the seed in the variable and seed the global generator separately.
random_state=123
np.random.seed(random_state)

# Standardise the features. After this block `X_scaler` is the scaled
# feature array (not the scaler object), which is what later cells fit on.
from sklearn.preprocessing import StandardScaler

X_scaler=StandardScaler()
X_scaler=X_scaler.fit_transform(X)

In [0]:
# Logistic regression baseline, fitted and scored on the same scaled data.
from sklearn.linear_model import LogisticRegression
clf1=LogisticRegression(C=1000,random_state=random_state)
clf1.fit(X_scaler,Y)
pred1=clf1.predict(X_scaler)
pred_pro1=clf1.predict_proba(X_scaler)
pred_pro1
# Put labels and predictions side by side; the last line is 1 minus the
# fraction of mismatching rows, i.e. the accuracy on the training data.
r1=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred1)],axis=1)
1-sum(r1.iloc[:,0]!=r1.iloc[:,1])/X.shape[0]  # prediction accuracy 89.08% (author's recorded result)

In [0]:
# Gaussian naive Bayes, fitted and scored on the same scaled data.
print('-----------------------------NB------------------------------------')
from sklearn.naive_bayes  import GaussianNB
clf2=GaussianNB()
clf2.fit(X_scaler,Y)
pred2=clf2.predict(X_scaler)
pred_pro2=clf2.predict_proba(X_scaler)
pred_pro2
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r2=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred2)],axis=1)
1-sum(r2.iloc[:,0]!=r2.iloc[:,1])/X.shape[0]  # prediction accuracy 83.02% (author's recorded result)

In [0]:
# RBF-kernel support vector classifier; probability=True so that
# predict_proba can be called below.
print('-----------------------------SVC-----------------------------------')
from sklearn.svm import SVC

clf3=SVC(C=10,kernel='rbf',probability=True,random_state=random_state)
clf3.fit(X_scaler,Y)
pred3=clf3.predict(X_scaler)
pred_pro3=clf3.predict_proba(X_scaler)
pred_pro3
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r3=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred3)],axis=1)
1-sum(r3.iloc[:,0]!=r3.iloc[:,1])/X.shape[0]  # prediction accuracy 92.65% (author's recorded result)


In [0]:
# k-nearest-neighbours classifier with 12 neighbours.
print('-----------------------------KNN------------------------------------')
from sklearn.neighbors import KNeighborsClassifier

clf4=KNeighborsClassifier(n_neighbors=12)
clf4.fit(X_scaler,Y)
pred4=clf4.predict(X_scaler)
pred_pro4=clf4.predict_proba(X_scaler)
pred_pro4
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r4=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred4)],axis=1)
1-sum(r4.iloc[:,0]!=r4.iloc[:,1])/X.shape[0]  # prediction accuracy 90.55% (author's recorded result)

In [0]:
# Decision tree limited to depth 8 with at least 4 samples per leaf.
print('-----------------------------DT------------------------------------')
from sklearn.tree import DecisionTreeClassifier

clf5=DecisionTreeClassifier(criterion='gini',max_depth=8,min_samples_leaf=4,random_state=random_state)
clf5.fit(X_scaler,Y)
pred5=clf5.predict(X_scaler)
pred_pro5=clf5.predict_proba(X_scaler)
pred_pro5
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r5=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred5)],axis=1)
1-sum(r5.iloc[:,0]!=r5.iloc[:,1])/X.shape[0] # 91.41% (author's recorded result)

In [0]:
# Ensemble algorithms start here.
# Random forest: 20 entropy-criterion trees, at least 5 samples per leaf.
print('-----------------------------RF------------------------------------')
from sklearn.ensemble import RandomForestClassifier

clf6=RandomForestClassifier(criterion='entropy',n_estimators=20,min_samples_leaf=5,random_state=random_state)#20
clf6.fit(X_scaler,Y)
pred6=clf6.predict(X_scaler)
pred_pro6=clf6.predict_proba(X_scaler)
pred_pro6
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r6=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred6)],axis=1)
1-sum(r6.iloc[:,0]!=r6.iloc[:,1])/X.shape[0]  # 94.94% (author's recorded result)

In [0]:
# Extremely randomised trees: 10 gini-criterion trees, at least 5 samples per leaf.
print('----------------------ExtraTreesClassifier----------------------')
from sklearn.ensemble import ExtraTreesClassifier


clf7=ExtraTreesClassifier(criterion='gini',n_estimators=10,min_samples_leaf=5,random_state=random_state) #gini+10
clf7.fit(X_scaler,Y)
pred7=clf7.predict(X_scaler)
pred_pro7=clf7.predict_proba(X_scaler)
pred_pro7
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r7=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred7)],axis=1)
1-sum(r7.iloc[:,0]!=r7.iloc[:,1])/X.shape[0] # 91.36% (author's recorded result)

In [0]:
# AdaBoost with 10 estimators and learning rate 0.1.
print('-----------------------------AdaBoost---------------------------------')
from sklearn.ensemble import AdaBoostClassifier

# Author's note: tuning these parameters behaves a bit oddly.
clf8=AdaBoostClassifier(learning_rate=0.1,n_estimators=10,random_state=random_state)
clf8.fit(X_scaler,Y)
pred8=clf8.predict(X_scaler)
pred_pro8=clf8.predict_proba(X_scaler)
pred_pro8
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r8=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred8)],axis=1)
1-sum(r8.iloc[:,0]!=r8.iloc[:,1])/X.shape[0] # 88.30% (author's recorded result)

In [0]:
# Author's note: GBDT gives the same result as AdaBoost.
print('-----------------------------GBDT------------------------------------')
from sklearn.ensemble import GradientBoostingClassifier



# Gradient boosting with 10 estimators and learning rate 0.01.
clf9=GradientBoostingClassifier(learning_rate=0.01,n_estimators=10,random_state=random_state)  #
clf9.fit(X_scaler,Y)
pred9=clf9.predict(X_scaler)
pred_pro9=clf9.predict_proba(X_scaler)
pred_pro9
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r9=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred9)],axis=1)
1-sum(r9.iloc[:,0]!=r9.iloc[:,1])/X.shape[0]   # 88.30% (author's recorded result)

In [0]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
# Define the grid search helper
def GridSearch(clf,params,X_scaler,Y, cv=5, n_jobs=None):
    """Run an accuracy-scored grid search and return the winning configuration.

    Args:
        clf: Estimator to tune.
        params: Parameter grid handed to ``GridSearchCV``.
        X_scaler: Feature matrix to fit on.
        Y: Target values.
        cv: Cross-validation setting passed to ``GridSearchCV``
            (default 5, the value this helper always used).
        n_jobs: Parallelism setting passed to ``GridSearchCV``; the default
            ``None`` leaves scikit-learn's own default in place.

    Returns:
        A ``(best_params, best_estimator)`` tuple; the estimator has been
        refitted on all the data because ``refit=True``.
    """
    # verbose=2 asks scikit-learn to print a detailed progress log.
    search = GridSearchCV(clf, params, scoring='accuracy', cv=cv, refit=True,
                          verbose=2, n_jobs=n_jobs)
    search.fit(X_scaler, Y)
    return search.best_params_, search.best_estimator_
 

In [0]:
import xgboost as xgb
# XGBoost classifier: 50 estimators, learning rate 0.05, fixed seed 123.
clf10=xgb.XGBClassifier(learning_rate=0.05,n_estimators=50,random_state=123)
clf10.fit(X_scaler,Y)
pred10=clf10.predict(X_scaler)
pred_pro10=clf10.predict_proba(X_scaler)
pred_pro10
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r10=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred10)],axis=1)
1-sum(r10.iloc[:,0]!=r10.iloc[:,1])/X.shape[0]  # 89.84% (author's recorded result)

In [0]:
# LightGBM classifier: 10 estimators, learning rate 0.01.
print('-----------------------------LightGBM----------------------------------')
import lightgbm as lgb


clf11=lgb.LGBMClassifier(learning_rate=0.01,n_estimators=10,random_state=random_state)
clf11.fit(X_scaler,Y)
pred11=clf11.predict(X_scaler)
pred_pro11=clf11.predict_proba(X_scaler)
pred_pro11
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r11=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred11)],axis=1)
1-sum(r11.iloc[:,0]!=r11.iloc[:,1])/X.shape[0]  # 88.30% (author's recorded result)



In [0]:
pip install catboost

In [0]:

# CatBoost classifier: 50 estimators, learning rate 0.05.
print('-----------------------------CatBoost----------------------------------') # catboost must be installed before running this cell
import catboost as cb


clf12=cb.CatBoostClassifier(learning_rate=0.05,n_estimators=50,random_state=random_state)
clf12.fit(X_scaler,Y)
pred12=clf12.predict(X_scaler)
pred_pro12=clf12.predict_proba(X_scaler)
pred_pro12
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r12=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred12)],axis=1)
1-sum(r12.iloc[:,0]!=r12.iloc[:,1])/X.shape[0] # 90.19% (author's recorded result)

In [0]:
# Stack the twelve classifiers defined in the earlier cells (clf1..clf12)
# under a logistic-regression meta-classifier. use_probas=True presumably
# feeds the base models' class probabilities to the meta-classifier --
# confirm against the mlxtend documentation.
print('-----------------------------Stacking---------------------------------')
from mlxtend.classifier import StackingCVClassifier
stacking_clf=StackingCVClassifier([clf1,clf2,clf3,clf4,clf5,clf6,clf7,clf8,clf9,clf10,clf11,clf12],
                                meta_classifier=LogisticRegression(C=10000),cv=5,use_probas=True,verbose=2)
stacking_clf.fit(X_scaler,Y)
pred_stacking=stacking_clf.predict(X_scaler)
pred_proba_stacking=stacking_clf.predict_proba(X_scaler)
pred_proba_stacking
# Accuracy on the training data: 1 minus the fraction of mismatching rows.
r_stacking=pd.concat([pd.DataFrame(Y),pd.DataFrame(pred_stacking)],axis=1)
1-sum(r_stacking.iloc[:,0]!=r_stacking.iloc[:,1])/X.shape[0]   # 94.65% (author's recorded result)

In [0]:
# Sentiment analysis of IMDB movie reviews
from keras.datasets import imdb
import numpy as np
from matplotlib import pyplot as plt

(x_train, y_train),(x_validation,y_validation) = imdb.load_data()

# Merge the training and validation sets so the statistics cover all reviews
x = np.concatenate((x_train, x_validation), axis=0)
y = np.concatenate((y_train, y_validation), axis=0)

print('x shape is %s, y shape is %s' % (x.shape, y.shape))
print('Classes: %s' % np.unique(y))

# Flatten every review into one array and count the distinct entries.
print('Total words: %s' % len(np.unique(np.hstack(x))))

# Length of each review (each element of x is one review, despite the loop variable's name).
result = [len(word) for word in x]
print('Mean: %.2f words (STD: %.2f)' %(np.mean(result), np.std(result)))

# Plot the review-length distribution: box plot on the left, histogram on the right
plt.subplot(121)
plt.boxplot(result)
plt.subplot(122)
plt.hist(result)
plt.show()

In [0]:
from keras.datasets import imdb
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import LSTM
from keras.layers import Dense

seed = 7
top_words = 5000      # passed as num_words when loading IMDB and as the Embedding input size
max_words = 500       # length every review is padded/truncated to; also the Embedding input_length
out_dimension = 32    # Embedding output dimension
batch_size = 128
epochs = 2

# Build the model
def build_model():
    """Build, compile and summarise the IMDB sentiment classifier.

    The network is an embedding layer, a 100-unit LSTM and a single sigmoid
    output. It reads the module-level ``top_words``, ``out_dimension`` and
    ``max_words`` settings, prints the model summary, and returns the
    compiled ``Sequential`` model.
    """
    net = Sequential()
    layer_stack = (
        Embedding(top_words, out_dimension, input_length=max_words),
        LSTM(units=100),
        Dense(units=1, activation='sigmoid'),
    )
    for layer in layer_stack:
        net.add(layer)
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Print an overview of the model
    net.summary()
    return net

# Entry point of the cell: load IMDB, pad the reviews, then train and evaluate.
if __name__ == '__main__':

    np.random.seed(seed=seed)
    # Load the data, limited to the `top_words` vocabulary setting
    (x_train, y_train), (x_validation, y_validation) = imdb.load_data(num_words=top_words)

    # Pad/truncate every review to a fixed length of `max_words`
    x_train = sequence.pad_sequences(x_train, maxlen=max_words)
    x_validation = sequence.pad_sequences(x_validation, maxlen=max_words)

    # Build the model and train it
    model = build_model()
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)
    # scores[1] is the accuracy metric on the validation split.
    scores = model.evaluate(x_validation, y_validation, verbose=2)
    print('Accuracy: %.2f%%' % (scores[1] * 100))