In [65]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import numpy as np

In [7]:
# Load the bank-marketing CSV; the file uses ';' as its field separator.
bank_loan_data = pd.read_csv(
    'D:/bank-additional/bank-additional-full.csv',
    sep=';',
)

In [57]:
# Encode the target column 'y' as integers: 'yes' -> 1, 'no' -> 0.
# Any other value raises KeyError, exactly as a plain dict lookup would.
_label_codes = {'yes': 1, 'no': 0}
labels = [_label_codes[answer] for answer in bank_loan_data['y']]

In [44]:
# Feature matrix: the first 20 columns of the loaded frame (positions 0..19).
X = bank_loan_data.iloc[:, :20]

In [46]:
# Convert categorical variables into dummy features.

# Collect the distinct Python/NumPy scalar types found in the first row, in
# column order. This list used to come from a cell that no longer exists, so
# the notebook failed with NameError on a fresh kernel.
data_types = []
for column in X.columns:
    first_value_type = type(X[column].iloc[0])  # positional lookup, index-agnostic
    if first_value_type not in data_types:
        data_types.append(first_value_type)

# Group the column names by the type of their first value.
type_to_columns = {
    data_type: [
        column for column in X.columns
        if type(X[column].iloc[0]) == data_type
    ]
    for data_type in data_types
}
print(type_to_columns)

# Vectorize the rows as a list of {column: value} dicts.
# 'records' is the full orient name; the abbreviated 'record' spelling is
# rejected by current pandas releases.
vec = DictVectorizer(sparse=False)
X_dummy = vec.fit_transform(X.to_dict(orient='records'))   # extract features from the data

{numpy.float64: ['emp.var.rate',
  'cons.price.idx',
  'cons.conf.idx',
  'euribor3m',
  'nr.employed'],
 numpy.int64: ['age', 'duration', 'campaign', 'pdays', 'previous'],
 str: ['job',
  'marital',
  'education',
  'default',
  'housing',
  'loan',
  'contact',
  'month',
  'day_of_week',
  'poutcome']}

In [183]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X_dummy,
    labels,
    train_size=0.9,
    random_state=1,
)



In [208]:
# Neural-network model construction - with batch normalisation
class fcNN(nn.Module):
    """1-D convolutional classifier over a flat feature vector.

    The input is treated as a single-channel sequence of ``input_len``
    values. Two Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are
    followed by a linear layer applied along the length axis and a final
    linear classifier over the flattened activations.

    Args:
        input_len: Number of features per sample (length of the 1-D
            sequence). Defaults to 63, the width the layer sizes were
            originally hard-coded for.
        num_classes: Number of output logits. When ``None`` it falls back
            to ``len(set(labels))`` using the notebook-level ``labels``
            list, which is the original behaviour.

    Raises:
        ValueError: If ``input_len`` is too short to survive the two
            pooling stages (fewer than 4 features).
    """

    def __init__(self, input_len=63, num_classes=None):
        super(fcNN, self).__init__()
        if num_classes is None:
            # Original behaviour: derive the class count from the global labels.
            num_classes = len(set(labels))

        # Each MaxPool1d(kernel_size=2) halves the length (floor division);
        # the convolutions use padding=3 with kernel_size=7, so they keep it.
        pooled_len = (input_len // 2) // 2
        if pooled_len < 1:
            raise ValueError(
                'input_len must be at least 4, got {}'.format(input_len)
            )
        hidden_len = 31  # width produced by fc1 along the length axis

        self.conv1 = nn.Sequential(  # (batch, 1, L) -> (batch, 32, L // 2)
                      nn.Conv1d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=3),
                      nn.BatchNorm1d(num_features=32, eps=1e-05, momentum=0.1, affine=True),  # batch norm
                      nn.ReLU(),
                      nn.MaxPool1d(kernel_size=2)
                     )
        self.conv2 = nn.Sequential(  # (batch, 32, L // 2) -> (batch, 64, L // 4)
                      nn.Conv1d(in_channels=32, out_channels=64, kernel_size=7, stride=1, padding=3),
                      nn.BatchNorm1d(num_features=64, eps=1e-05, momentum=0.1, affine=True),  # batch norm
                      nn.ReLU(),
                      nn.MaxPool1d(kernel_size=2)
                     )
        # Applied to the last (length) axis: (batch, 64, L // 4) -> (batch, 64, 31)
        self.fc1 = nn.Sequential(
                      nn.Linear(pooled_len, hidden_len),
                      nn.ReLU()
                     )
        self.out = nn.Linear(64 * hidden_len, num_classes)

    def forward(self, x):
        """Return class logits for ``x`` of shape (batch, 1, input_len)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.fc1(x)
        x = x.view(x.size(0), -1)  # flatten (batch, channels, length) to (batch, channels * length)
        output = self.out(x)
        return output

In [209]:
# Build a fresh, untrained instance of the fcNN model.
fcnn = fcNN()

In [210]:
# Set up the loss function and wrap the train/test splits in DataLoaders - LSC

# Features become float tensors and targets become long tensors, which is
# what CrossEntropyLoss expects for class-index targets.
x_train, x_test = torch.Tensor(x_train), torch.Tensor(x_test)
y_train, y_test = torch.LongTensor(y_train), torch.LongTensor(y_test)

# Pair each feature row with its label.
train_dataset = Data.TensorDataset(x_train, y_train)
test_dataset = Data.TensorDataset(x_test, y_test)

# Mini-batch iterators; both loaders reshuffle on every pass.
train_data_loader = Data.DataLoader(train_dataset, shuffle=True, batch_size=2000)
test_data_loader = Data.DataLoader(test_dataset, shuffle=True, batch_size=800)

# Loss function
loss_function = nn.CrossEntropyLoss()

In [214]:
# Training and per-epoch evaluation - LSC

LR = 0.00001

# Use the GPU only when one is actually available, so the cell also runs on
# CPU-only machines instead of failing inside .cuda().
use_gpu = torch.cuda.is_available()
device = torch.device('cuda' if use_gpu else 'cpu')

# Move the model to its device BEFORE building the optimizer, so the optimizer
# is guaranteed to reference the parameters that are actually trained.
fcnn = fcnn.to(device)
optimizer = torch.optim.Adam(fcnn.parameters(), lr=LR)

for epoch in range(30):
    print('epoch {}'.format(epoch + 1))
    # training-----------------------------
    fcnn.train()
    train_acc = 0  # running count of correctly classified training samples

    for batch_x, batch_y in train_data_loader:
        # (batch, features) -> (batch, 1, features): add the channel axis
        # that Conv1d expects.
        batch_x = batch_x.unsqueeze(1).to(device)
        batch_y = batch_y.to(device)

        out = fcnn(batch_x)
        loss = loss_function(out, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pred = torch.max(out, 1)[1]  # index of the largest logit per sample
        # .item() yields a plain Python int, so the accumulator never becomes
        # a tensor and needs no device transfer when printing.
        train_acc += (pred == batch_y).sum().item()

    print('Train Acc: {:.6f}'.format(train_acc / (len(train_dataset))))

    # evaluation--------------------------------
    fcnn.eval()
    with torch.no_grad():
        eval_acc = 0  # running count of correctly classified test samples

        for batch_x, batch_y in test_data_loader:
            batch_x = batch_x.unsqueeze(1).to(device)
            batch_y = batch_y.to(device)

            out = fcnn(batch_x)

            pred = torch.max(out, 1)[1]
            eval_acc += (pred == batch_y).sum().item()

        print('Test Acc: {:.6f}'.format(eval_acc / (len(test_dataset))))

epoch 1
Train Acc: 0.906499
Test Acc: 0.901190
epoch 2
Train Acc: 0.905905
Test Acc: 0.900947
epoch 3
Train Acc: 0.906607
Test Acc: 0.900947
epoch 4
Train Acc: 0.906175
Test Acc: 0.900947
epoch 5
Train Acc: 0.906391
Test Acc: 0.900704
epoch 6
Train Acc: 0.905743
Test Acc: 0.901432
epoch 7
Train Acc: 0.906310
Test Acc: 0.901190
epoch 8
Train Acc: 0.906094
Test Acc: 0.900704
epoch 9
Train Acc: 0.906175
Test Acc: 0.901190
epoch 10
Train Acc: 0.906040
Test Acc: 0.901432
epoch 11
Train Acc: 0.906067
Test Acc: 0.901190
epoch 12
Train Acc: 0.906148
Test Acc: 0.901190
epoch 13
Train Acc: 0.906121
Test Acc: 0.901190
epoch 14
Train Acc: 0.905959
Test Acc: 0.901432
epoch 15
Train Acc: 0.906418
Test Acc: 0.901432
epoch 16
Train Acc: 0.906715
Test Acc: 0.901190
epoch 17
Train Acc: 0.906256
Test Acc: 0.901432
epoch 18
Train Acc: 0.906229
Test Acc: 0.901190
epoch 19
Train Acc: 0.906040
Test Acc: 0.901190
epoch 20
Train Acc: 0.906121
Test Acc: 0.901432
epoch 21
Train Acc: 0.906283
Test Acc: 0.901190
e