In [157]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

In [158]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# hyper-parameters about DNN model
input_size = 30
hidden_size = 180
num_classes = 2
# hyper-parameters about optimizer
learning_rate = 0.001
momentum = 0.9
# Hyper-parameters about training control
batch_size = 32
num_iters = 300
iters_retrain = 25
num_retrains = num_iters // iters_retrain

In [159]:
class NeuralNet(nn.Module):
    
    '''Fully connected neural network with one hidden layer
    '''
    
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [160]:
def get_jth_minibatach(j, batch_size, X_train, y_train):
    '''返回数据集中的第j个minibatch
       
       @param j: 第j次iters_retrain
       @param batch_size: int
       @param X_train: torch.tensor
       @param y_train: torch.tensor
    '''
    num_data = X_train.size(0)
    num_minibatches = num_data // batch_size + ((num_data % batch_size) > 0)
    j = j % num_minibatches
    start = j * batch_size
    stop = start + batch_size
    return X_train[start:stop], y_train[start:stop]

In [161]:
def get_num_weights(model):
    '''返回模型的weights参数个数
    '''
    num_weights = 0
    for key, value in model.state_dict().items():
        if key.endswith('weight'):
            num_weights += torch.prod(torch.tensor(value.size()))
    return num_weights.item()

In [162]:
def get_row_weights(model_state_dict):
    row_weights = []
    for key, value in model_state_dict.items():
        if key.endswith('weight'):
            row_weights.append(value.view(-1))
    return torch.cat(row_weights)

In [163]:
# dataset
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=2020)
X_train, X_test = torch.tensor(X_train, dtype=torch.float), torch.tensor(X_test, dtype=torch.float)
y_train, y_test = torch.tensor(y_train, dtype=torch.long), torch.tensor(y_test, dtype=torch.long)

In [164]:
# train
model = NeuralNet(input_size, hidden_size, num_classes)
# initialize model weights
for m in model.modules():
    if isinstance(m, (nn.Linear)):
        nn.init.xavier_uniform_(m.weight)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum) 
for i in range(num_retrains):
    saved_model_state_dict = [] # save the model state dict in each iters_retrain
    for j in range(iters_retrain):
        trn_x, trn_y = get_jth_minibatach(j, batch_size, X_train, y_train)
        trn_x.to(device)
        trn_y.to(device)
        output = model(X_train)
        loss = criterion(output, y_train) # + lambda * path_length TODO
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        saved_model_state_dict.append(copy.deepcopy(model.state_dict()))
        if (i*iters_retrain + j + 1) % 10 == 0:
            print('iters: [{0}]/[{1}] loss: {2:.2f}'.format((i*iters_retrain + j + 1), num_iters, loss))

iters: [10]/[300] loss: 0.35
iters: [20]/[300] loss: 0.31
iters: [30]/[300] loss: 0.27
iters: [40]/[300] loss: 0.23
iters: [50]/[300] loss: 0.20
iters: [60]/[300] loss: 0.20
iters: [70]/[300] loss: 0.20
iters: [80]/[300] loss: 0.20
iters: [90]/[300] loss: 0.19
iters: [100]/[300] loss: 0.19
iters: [110]/[300] loss: 0.19
iters: [120]/[300] loss: 0.19
iters: [130]/[300] loss: 0.19
iters: [140]/[300] loss: 0.19
iters: [150]/[300] loss: 0.19
iters: [160]/[300] loss: 0.19
iters: [170]/[300] loss: 0.19
iters: [180]/[300] loss: 0.19
iters: [190]/[300] loss: 0.19
iters: [200]/[300] loss: 0.19
iters: [210]/[300] loss: 0.19
iters: [220]/[300] loss: 0.20
iters: [230]/[300] loss: 0.19
iters: [240]/[300] loss: 0.21
iters: [250]/[300] loss: 0.28
iters: [260]/[300] loss: 0.20
iters: [270]/[300] loss: 0.19
iters: [280]/[300] loss: 0.19
iters: [290]/[300] loss: 0.18
iters: [300]/[300] loss: 0.18


In [165]:
# test
with torch.no_grad():
    correct = 0
    total = 0
    X_test.to(device)
    y_test.to(device)
    outputs = model(X_test)
    _, predicted = torch.max(outputs.data, 1)
    total += y_test.size(0)
    correct += (predicted == y_test).sum().item()

    print('Accuracy of the network on the Breast Cancer dataset: {0} %'.format(100 * correct / total))

Accuracy of the network on the Breast Cancer dataset: 91.60839160839161 %


In [166]:
torch.save(model.state_dict(), 'dnn_model.pth')