In [None]:
# Boston house-price regression config

# Hyperparameter configuration
# (yaml)
class Hyperparameter:
    """Central container for every tunable setting used by this notebook.

    All values are plain class attributes; the module-level ``HP`` instance
    created below is what the other cells read from.
    """
    # ################################################################
    #                             Data
    # ################################################################
    # Device string passed to ``.to(...)`` for the model and every sample.
    device = 'cuda'
    # Folder for the generated split files.
    # NOTE(review): not referenced by the visible cells, which hard-code './data/...'.
    data_dir = './data/'
    # Raw whitespace-separated table read by the data-preparation cell.
    data_path = '../input/boston-house-prices/housing.csv'
    # Comma-separated split files written by the data-preparation cell.
    trainset_path = './data/train.txt'
    devset_path = './data/dev.txt'
    testset_path = './data/test.txt'

    in_features = 13  # input feature dim
    out_dim = 1  # output dim (one regression target per sample, not a class count)
    seed = 1234  # random seed

    # ################################################################
    #                             Model Structure
    # ################################################################
    # Layer widths of the MLP; consecutive pairs become Linear(in, out) layers.
    layer_list = [in_features, 64, 128, 64, 32, 32, out_dim]
    # ################################################################
    #                             Experiment
    # ################################################################
    batch_size = 32  # samples per batch for both the train and dev loaders
    init_lr = 1e-6  # learning rate handed to the Adam optimizer
    epochs = 10000  # number of passes over the training set
    verbose_step = 10  # evaluate on the dev set every this many steps
    save_step = 200  # write a checkpoint every this many steps


# Shared settings instance read by the other cells.
HP = Hyperparameter()


In [None]:
import re
import os
import numpy as np

# Fractions of the shuffled data assigned to each split.
trainset_ratio = 0.7
devset_ratio = 0.2
testset_ratio = 0.1  # nominal only: the test split receives every remaining row

# Output folders used by this and later cells (split files, TensorBoard logs,
# checkpoints). exist_ok=True makes the cell safe to re-run.
for foldername in ['data', 'log', 'model_save']:
    os.makedirs(foldername, exist_ok=True)

# Fix the NumPy RNG so the shuffle (and therefore the split) is reproducible.
np.random.seed(HP.seed)

# Read the raw table; the context manager guarantees the handle is closed.
with open(HP.data_path) as raw_file:
    contents = raw_file.readlines()

# str.split() with no argument splits on any run of whitespace and ignores
# leading/trailing whitespace. Blank lines yield no fields and are dropped so
# they cannot produce a ragged (non-rectangular) array.
rows = [fields for fields in (line.split() for line in contents) if fields]
dataset = np.array(rows).astype(np.float32)

np.random.shuffle(dataset)
n_items = dataset.shape[0]
trainset_sum = int(trainset_ratio * n_items)
devset_sum = int(devset_ratio * n_items)
# The test split is "everything left over", so its size is derived from the
# other two rather than from testset_ratio (which would lose rows to rounding).
testset_sum = n_items - trainset_sum - devset_sum

train_end = trainset_sum
dev_end = trainset_sum + devset_sum
np.savetxt(HP.trainset_path, dataset[:train_end], delimiter=',')
np.savetxt(HP.devset_path, dataset[train_end:dev_end], delimiter=',')
np.savetxt(HP.testset_path, dataset[dev_end:dev_end + testset_sum], delimiter=',')


In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader

class HouseDataset(torch.utils.data.Dataset):
    """Map-style dataset over a comma-separated numeric file.

    In every row, all columns except the last are the input features and the
    last column is the regression target.
    """

    def __init__(self, path):
        """Load the whole file at ``path`` into memory.

        Args:
            path: path to a comma-delimited text file of numbers.
        """
        # ndmin=2 keeps a one-row file as shape (1, n_columns); without it
        # loadtxt returns a 1-D array and __len__ would report the column count.
        self.dataset = np.loadtxt(path, delimiter=",", ndmin=2)

    def __getitem__(self, index):
        """Return ``(features, target)`` for row ``index`` as float32 tensors.

        ``features`` is 1-D (all columns but the last); ``target`` is a 0-dim
        tensor holding the last column. Both are placed on ``HP.device``.
        """
        row = self.dataset[index]
        features = torch.as_tensor(row[:-1], dtype=torch.float32, device=HP.device)
        target = torch.tensor(float(row[-1]), dtype=torch.float32, device=HP.device)
        return features, target

    def __len__(self):
        """Number of rows (samples) in the file."""
        return self.dataset.shape[0]

# Test code (manual smoke test, kept commented out)
# if __name__ =='__main__':
#
#     dataset = HouseDataset(HP.testset_path)
#     dataloader = DataLoader(dataset, batch_size=16, shuffle=True, drop_last=True)
#
#     for batch in dataloader:
#         x, y = batch
#         print(x)
#         print(y)
#         print(x.shape)
#         print(y.shape)
#         break

In [None]:
import torch


class HousePredictionModel(torch.nn.Module):
    """Fully connected regression network.

    Consecutive entries of the layer-width list become ``Linear`` layers.
    ReLU is applied after every layer except the last, so the output is an
    unconstrained real value. (A ReLU on the output would clamp predictions
    to >= 0 and give zero gradient whenever the raw output is negative.)
    """

    def __init__(self, layer_list=None):
        """Build the stack of linear layers.

        Args:
            layer_list: sequence of layer widths, input width first and output
                width last. Defaults to ``HP.layer_list`` when omitted.
        """
        super().__init__()
        widths = list(HP.layer_list if layer_list is None else layer_list)
        self.layers = torch.nn.ModuleList(
            [torch.nn.Linear(in_features=in_dim, out_features=out_dim) for in_dim, out_dim in
             zip(widths[:-1], widths[1:])])

    def forward(self, x):
        """Run ``x`` through the network and return the raw prediction.

        Args:
            x: tensor whose last dimension equals the first layer width.

        Returns:
            Tensor whose last dimension equals the final layer width.
        """
        last_index = len(self.layers) - 1
        for index, layer in enumerate(self.layers):
            x = layer(x)
            # Hidden layers get a non-linearity; the output layer stays linear.
            if index != last_index:
                x = torch.relu(x)
        return x


In [None]:
import os.path
import random
import torch
import numpy as np
from tensorboardX import SummaryWriter
from argparse import ArgumentParser
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

# TensorBoard writer used by the training loop; event files go to ./log.
logger = SummaryWriter('./log')

# Seed initialisation: seed every random number generator in use (PyTorch CPU,
# Python's `random`, NumPy, PyTorch CUDA) so that runs are reproducible.
for _seed_fn in (torch.manual_seed, random.seed, np.random.seed, torch.cuda.manual_seed):
    _seed_fn(HP.seed)


def evaluate(model, devloader, crit):
    """Compute the mean per-batch loss of ``model`` over ``devloader``.

    Args:
        model: module to evaluate; it is switched to eval mode for the pass.
        devloader: iterable yielding ``(x, y)`` batches.
        crit: loss callable invoked as ``crit(pred, y)``.

    Returns:
        The average of the per-batch loss values as a Python float, or
        ``nan`` when ``devloader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    sum_loss = 0.
    n_batches = 0
    try:
        with torch.no_grad():
            for x, y in devloader:
                pred = model(x)
                # A (B, 1) prediction against a (B,) target would be broadcast
                # to (B, B) by element-wise losses such as MSELoss and give a
                # wrong value; align the shapes when the element counts match.
                if pred.shape != y.shape and pred.numel() == y.numel():
                    pred = pred.reshape(y.shape)
                sum_loss += crit(pred, y).item()
                n_batches += 1
    finally:
        # Restore whichever mode the caller had, rather than forcing train mode.
        model.train(was_training)

    if n_batches == 0:
        return float('nan')
    return sum_loss / n_batches


def save_checkpoint(model, epoch, opt, save_path):
    """Serialize a training snapshot to ``save_path``.

    The saved object is a dict with three entries: the epoch number, the
    model's ``state_dict`` and the optimizer's ``state_dict``.

    Args:
        model: module whose parameters are stored.
        epoch: epoch number recorded in the snapshot.
        opt: optimizer whose state is stored.
        save_path: destination passed to ``torch.save``.
    """
    checkpoint = dict(epoch=epoch)
    checkpoint['model_state_dict'] = model.state_dict()
    checkpoint['optimizer_state_dict'] = opt.state_dict()
    torch.save(checkpoint, save_path)


def train():
    """Train ``HousePredictionModel`` on the train split with Adam and MSE loss.

    Per optimisation step the function logs the training loss to TensorBoard,
    evaluates on the dev split every ``HP.verbose_step`` steps, and writes a
    checkpoint into ``model_save/`` every ``HP.save_step`` steps. After the
    last epoch the whole model object is saved to ``housemodel.dm``. The
    module-level ``logger`` is closed on exit, including when training is
    interrupted.
    """
    model = HousePredictionModel().to(HP.device)

    criterion = nn.MSELoss()

    opt = optim.Adam(model.parameters(), lr=HP.init_lr)

    trainset = HouseDataset(HP.trainset_path)
    train_loader = DataLoader(trainset, batch_size=HP.batch_size, shuffle=True, drop_last=True)

    devset = HouseDataset(HP.devset_path)
    dev_loader = DataLoader(devset, batch_size=HP.batch_size, shuffle=True, drop_last=False)

    start_epoch, step = 0, 0
    eval_loss = float('nan')  # placeholder until the first dev evaluation

    model.train()

    try:
        for epoch in range(start_epoch, HP.epochs):
            # len(train_loader) is already the number of batches per epoch.
            print('Start Epoch: %d, Steps: %d' % (epoch, len(train_loader)))
            for batch in train_loader:
                x, y = batch  # unpack the batch
                opt.zero_grad()  # reset accumulated gradients
                pred = model(x)
                # The model emits (B, 1) while the targets are (B,); MSELoss
                # would broadcast that pair to (B, B) and optimise the wrong
                # quantity, so align the shapes first.
                if pred.shape != y.shape and pred.numel() == y.numel():
                    pred = pred.reshape(y.shape)
                loss = criterion(pred, y)
                loss.backward()
                opt.step()

                # Log a plain float so the writer never holds the autograd graph.
                train_loss = loss.item()
                logger.add_scalar('Loss/Train', train_loss, step)

                if not step % HP.verbose_step:
                    eval_loss = evaluate(model, dev_loader, criterion)
                    logger.add_scalar('Loss/Dev', eval_loss, step)

                if not step % HP.save_step:
                    model_path = 'model_%d_%d.model' % (epoch, step)
                    save_checkpoint(model, epoch, opt, os.path.join('model_save', model_path))

                step += 1
                logger.flush()
                # eval_loss is the most recent dev loss (refreshed every
                # HP.verbose_step steps), not one computed for this step.
                print('Epoch:[%d/%d], step:%d, Train Loss:%.5f, Dev Loss:%.5f' % (
                    epoch, HP.epochs, step, train_loss, eval_loss))

        # Pickles the entire module object (not only its state_dict).
        torch.save(model, "housemodel.dm")
    finally:
        logger.close()

In [None]:
# Run the full training loop (writes TensorBoard logs, checkpoints and the final model file).
train()