In [82]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from visdom import Visdom
import plotly.graph_objects as go
import plotly.figure_factory as ff
viz = Visdom()

Setting up a new session...


In [91]:
# Load the dataset
def load_housing_data(batch_size=256, test_size=0.3, random_state=None):
    """Load California housing data, plot a correlation heatmap, and build loaders.

    The data is split into train/test parts, the features are standardized
    with statistics fitted on the training part only, and a correlation
    heatmap of the (standardized) training features plus the target is sent
    to the module-level visdom client ``viz`` (window ``'heatmap'``).

    Args:
        batch_size: Mini-batch size of the returned training ``DataLoader``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split``; pass an int for a
            reproducible split. ``None`` keeps the original random behavior.

    Returns:
        A pair ``(train_loader, (X_test, y_test))`` where ``train_loader`` is a
        shuffling ``DataLoader`` over the training tensors and ``X_test`` /
        ``y_test`` are float32 tensors; ``y_test`` has a trailing dimension
        of size 1.
    """
    housing = fetch_california_housing()
    X, y = housing.data, housing.target
    y = y.reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Draw the correlation heatmap.
    # The label order must match the column order of the stacked matrix:
    # features first, target last. A copy is used so the dataset's own
    # feature_names list is not modified.
    names = list(housing.feature_names) + ['target']
    stacked = np.concatenate((X_train, y_train), axis=1)
    corr = np.corrcoef(stacked, rowvar=False)
    fig = ff.create_annotated_heatmap(z=corr, x=names, y=names,
                                      annotation_text=np.around(corr, decimals=2),
                                      colorscale='YlGnBu')
    fig.update_layout(title='训练数据相关系数热力图')
    fig.update_xaxes(side="bottom")
    viz.plotlyplot(fig, win='heatmap')

    # Convert everything to float32 tensors for PyTorch.
    X_train = torch.from_numpy(X_train.astype(np.float32))
    y_train = torch.from_numpy(y_train.astype(np.float32))
    X_test = torch.from_numpy(X_test.astype(np.float32))
    y_test = torch.from_numpy(y_test.astype(np.float32))

    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    return train_loader, (X_test, y_test)

In [None]:
# Network definition
class MlpNet(nn.Module):
    """Fully connected regressor: 8 inputs -> 100 -> 128 -> 50 -> 1 output.

    Dropout (probability ``drop_prod``) is applied after the second and third
    linear layers, each followed by a ReLU.
    """

    def __init__(self, drop_prod=0.3):
        super().__init__()
        # Layers are created in forward order and stored under ``blocks``.
        layers = [
            nn.Linear(8, 100),
            nn.ReLU(),
            nn.Linear(100, 128),
            nn.Dropout(drop_prod),
            nn.ReLU(),
            nn.Linear(128, 50),
            nn.Dropout(drop_prod),
            nn.ReLU(),
            nn.Linear(50, 1),
        ]
        self.blocks = nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through the stacked layers and return the result."""
        out = self.blocks(X)
        return out


In [None]:
# One training epoch
def train_epoch(model, train_loader, optimizer, loss_fn, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable of ``(X, y)`` batches exposing ``.dataset``.
        optimizer: Optimizer stepping the model parameters.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        epoch: Epoch number, used only in the progress message.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by
        ``len(train_loader.dataset)``.
    """
    log_template = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    running_loss = 0

    model.train()  # put the model in training mode
    for step, (inputs, targets) in enumerate(train_loader):
        outputs = model(inputs)
        # Targets follow the predictions onto whatever device they live on.
        batch_loss = loss_fn(outputs, targets.to(outputs.device))

        # Clear old gradients, backpropagate, update parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the batch loss weighted by the batch size.
        loss_value = batch_loss.item()
        running_loss += loss_value * inputs.shape[0]

        # Report progress every 20 batches.
        if step % 20 != 0:
            continue
        print(log_template.format(
            epoch, step * len(inputs), n_samples,
            100. * step / n_batches, loss_value))

    return running_loss / n_samples

In [None]:
def test(X_test, y_test, model):
    """Evaluate ``model`` on the test split and plot predictions vs. targets.

    The model is put into evaluation mode for the forward pass so that
    Dropout layers are disabled, and gradients are not tracked. The mode the
    model was in before the call is restored afterwards.

    Args:
        X_test: Feature tensor passed to ``model``.
        y_test: Target tensor; column 0 is used as the ground truth.
        model: Module producing a 2-D output whose column 0 is the prediction.

    Side effects:
        Prints the mean absolute error and sends a plotly figure to the
        module-level visdom client ``viz`` (window ``'test'``).
    """
    was_training = model.training
    model.eval()  # disable Dropout while predicting
    try:
        with torch.no_grad():
            y_pred = model(X_test).detach().cpu().numpy()[:, 0]
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)
    y_true = y_test.detach().cpu().numpy()[:, 0]

    # scikit-learn's signature is (y_true, y_pred).
    mae = mean_absolute_error(y_true, y_pred)
    print(f'测试集上的绝对误差: {mae}')

    # Sort samples by true value so the target curve is monotone and the
    # predictions can be compared against it visually.
    order = np.argsort(y_true)
    sc1 = go.Scatter(x=np.arange(len(y_true)), y=y_true[order], name='Original Y',
                     mode='lines')
    sc2 = go.Scatter(x=np.arange(len(y_pred)), y=y_pred[order], name='Prediction',
                     mode='markers',
                     marker=dict(size=4))
    fig = go.Figure([sc2, sc1])
    fig.update_layout(
        xaxis_title="Index",
        yaxis_title="Y",
        font=dict(
            family="Courier New, monospace",
            size=15,
            color="RebeccaPurple"
        )
    )
    viz.plotlyplot(fig, win='test')


In [None]:
net = MlpNet()
# Regression task: use mean squared error as the loss
loss_fn = nn.MSELoss()
# Train in DataParallel mode. When CUDA devices are available, DataParallel
# requires the wrapped module's parameters to already be on the first device,
# so move the network there before wrapping; on CPU-only machines this is a no-op.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = nn.DataParallel(net.to(device))
# Update parameters with Adam
trainer = torch.optim.Adam(net.parameters(), lr=0.01)
# Build the training loader and the held-out test tensors
train_iter, test_dataset = load_housing_data()
# Number of training epochs
num_epochs = 40
# Average training loss of every epoch
train_all_loss = []
for epoch in range(1, num_epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    epoch_loss = train_epoch(net, train_iter, trainer, loss_fn, epoch)
    train_all_loss.append(epoch_loss)

In [None]:
# Visualize the training loss.
# train_all_loss is a plain Python list; convert it to an ndarray because
# visdom's line() works on array/tensor inputs for Y.
viz.line(Y=np.asarray(train_all_loss), X=np.arange(len(train_all_loss)),
         win='loss', opts=dict(legend=['Train loss'],
                               title='Housing Price Prediction',
                               xlabel='epoch', ylabel='loss',
                               markers=True, markersize=7))


In [None]:
# Evaluate the trained network on the held-out (X_test, y_test) pair.
test(*test_dataset, net)