In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import random
!nvidia-smi  

# Basic code

## Basic operation

### 0. import packages

In [None]:
from torch.utils.data import Dataset, DataLoader             # Dataset用于存放数据集，DataLoader为数据迭代器
from torch import nn                                         # 模型模块

### 1. Tensor

#### 1.1 构造tensor

In [None]:
data = [[1,2],[3,4]]
x_list = torch.tensor(data)            # build a tensor from a nested Python list

array = np.array(data)
x_array = torch.tensor(array)          # build a tensor from a NumPy array

x_ones = torch.ones_like(x_list)       # build from another tensor: an all-ones tensor with the same shape as x_list
x_ones

In [None]:
shape = (2,3)
# Build 2x3 tensors filled with ones, random values, and zeros.
# Only the last expression (the zeros tensor) is shown as the cell output.
torch.ones(shape)
torch.rand(shape)
torch.zeros(shape)

#### 1.2 查看tensor属性

In [None]:
x = torch.ones((2,3))
# Inspect the tensor's shape, data type, and storage device (CPU or GPU).
# The bare `x.shape` is evaluated but not displayed because it is not the cell's last expression.
x.shape
print(x.dtype)
x.device

Mark: 关于tensor类型 <https://zhuanlan.zhihu.com/p/64647295>

#### 1.3 tensor操作

In [None]:
x = torch.ones((2,3))
if torch.cuda.is_available():                                  # move the tensor to the GPU when one is available
    x = x.to('cuda')
    print(f"Device tensor is stored on: {x.device}")
# x = torch.rand(1, 28, 28, device=device)                     # alternative: pass device= when creating the tensor

In [None]:
x[:,0] = 2                        # assignment by index: set the first column to 2
print(x)
t = torch.cat([x, x], dim=1)      # concatenate tensors along dim 1
print(t)

In [None]:
print(x*x)                    # *: element-wise multiplication
print(x.mul(x))               # x.mul(): element-wise multiplication
print(x.matmul(x.T))          # x.matmul(): matrix multiplication
print(x @ x.T)                # @: matrix multiplication

In [None]:
x.add_(5)               # in-place addition; re-running this cell adds 5 to x again

#### 1.4 tensor&numpy

In [None]:
t = torch.tensor([1,2])
a = t.numpy()                 # tensor -> array
t.add_(1)
a                             # changes made to the tensor are reflected in the array

In [None]:
a = np.array([1,2])
t = torch.from_numpy(a)    # array -> tensor
t

### 2. Dataset&DataLoader

**Dataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.**

#### 2.1 载入内置Dataset（Fashion MNIST）

In [None]:
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt


# Load the Fashion-MNIST training split.
training_data = datasets.FashionMNIST(
    root="data",                      # directory where the dataset files are stored
    train=True,                       # select the training split
    download=True,                    # download the data if it is not already under root
    transform=ToTensor()              # convert each sample to a tensor
)

# Same dataset and options, but the test split (train=False).
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

In [None]:
# Human-readable names for the ten class indices 0..9.
labels_map = dict(enumerate([
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]))

# Draw a 3x3 grid of randomly chosen training images, each titled with its class name.
cols, rows = 3, 3
figure = plt.figure(figsize=(8, 8))
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(labels_map[label])
    ax.axis("off")
    ax.imshow(img.squeeze(), cmap="gray")   # squeeze drops the size-1 dims so imshow gets a 2-D image
plt.show()

#### 2.2 自定义Dataset

In [None]:
import os
import pandas as pd
from torchvision.io import read_image

class CustomImageDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    The CSV is read with pandas; column 0 holds the image file name
    (relative to ``img_dir``) and column 1 holds the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotation table and remember where the images live.

        Args:
            annotations_file: path of the CSV annotation file.
            img_dir: directory that the file names in column 0 are joined onto.
            transform: optional callable applied to each loaded image.
            target_transform: optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir, self.transform, self.target_transform = img_dir, transform, target_transform

    def __len__(self):
        """Number of annotated samples (rows of the annotation table)."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Load and return the ``(image, label)`` pair stored at row ``idx``."""
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]
        # Optional per-sample transforms; skipped when the attribute is falsy (e.g. None).
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label

#### 2.3 创建DataLoader

In [None]:
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)         # shuffle=True: samples are drawn in shuffled order
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

train_features, train_labels = next(iter(train_dataloader))                       # a DataLoader is iterable: iter() creates an iterator, next() pulls one batch
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
# Show the first image of the batch together with its label.
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")

### 3. 数据变换

#### 3.1 one-hot编码

In [None]:
class_num = 10
batch_size = 4
label = torch.LongTensor(batch_size, 1).random_() % class_num      # one random class index in [0, class_num) per row
# scatter_(dim=1, index=label, src=ones): for every row, write the src value into the column named by label,
# which turns the all-zero (batch_size, class_num) tensor into one-hot rows.
torch.zeros(batch_size, class_num).scatter_(1, label, torch.ones(batch_size, 1))      # torch.ones(batch_size, 1) here could also be written as the scalar 1


## Build Neural Network

### 0. 检查GPU是否可用

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

### 1. 定义模型类

In [None]:
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier for 28x28 inputs (10 output logits)."""

    def __init__(self):
        """Create the flatten layer and the Linear/ReLU stack."""
        super().__init__()
        hidden_units = 512
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, hidden_units), nn.ReLU(),
            nn.Linear(hidden_units, hidden_units), nn.ReLU(),
            nn.Linear(hidden_units, 10),
        )

    def forward(self, x):
        """Flatten each sample and return the raw (un-normalized) logits."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

**torch.nn是pytorch的高阶API，类似于tensorflow中的keras**  
In PyTorch, the nn package serves this same purpose. The nn package defines a set of Modules, which are roughly equivalent to neural network layers. A Module receives input Tensors and computes output Tensors, but may also hold internal state such as Tensors containing learnable parameters. The nn package also defines a set of useful loss functions that are commonly used when training neural networks.  
Read More: <https://pytorch.org/tutorials/beginner/nn_tutorial.html#neural-net-from-scratch-no-torch-nn>

In [None]:
model = NeuralNetwork().to(device)                # move the model onto the selected device
print(model)                                      # print the model structure
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")            # show each parameter's name, size and first two entries

In [None]:
# Run one random 28x28 input through the model and turn the logits into a class prediction.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)      # softmax along dim 1 turns the logits into probabilities
y_pred = pred_probab.argmax(1)               # index of the largest probability in each row
print(f"Predicted class: {y_pred}")

### 2. 常用的layer

In [None]:
# Examples of commonly used layers. The first three lines only construct layer objects
# (the results are discarded); the softmax layer is then applied to `logits` from the previous cell.
nn.Flatten()
nn.Linear(in_features = 10, out_features = 1)
nn.ReLU()
softmax = nn.Softmax(dim=1)
print(softmax(logits))

更多用法见<https://pytorch.org/docs/stable/nn.html>  

## AutoGrad

### 1. 一个简单案例

In [None]:
x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)            # requires_grad=True: gradients will be computed for this tensor
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

loss.backward()                                     # compute the gradients of loss with respect to the parameters
print(w.grad)
print(b.grad)

A function that we apply to tensors to construct a computational graph is in fact an object of class Function.   
This object knows how to compute the function in the forward direction, and also how to compute its derivative during the backward propagation step.   
A reference to the backward propagation function is stored in grad_fn property of a tensor.

We can only obtain the grad properties for the leaf nodes of the computational graph, which have requires_grad property set to True. For all other nodes in our graph, gradients will not be available.  
We can only perform gradient calculations using backward once on a given graph, for performance reasons. If we need to do several backward calls on the same graph, we need to pass retain_graph=True to the backward call.

In [None]:
# Ways to disable gradient tracking
z = torch.matmul(x, w)+b
print(z.requires_grad)

# Operations executed inside torch.no_grad() are not tracked by autograd.
with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)


z = torch.matmul(x, w)+b
z_det = z.detach()              # returns a tensor detached from the current graph; it does not require grad
print(z_det.requires_grad)

**Hint: 关于requires_grad, requires_grad_(), detach(), torch.no_grad()**  
- requires_grad: 生成tensor时的参数
- requires_grad_(): tensor类的函数，使用时对tensor进行in_place更新
- detach(): tensor类的函数，使用时返回一个新的tensor对象，其requires_grad参数为False，**是原tensor的赋值引用**，共用同一内存
- torch.no_grad(): with torch.no_grad()，后的操作都不计算梯度，常用于模型推断以加快计算速度

In [None]:
x1 = torch.Tensor([1,2,3])
x2 = torch.tensor([1.0, 2.0], requires_grad=True)            # note: the torch.Tensor constructor cannot be used for this initialization
x1.requires_grad_()                                          # switch on gradient tracking in place
y = x2.detach()                                              # detached copy of x2 that does not require grad
x1.requires_grad, x2.requires_grad, y.requires_grad

### 2. 自定义函数的反向传播

In [None]:
class GradCoeff(torch.autograd.Function):
    """Identity in the forward pass; multiplies the incoming gradient by ``coeff`` in the backward pass."""

    @staticmethod
    def forward(ctx, x, coeff):
        """Return ``x`` unchanged and remember ``coeff`` on the context for backward."""
        ctx.coeff = coeff
        return x

    @staticmethod
    def backward(ctx, grad_output):
        """Scale the upstream gradient by the stored coefficient.

        backward must return one value per forward input; ``coeff`` needs
        no gradient, so its slot is ``None``.
        """
        scaled_grad = ctx.coeff * grad_output
        return scaled_grad, None

# Try it out
x = torch.tensor([2.], requires_grad=True)
ret = GradCoeff.apply(x, -0.1)                  # forward takes both x and coeff; coeff is set to -0.1
ret = ret ** 3                          
print(ret)                                      # tensor([8.], grad_fn=<PowBackward0>)   (2 ** 3 = 8)
ret.backward()  
print(x.grad)                                   # tensor([-1.2000]): d(x**3)/dx = 3 * 2**2 = 12, scaled by coeff -0.1

In [None]:
class CustomLayer(nn.Module):
    """Parameter-free layer that routes its input through ``GradCoeff`` with coefficient -0.1."""

    def forward(self, input):
        """Apply ``GradCoeff`` to ``input`` with a fixed coefficient of -0.1."""
        grad_scale = -0.1
        return GradCoeff.apply(input, grad_scale)

In [None]:
# Apply the custom layer to a one-element tensor and display the result.
layer = CustomLayer()
x = layer(torch.tensor([1.], requires_grad=True))
x

Mark: Finetune <https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html>

## Optimizing Model Parameters

### 1. Define Hyperparameters

In [None]:
learning_rate = 1e-3      # step size handed to the optimizer in section 3
batch_size = 64           # NOTE: the DataLoaders in this part of the notebook hard-code batch_size=64 instead of reading this
epochs = 5                # number of passes of the train/test loop

Mark: Hyperparameter tuning <https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html>

### 2. Loss function

In [None]:
loss_fn = nn.CrossEntropyLoss()        # note: no softmax layer is needed in the model when using CrossEntropyLoss

### 3. Optimizer

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)   # SGD over all parameters of `model`

Inside the training loop, optimization happens in three steps:
- Call optimizer.zero_grad() to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.（每个batch要重置梯度）
- Backpropagate the prediction loss with a call to loss.backward(). PyTorch deposits the gradients of the loss w.r.t. each parameter.（反向传播梯度）
- Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.（更新参数）

Mark: Optimizer <https://pytorch.org/docs/stable/optim.html>

### 4. Train&Test Loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch: one optimizer step per batch of ``dataloader``.

    Prints the current batch loss and the number of samples consumed so far
    on every 100th batch. Returns nothing.
    """
    size = len(dataloader.dataset)
    # `batch` counts batches from 0; there are about len(dataset) / batch_size of them.
    for batch, samples in enumerate(dataloader):
        inputs, targets = samples
        loss = loss_fn(model(inputs), targets)

        # Reset old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100:
            continue
        current = batch * len(inputs)
        loss = loss.item()
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

the key parts of training are: 
- A network is created.

- An optimizer (in this case, a stochastic gradient descent optimizer) is created, and the network’s parameters are associated with it.

- A training loop…
 - acquires an input,
 - runs the network,
 - computes a loss,
 - zeros the network’s parameters’ gradients,
 - calls loss.backward() to update the parameters’ gradients,
 - calls optimizer.step() to apply the gradients to the parameters.

### 5. Full Implementation

In [None]:
# Load the Fashion-MNIST training split (downloaded into ./data when missing).
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

# Load the matching test split.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

# Batches of 64 samples; no shuffle argument is passed here.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then 784 -> 512 -> 512 -> 10 with ReLU in between."""

    def __init__(self):
        """Build the flatten layer and the Linear/ReLU stack."""
        super().__init__()
        self.flatten = nn.Flatten()
        stack_layers = [
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack_layers)

    def forward(self, x):
        """Return the raw logits for a batch of inputs."""
        return self.linear_relu_stack(self.flatten(x))

model = NeuralNetwork()          # fresh, untrained model; no .to(device) is applied in this cell

learning_rate = 1e-3
batch_size = 64                  # NOTE: not read by the DataLoaders in this cell, which hard-code batch_size=64
epochs = 5

loss_fn = nn.CrossEntropyLoss()  
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

model.train()                    # training mode is set once here; test_loop below therefore also runs in training mode
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

**注意训练模式和评估模式的切换**  
Note that the above process is done entirely while the network module is in “training mode”.   
Modules default to training mode and can be switched between training and evaluation modes using train() and eval(). They can behave differently depending on which mode they are in.   
For example, **the BatchNorm module maintains a running mean and variance during training that are not updated when the module is in evaluation mode.**  
In general, modules should be in training mode during training and only switched to evaluation mode for inference or evaluation.   

In [None]:
# Check whether the module is currently in training mode
class ModalModule(nn.Module):
    """Module whose output depends on the mode: adds 1 in training mode, identity in evaluation mode."""

    def forward(self, x):
        """Return ``x + 1.`` while ``self.training`` is true, otherwise ``x`` unchanged."""
        # self.training is the flag toggled by .train() / .eval().
        return x + 1. if self.training else x

## Save&Load Model

### 1. 保存模型参数

In [None]:
import torchvision.models as models

model = models.vgg16(pretrained=True)                         # VGG16 with pretrained weights
# NOTE(review): newer torchvision releases replace `pretrained=` with a `weights=` argument — confirm against the installed version.
torch.save(model.state_dict(), 'model_weights.pth')           # model.state_dict() is a dict holding the model's parameters
model = models.vgg16() # we do not specify pretrained=True, i.e. do not load default weights
model.load_state_dict(torch.load('model_weights.pth'))        # copy the saved parameters into the fresh model
model.eval()                                                  # switch to evaluation mode; as the cell's last expression this also displays the model

### 2. 保存整个模型（参数+结构）

In [None]:
# Save the whole model object (structure + parameters) and load it back.
# NOTE(review): this pickles the module, so only load files you trust. Recent PyTorch releases reportedly
# default torch.load to weights_only=True, which rejects full pickled modules — confirm for the installed version.
torch.save(model, 'model.pth')
model = torch.load('model.pth')

### 3. 保存checkpoint

Saving and loading a general checkpoint model for inference or resuming training can be helpful for picking up where you last left off.   
When saving a general checkpoint, you must save more than just the model’s state_dict.   
- It is important to also save the **optimizer’s state_dict**, as this contains buffers and parameters that are updated as the model trains.   
- Other items that you may want to save are **the epoch you left off on, the latest recorded training loss, external torch.nn.Embedding layers**, and more, based on your own algorithm.

In [None]:
# Define and initialize the model
class NeuralNetwork(nn.Module):
    """Tiny classifier: batch-normalize 4 input features, then a 4 -> 3 linear layer."""

    def __init__(self):
        """Create the BatchNorm + Linear pipeline."""
        super().__init__()
        layers = [
            nn.BatchNorm1d(4),        # normalizes the 4 input features
            nn.Linear(4, 3),
        ]
        self.linear = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 3 output logits for each input row."""
        return self.linear(x)
    
model = NeuralNetwork().double()     # cast the parameters to float64
model = model.to(device)

# The optimizer must be built from this model's parameters so that the
# optimizer state saved below belongs to the same model as model_state_dict.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Placeholder bookkeeping values stored alongside the weights.
EPOCH = 5
PATH = "model.pt"
LOSS = 0.4

# Save the checkpoint: model weights, optimizer state and training progress in one file.
torch.save({
            'epoch': EPOCH,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': LOSS,
            }, PATH)

In [None]:
# Load the checkpoint
# Rebuild the model and optimizer first; their state is then overwritten from the file.
model = NeuralNetwork().double()
model = model.to(device)
# Built from this model's parameters; the hyperparameters given here are replaced by
# the saved param_groups when optimizer.load_state_dict() runs below.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# map_location places the stored tensors on the current device, so a checkpoint
# written on a GPU machine can still be restored on a CPU-only one.
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()      # evaluation mode for inference; call model.train() instead to resume training

# Example

## A manual Logistic Regression (AS an example of custom nn.Module)

### 1. Load data, create dataset&dataloader

In [None]:
from sklearn.datasets import load_breast_cancer
from torch.utils.data import TensorDataset, random_split

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # show GPU information

def minmax_scaler(data):
    """Rescale every column of ``data`` linearly onto [0, 1].

    Args:
        data: 2-D NumPy array; columns are scaled independently using the
            minimum and maximum taken along axis 0.

    Returns:
        Array of the same shape with ``(data - min) / (max - min)`` per column.
        A constant column (max == min) is mapped to 0 instead of producing
        NaN from a 0/0 division.
    """
    min_value = data.min(axis=0)
    value_range = data.max(axis=0) - min_value
    # Zero range means a constant column: divide by 1 so the result is 0, not NaN.
    safe_range = np.where(value_range == 0, 1, value_range)
    return (data-min_value)/safe_range

cancer_data = load_breast_cancer()
# Min-max scale the features, then wrap features and targets in tensors on the selected device.
x, y = torch.Tensor(minmax_scaler(cancer_data.data)).to(device), torch.Tensor(cancer_data.target).to(device) 



In [None]:
dataset = TensorDataset(x,y)   # build the dataset object

# Randomly split into a training set (70%) and a test set (the remainder)
train_size, test_size = int(0.7*len(dataset)), len(dataset)-int(0.7*len(dataset))
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

from torch.utils.data import DataLoader
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)

### 2. Define Model

In [None]:
from torch import nn
class LR(nn.Module):
    """Hand-written logistic regression: ``sigmoid(inputs @ weight + bias)``.

    Args:
        in_features: number of input features per sample.
    """
    def __init__(self, in_features):
        super().__init__()
        # Learnable parameters, randomly initialized from a standard normal.
        self.weight = nn.Parameter(torch.randn(in_features, 1))
        self.bias = nn.Parameter(torch.randn(1))
    def linear(self, inputs):
        """Affine map producing one logit per row, shape ``(N, 1)``.

        The result lives on the same device as the inputs and parameters,
        so the module does not depend on any global ``device`` variable.
        """
        return inputs@self.weight+self.bias
    def sigmoid(self, logits):
        """Logistic function of ``logits``.

        Uses torch.sigmoid rather than ``1/(1+exp(-logits))``: the hand-written
        form overflows ``exp`` for very negative logits and then yields NaN gradients.
        """
        return torch.sigmoid(logits)
    def forward(self, inputs):
        """Return the predicted probabilities with the trailing size-1 dim removed, shape ``(N,)``."""
        prob = self.sigmoid(self.linear(inputs))
        return prob.squeeze(-1)

In [None]:
# More complex model (with mlp feature extractor)
net = nn.Sequential(
    nn.Linear(30, 10),
    nn.ReLU(),                 # nn.ReLU's only argument is the boolean `inplace` flag, not a feature size
    nn.BatchNorm1d(10),
    LR(10)                     # logistic-regression head defined in this notebook
)
lr_model = net.to(device)

In [None]:
# More complex model (with mlp feature extractor and custom module)
class net(nn.Module):
    """MLP feature extractor followed by the custom ``LR`` logistic-regression head.

    Args:
        in_features: number of input features per sample.
    """
    def __init__(self, in_features):
        super().__init__()
        # Feature extractor: in_features -> 20 hidden units.
        self.fe = nn.Sequential(
            nn.Linear(in_features, 20),
            nn.ReLU(),              # nn.ReLU's only argument is the boolean `inplace` flag, not a feature size
            nn.Dropout(p=0.2),
            nn.BatchNorm1d(20),
        )
        self.lr = LR(20)
    def forward(self, inputs):
        """Return the probability that ``LR`` predicts from the extracted features."""
        x = self.fe(inputs)
        prob = self.lr(x)
        return prob

### 2.1 Initialize Model Parameters

In [None]:
# Initialize model parameter
# Note that no_grad() is used here to avoid tracking this computation in the autograd graph.
@torch.no_grad()
def init_weights(m):
    """Xavier-normal initialize an ``nn.Linear`` layer's weight and zero its bias.

    Meant to be passed to ``module.apply(init_weights)``. Modules that are not
    ``nn.Linear`` are left untouched, and a layer created with ``bias=False``
    (whose ``bias`` is None) only has its weight initialized.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_normal_(m.weight)
    if m.bias is not None:
        m.bias.fill_(0.0)

### 2.2 Module hooks

In [None]:
# Not finished

### 3. Define train&test loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch and return the average batch loss.

    Args:
        dataloader: iterable of ``(X, y)`` batches; must expose ``.dataset``.
        model: module being trained.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer holding the model's parameters.

    Returns:
        Mean of the per-batch losses over the epoch (each measured before
        that batch's parameter update).
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0.0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass: prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once; it is reused for logging and for the epoch
        # average instead of evaluating loss_fn a second time.
        batch_loss = loss.item()
        train_loss += batch_loss
        if batch % 5 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (torch.round(pred)==y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:.3f} \n")
    return test_loss

### 4. Train the model

In [None]:
torch.manual_seed(1)
torch.cuda.manual_seed(1)               # fix the random seeds so results can be reproduced (same seed -> same network initialization)

feature_size = len(train_dataset[0][0])  # number of features in one sample
lr_model = LR(feature_size).to(device)
# lr_model.to(dtype=torch.float32)      # change the model's precision
# lr_model = net(feature_size).to(device)

# Apply the function recursively on the module and its submodules.
# lr_model.apply(init_weights)

loss_fn = torch.nn.BCELoss()    # expects probabilities, i.e. outputs that already went through a sigmoid
learning_rate = 1e-2
optimizer = torch.optim.Adam(lr_model.parameters(), lr=learning_rate)
train_loss, test_loss = np.array([]), np.array([])   # per-epoch loss histories


lr_model.train()
if lr_model.training:
    print('Under training mode')
epochs = 30
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, lr_model, loss_fn, optimizer)
    epoch_test_loss = test_loop(test_dataloader, lr_model, loss_fn)
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
# Inspect the structure of the custom module: print each (name, child module) pair
for child in lr_model.named_children():
    print(child)
# print(lr_model)

In [None]:
import matplotlib.pyplot as plt
# Training curves: average loss per epoch for the training and the test set.
epoch_axis = np.arange(1, epochs + 1)
plt.plot(epoch_axis, train_loss, label="train loss")
plt.plot(epoch_axis, test_loss, label="test loss")
# Label the axes and the two curves so the figure can be read on its own.
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

### 5. Predict result on test data

In [None]:
lr_model.eval()
# Materialize the whole test split once as (features, labels) tensors.
test_features, test_labels = test_dataset[:]
with torch.no_grad():
    prob = lr_model(test_features)
    y_pred = (prob>0.5).type(torch.float)           # threshold the probabilities at 0.5
print(f"Predicted class: {y_pred}")

# Vectorized accuracy: matching predictions divided by the number of test samples.
accuracy = (y_pred == test_labels).sum().item() / len(test_labels)
print(f"accuracy: {accuracy:.3f}")

## Example for classification（Iris dataset）

### 0. Get device for training

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # show GPU information

### 1.1 Load Dataset

In [None]:
from sklearn.datasets import load_iris
from torch.utils.data import TensorDataset, random_split

iris_data = load_iris()
# Convert the NumPy arrays to tensors and move them to the selected device.
# torch.Tensor(...) gives float32 features; torch.tensor(...) keeps the dtype of the label array.
x, y = torch.Tensor(iris_data.data).to(device), torch.tensor(iris_data.target).to(device)        
dataset = TensorDataset(x,y)   # build the dataset object

# Randomly split into training and test sets. The test size is the remainder,
# so the two sizes always add up to len(dataset) as random_split requires.
train_size = int(0.7*len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

**注意：大写的torch.Tensor()得到的是float32类型的tensor，与nn的默认参数类型相同，否则会报错**

### 1.2 Define DataLoader

In [None]:
from torch.utils.data import DataLoader
# Wrap both splits in DataLoaders that yield shuffled batches of 16 samples.
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)

### 2. Define Model

#### 2.1 Define the network (Use nn.Sequential)

In [None]:
from torch import nn
# English rendering of the note kept in the string below:
#   `net` is an instance of Sequential, a container for layers chained one after another;
#   nn.Linear is a fully connected layer.
"""
net是一个Sequential类的实例，其为串联在一起的多个层定义了一个容器
nn.Linear表示全连接层
"""
net = nn.Sequential(
    nn.Linear(4, 3)       # three label classes; CrossEntropyLoss is used, so no softmax layer is needed
)
# net = net.double()
model = net.to(device)    # place the model on the selected device

#### 2.2 Define the network (Use subclass of nn.Module)

In [None]:
class NeuralNetwork(nn.Module):
    """Iris classifier: batch-normalize the 4 input features, then a 4 -> 3 linear layer."""

    def __init__(self):
        """Create the BatchNorm + Linear pipeline."""
        super().__init__()
        input_norm = nn.BatchNorm1d(4)        # normalizes the 4 input features
        classifier = nn.Linear(4, 3)
        self.linear = nn.Sequential(input_norm, classifier)

    def forward(self, x):
        """Return the 3 class logits for each input row."""
        return self.linear(x)
    
#model = NeuralNetwork().double()    # alternative: float64 parameters
model = NeuralNetwork()
model = model.to(device)             # place the model on the selected device

#### 2.3 Define the network (Combine model, loss, lr, optimizer in a model class)

In [None]:
class MyNN(nn.Module):
    """Classifier that bundles its own loss function, learning rate and optimizer.

    The network is BatchNorm1d(4) followed by Linear(4, 3). The attributes
    ``loss_fn``, ``lr`` and ``optimizer`` are read by training loops that take
    only the model as argument.
    """

    def __init__(self):
        """Build the layers first, then the optimizer over the parameters registered so far."""
        super().__init__()
        layers = (
            nn.BatchNorm1d(4),        # normalizes the 4 input features
            nn.Linear(4, 3),
        )
        self.linear = nn.Sequential(*layers)
        self.loss_fn = nn.CrossEntropyLoss()
        self.lr = 1e-2
        # Must come after the layers exist, otherwise there would be nothing to optimize.
        self.optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return the 3 class logits for each input row."""
        return self.linear(x)
    
#model = MyNN().double()    # alternative: float64 parameters
model = MyNN()
model = model.to(device)    # place the model on the selected device

### 3. Define the train loop

#### 3.1 Using pure model (without loss&optimizer)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one epoch and return the average batch loss.

    Args:
        dataloader: iterable of ``(X, y)`` batches; must expose ``.dataset``.
        model: module being trained.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer holding the model's parameters.

    Returns:
        Mean of the per-batch losses over the epoch (each measured before
        that batch's parameter update).
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0.0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass: prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for logging and for the epoch
        # average, rather than evaluating loss_fn a second time.
        batch_loss = loss.item()
        train_loss += batch_loss
        if batch % 5 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:.3f} \n")
    return test_loss

#### 3.2 Using custom model (with loss&optimizer)

In [None]:
def train_loop(dataloader, model, loss_fn=None, optimizer=None):
    """Train ``model`` for one epoch and return the average batch loss.

    This definition replaces the earlier four-argument ``train_loop`` in the
    notebook namespace, so it accepts both call styles: when ``loss_fn`` or
    ``optimizer`` is omitted, the one stored on the model is used instead.

    Args:
        dataloader: iterable of ``(X, y)`` batches; must expose ``.dataset``.
        model: module being trained; must provide ``model.loss_fn`` /
            ``model.optimizer`` for whichever argument is not passed.
        loss_fn: optional loss callable; defaults to ``model.loss_fn``.
        optimizer: optional optimizer; defaults to ``model.optimizer``.

    Returns:
        Mean of the per-batch losses over the epoch.
    """
    if loss_fn is None:
        loss_fn = model.loss_fn
    if optimizer is None:
        optimizer = model.optimizer

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0.0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass: prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for logging and for the epoch average.
        batch_loss = loss.item()
        train_loss += batch_loss
        if batch % 5 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
    return train_loss / num_batches

def test_loop(dataloader, model):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += model.loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:.3f} \n")
    return test_loss

### 4. Train the defined network

#### 4.1 Train with pure model

In [None]:
# Define model hyperparameters
learning_rate = 1e-2

# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_loss, test_loss = np.array([]), np.array([])   # per-epoch loss histories

model.train()
epochs = 30
# NOTE(review): this cell calls train_loop/test_loop with explicit loss_fn and optimizer arguments, i.e. the
# section 3.1 signatures; the section 3.2 cell redefines both names — confirm which definitions are active.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    epoch_test_loss = test_loop(test_dataloader, model, loss_fn)
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

#### 4.2 Train with custom model (with loss&optimizer)

In [None]:
epochs = 30
train_loss, test_loss = np.array([]), np.array([])   # per-epoch loss histories, reset for this run

# Only the model is passed: these loops read the loss function and optimizer from the model itself.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, model)
    epoch_test_loss = test_loop(test_dataloader, model)
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
# Plot the training curves: average loss per epoch for the training and the test set.
import matplotlib.pyplot as plt
epoch_axis = np.arange(1, epochs + 1)
plt.plot(epoch_axis, train_loss, label="train loss")
plt.plot(epoch_axis, test_loss, label="test loss")
# Label the axes and the two curves so the figure can be read on its own.
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

### 5. Get the predict value

In [None]:
model.eval()
# Materialize the whole test split once as (features, labels) tensors.
test_features, test_labels = test_dataset[:]
with torch.no_grad():
    logits = model(test_features)
    pred_probab = nn.Softmax(dim=1)(logits)
    y_pred = pred_probab.argmax(1)             # argmax(1) returns, for each row, the index of its largest value
print(f"Predicted class: {y_pred}")

# Vectorized accuracy: matching predictions divided by the number of test samples.
accuracy = (y_pred == test_labels).sum().item() / len(test_labels)
print(f"accuracy: {accuracy:.3f}")

## Example for regression (California Housing dataset)

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # show GPU information

### Load Dataset

In [None]:
from sklearn.datasets import fetch_california_housing
from torch.utils.data import TensorDataset, random_split

X,y = fetch_california_housing(return_X_y=True)
print(f"The shape of input is: {len(X[0])}")      # prints the length of the first row, i.e. the number of features
# Convert the NumPy arrays to tensors and move them to the selected device
dataset = TensorDataset(torch.Tensor(X).to(device),torch.Tensor(y).to(device))   # build the dataset object

# Randomly split into a training set (70%) and a test set (the remainder)
train_size, test_size = int(0.7*len(dataset)), len(dataset)-int(0.7*len(dataset))
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


### Define DataLoader

In [None]:
from torch.utils.data import DataLoader
# Wrap both splits in DataLoaders that yield shuffled batches of 64 samples.
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)

### Define model

#### 2.1 Define the network (Use nn.Sequential)

In [None]:
from torch import nn
# `net` is an instance of Sequential, a container for layers chained one after another.
# nn.Linear is a fully connected layer.
net = nn.Sequential(
    nn.BatchNorm1d(8),
    nn.Linear(8, 16),       
    nn.ReLU(),                # nn.ReLU's only argument is the boolean `inplace` flag, not a feature size
    nn.BatchNorm1d(16),
    nn.Linear(16,1)
)
# net = net.double()        # would make the network parameters double precision
model = net.to(device)    # place the model on the selected device

#### 2.2 Define the network (Use subclass of nn.Module)

In [None]:
class NeuralNetwork(nn.Module):
    """Regression MLP for 8 input features: BatchNorm -> Linear(8, 16) -> ReLU -> BatchNorm -> Linear(16, 1)."""

    def __init__(self):
        """Build the layer stack."""
        super(NeuralNetwork, self).__init__()
        self.linear_relu = nn.Sequential(
            nn.BatchNorm1d(8),        # normalizes the 8 input features
            nn.Linear(8, 16),
            nn.ReLU(),                # nn.ReLU's only argument is the boolean `inplace` flag, not a feature size
            nn.BatchNorm1d(16),
            nn.Linear(16,1)
        )

    def forward(self, x):
        """Return one prediction per row, shape ``(N,)``.

        The trailing size-1 dimension of the last layer is squeezed away so
        the output lines up with a 1-D target tensor.
        """
        logits = self.linear_relu(x)
        logits = logits.squeeze(-1)
        return logits
    
#model = NeuralNetwork().double()    # alternative: float64 parameters
model = NeuralNetwork()
model = model.to(device)             # place the model on the selected device

### 3. Define the train loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        dataloader: iterable of (X, y) batches exposing `len()` and `.dataset`.
        model: callable mapping a batch X to predictions.
        loss_fn: callable (pred, y) -> scalar loss tensor.
        optimizer: optimiser holding the parameters to update.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both logging and accumulation,
        # instead of evaluating loss_fn a second time on the same tensors.
        batch_loss = loss.item()
        if batch % 50 == 0:
            current = batch * len(X)
            print(f"Train loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
        train_loss += batch_loss
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    print(f"Test MSE Error: {test_loss:.3f} \n")
    return test_loss

### 4. Train the model

In [None]:
# Define model hyperparameters
learning_rate = 1e-3

# Define loss and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_loss, test_loss = np.array([]), np.array([])

model.train()
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    # test_loop prints the test MSE itself, so it is not printed a second time here.
    epoch_test_loss = test_loop(test_dataloader, model, loss_fn)
    # Keep one entry per epoch for the loss-curve plot.
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Switch the model to evaluation mode before predicting on the whole test split.
model.eval()
with torch.no_grad():
    y_pred = model(test_dataset[:][0]).cpu().numpy()
y_test = test_dataset[:][1].cpu().numpy()
print(f"R2 score: {r2_score(y_test, y_pred):.3f}")
print(f"MSE: {mean_squared_error(y_pred, y_test):.3f}")

# Predicted-vs-true scatter; points on the black diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(3, 3), dpi=300)
ax.scatter(y_test, y_pred, s=2, color='mediumblue', alpha=0.2)
x = np.linspace(0, 5, 5)
ax.plot(x, x, 'k')
ax.set_xlabel('true', fontsize=15)
ax.set_ylabel('predict', fontsize=15)
ax.grid(True)

In [None]:
# Plot the per-epoch training loss and test loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)
plt.plot(np.linspace(1,epochs,epochs), test_loss)

In [None]:
# Serialise the whole model object (not just its state_dict) to model.pth;
# the "Example for Finetune" section reloads this file with torch.load.
torch.save(model, 'model.pth')

## Example for Sequence Model

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # uncomment to inspect GPU status

### Load dataset

In [None]:
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Each tag is stored in its own CSV file; the `value` column of every file
# becomes one column of data_1 (the last tag, AI3012, is the prediction target).
data_1 = pd.DataFrame()
file_path = '../input/nh3-dann/'
value_name = [
    'FI3024', 'PDI3012', 'PIC3030', 'TI3037',
    'TI3038', 'TI4043', 'PI4043', 'FIC3004',
    'FI4040', 'PI4040', 'TI4040', 'TI4042', 'PI4042', 'AI3012',
]
for name in value_name:
    data_1[name] = pd.read_csv(f'{file_path}{name}.csv')['value']
# Keep only the rows in which every tag has a reading.
data_1 = data_1.dropna()
data_1.describe()

In [None]:
# Data_1 is loaded but not referenced by any of the cells shown in this section.
Data_1 = pd.read_csv('../input/nh3dann2/10-02-11-02_1.csv')
Data_2 = pd.read_csv('../input/nh3dann2/10-02-11-02_2.csv')
Data_3 = pd.read_csv('../input/nh3dann2/10-02-11-02_3.csv')


def _extract_tag(frame, name):
    """Return the `value` readings of tag `URE2.<name>.PV` in `frame`, re-indexed from 0."""
    return frame.loc[frame.tag == f'URE2.{name}.PV', 'value'].reset_index(drop=True)


# File that holds each tag, listed in the column order of the final frame.
_TAG_SOURCES = {
    'FI3024': Data_3, 'PDI3012': Data_3,
    'PIC3030': Data_2, 'TI3037': Data_3,
    'TI3038': Data_2, 'TI4043': Data_3,
    'PI4043': Data_3, 'FIC3004': Data_3,
    'FI4040': Data_3, 'PI4040': Data_3,
    'TI4040': Data_3, 'TI4042': Data_3,
    'PI4042': Data_3, 'AI3012': Data_2,
}

# The series are aligned on their fresh 0..n-1 index; tags with fewer readings
# leave NaNs in the tail rows, which dropna() then removes.
Data_10 = pd.DataFrame({name: _extract_tag(frame, name) for name, frame in _TAG_SOURCES.items()})
data_2 = Data_10.dropna()
data_2.describe()

In [None]:
def MinMax(df):
    """Return a new frame in which every column of `df` is passed through minmax_scale.

    The result has the same columns as `df`; each column is scaled on its own.
    """
    scaled = pd.DataFrame(columns=df.columns)
    for name in df.columns:
        scaled[name] = minmax_scale(df[name]).flatten()
    return scaled
# Remove rows lying more than three standard deviations from a column mean.
def RemoveOutlier(df):
    """Filter `df` column by column with a 3-sigma rule.

    The columns are processed in order, and each column's mean and standard
    deviation are computed on the rows that survived the previous columns.
    """
    for name in df.columns:
        column = df[name]
        within_3_sigma = (column - column.mean()).abs() <= 3 * column.std()
        df = df[within_3_sigma]
    return df

In [None]:
# Drop 3-sigma outliers from both frames. The names are rebound, so re-running
# this cell filters the already-filtered data again.
data_1, data_2 = RemoveOutlier(data_1), RemoveOutlier(data_2)
data_source = data_1.iloc[130000:150000]
data_target = data_2.iloc[20000:38500]       # target: 20000-35000, test: 35000-38500

# Each slice is min-max scaled on its own.
data_train, data_test = MinMax(data_source), MinMax(data_target)

# The first 13 columns are the inputs; column 13 (AI3012) is the target.
X_train = data_train.iloc[:, :13].to_numpy()
y_train = data_train.iloc[:, 13].to_numpy()
# Only the rows from position 15000 onwards of the scaled target slice are used for testing.
X_test = data_test.iloc[15000:, :13].to_numpy()
y_test = data_test.iloc[15000:, 13].to_numpy()

min_y, max_y = min(y_test), max(y_test)

### Sliding-Window Preprocessing

In [None]:
def create_window(X, y, window_len):
    """Turn a sample-by-feature matrix into overlapping windows of consecutive samples.

    Window i holds samples i .. i+window_len-1 and is paired with the target of
    its last sample, y[i+window_len-1].

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        y: sequence of n_samples targets.
        window_len: number of consecutive samples per window (>= 1).

    Returns:
        (X_window, y_window): X_window has shape
        (n_windows, window_len, n_features) and y_window is y[window_len-1:],
        where n_windows = max(n_samples - window_len + 1, 0). Both are empty
        when there are fewer samples than window_len.

    Raises:
        ValueError: if window_len is smaller than 1.
    """
    if window_len < 1:
        raise ValueError("window_len must be a positive integer")
    X = np.asarray(X)
    n_windows = max(len(X) - window_len + 1, 0)
    # Row i of `offsets` lists the sample indices i, i+1, ..., i+window_len-1,
    # so one fancy-indexing step gathers every window without a Python loop.
    offsets = np.arange(n_windows)[:, None] + np.arange(window_len)[None, :]
    X_window = X[offsets]
    y_window = y[window_len - 1:]
    return X_window, y_window

In [None]:
# Each model input is a window of 3 consecutive samples.
window_len = 3
X_lstm_train, y_lstm_train = create_window(X=X_train, y=y_train, window_len=window_len)
X_lstm_test, y_lstm_test = create_window(X=X_test, y=y_test, window_len=window_len)

### Create dataset&dataloader

In [None]:
from torch.utils.data import TensorDataset
# Convert the windowed arrays to tensors on `device` and pair inputs with targets.
train_dataset = TensorDataset(
    torch.Tensor(X_lstm_train).to(device),
    torch.Tensor(y_lstm_train).to(device),
)
test_dataset = TensorDataset(
    torch.Tensor(X_lstm_test).to(device),
    torch.Tensor(y_lstm_test).to(device),
)

In [None]:
from torch.utils.data import DataLoader
batch_size = 128
# Mini-batch iterators over the windowed datasets; both reshuffle on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

### Define Model

In [None]:
# Stand-alone shape check of nn.LSTM on random data.
rnn = nn.LSTM(input_size=13, hidden_size=128, num_layers=1, batch_first=True)
# With batch_first=True the layout is (batch_size, sequence length, feature_size);
# without it the layout would be (sequence length, batch_size, feature_size).
inputs = torch.randn(128, 3, 13)
# `output` is (batch_size, sequence length, hidden_size); `_` receives (hidden_state, cell_state).
output, _ = rnn(inputs)

In [None]:
from torch import nn
class lstm(nn.Module):
    """LSTM regressor: 13 features per time step -> 128-d hidden state -> one value."""

    def __init__(self):
        super().__init__()
        # Feature extractor: one batch-first LSTM layer wrapped in a Sequential.
        recurrent = nn.LSTM(input_size=13, hidden_size=128, num_layers=1, batch_first=True)
        self.fe = nn.Sequential(recurrent)
        # Regression head applied to the hidden output of the last time step.
        head_layers = [nn.Dropout(p=0.25), nn.BatchNorm1d(128), nn.Linear(128, 1)]
        self.predictor = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a (batch, time, 13) input to one prediction per sequence."""
        sequence_out, _ = self.fe(x)
        last_step = sequence_out[:, -1]
        return self.predictor(last_step).squeeze(-1)

### Define the train loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        dataloader: iterable of (X, y) batches exposing `len()` and `.dataset`.
        model: callable mapping a batch X to predictions.
        loss_fn: callable (pred, y) -> scalar loss tensor.
        optimizer: optimiser holding the parameters to update.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both logging and accumulation,
        # instead of evaluating loss_fn a second time on the same tensors.
        batch_loss = loss.item()
        if batch % 50 == 0:
            current = batch * len(X)
            print(f"Train loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
        train_loss += batch_loss
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    print(f"Test MSE Error: {test_loss:.3f} \n")
    return test_loss

### Train the model

In [None]:
model = lstm().to(device)
# Define model hyperparameters
learning_rate = 3e-4

# Define loss and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loss, test_loss = np.array([]), np.array([])

model.train()
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    # test_loop prints the test MSE itself, so it is not printed a second time here.
    epoch_test_loss = test_loop(test_dataloader, model, loss_fn)
    # Keep one entry per epoch for the loss-curve plot.
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Switch the model to evaluation mode before predicting on the whole test set.
model.eval()
y_pred = model(test_dataset[:][0]).cpu().detach().numpy()
y_test = test_dataset[:][1].cpu().detach().numpy()
print(f"R2 score: {r2_score(y_test, y_pred):.3f}")
print(f"MSE: {mean_squared_error(y_pred, y_test):.3f}")

# Overlay the true and the predicted series. The x axis is the sample index and
# the y axis the target value, so the curves are told apart by a legend rather
# than by the "true"/"predict" axis labels used for the scatter plot.
fig = plt.figure(figsize=(10,5), dpi=300)
ax = fig.add_subplot(111)
ax.plot(y_test, marker='.', color='mediumblue', label='True')
ax.plot(y_pred, marker='.', color='red', label='Predict')
ax.set_xlabel('sample index', fontsize=15)
ax.set_ylabel('AI', fontsize=15)
ax.legend(fontsize=15)
ax.grid(True)

In [None]:
# Plot the per-epoch training loss and test loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)
plt.plot(np.linspace(1,epochs,epochs), test_loss)

## Example for Autoencoder

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # uncomment to inspect GPU status

### Load data

In [None]:
from sklearn.datasets import load_breast_cancer
from torch import nn
from torch.utils.data import TensorDataset, random_split

def minmax_scaler(data):
    """Scale every column of `data` linearly so that its values span [0, 1].

    Args:
        data: array whose columns (axis 0 reductions) are scaled independently.

    Returns:
        (data - column_min) / (column_max - column_min). A constant column has
        zero range; it is mapped to 0 instead of producing NaN from 0/0.
    """
    min_value = data.min(axis=0)
    value_range = data.max(axis=0) - min_value
    # Divide constant columns by 1 rather than by their zero range.
    value_range = np.where(value_range == 0, 1, value_range)
    return (data - min_value) / value_range

cancer_data = load_breast_cancer()
# Min-max scaled features and the class labels, both as tensors on `device`.
x = torch.Tensor(minmax_scaler(cancer_data.data)).to(device)
y = torch.Tensor(cancer_data.target).to(device)

In [None]:
# Pair every feature row with its label.
dataset = TensorDataset(x, y)

# Random 70/30 train/test split; the test split receives the rounding remainder.
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

from torch.utils.data import DataLoader
batch_size = 16
# Mini-batch iterators over the two splits; both reshuffle on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

### Define Model

In [None]:
class autoencoder(nn.Module):
    """Single-hidden-layer autoencoder with a 16-unit code."""

    def __init__(self, in_features):
        super().__init__()
        code_size = 16
        # Encoder: in_features -> 16, followed by an in-place ReLU.
        self.encoder = nn.Sequential(
            nn.Linear(in_features, code_size),
            nn.ReLU(inplace=True),
        )
        # Decoder: 16 -> in_features, followed by an in-place ReLU.
        self.decoder = nn.Sequential(
            nn.Linear(code_size, in_features),
            nn.ReLU(inplace=True),
        )

    def forward(self, inputs):
        """Return the pair (encoding, reconstruction) for `inputs`."""
        code = self.encoder(inputs)
        return code, self.decoder(code)

### Define train&test loop

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one reconstruction-training pass and return the mean batch loss.

    Args:
        dataloader: iterable of (X, y) batches exposing `len()` and `.dataset`;
            the labels y are ignored.
        model: callable returning (encoding, reconstruction) for a batch X.
        loss_fn: callable (reconstruction, X) -> scalar loss tensor.
        optimizer: optimiser holding the parameters to update.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0
    for batch, (X, _) in enumerate(dataloader):
        # Forward pass: the loss compares the reconstruction with the input itself.
        _, rec = model(X)
        loss = loss_fn(rec, X)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both logging and accumulation,
        # instead of evaluating loss_fn a second time on the same tensors.
        batch_loss = loss.item()
        if batch % 5 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
        train_loss += batch_loss
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            encoding, rec = model(X)
            test_loss += loss_fn(rec, X).item()
            
            
    test_loss /= num_batches
    print(f"Test Error: \n Avg loss: {test_loss:.3f} \n")
    return test_loss

### Train the model

In [None]:
# Fix the random seeds so that the network initialisation is reproducible.
torch.manual_seed(2)
torch.cuda.manual_seed(2)

# The autoencoder's input width is the length of one feature vector.
feature_size = train_dataset[0][0].shape[0]
model = autoencoder(feature_size).to(device)

loss_fn = torch.nn.MSELoss()
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loss = np.array([])
test_loss = np.array([])

model.train()
if model.training:
    print('Under training mode')
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    epoch_test_loss = test_loop(test_dataloader, model, loss_fn)
    # Keep one entry per epoch for the loss-curve plot.
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
# Plot the per-epoch training loss and test loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)
plt.plot(np.linspace(1,epochs,epochs), test_loss)

### Use the encodings to predict

In [None]:
# Encode both splits with the trained autoencoder; no gradients are needed here.
model.eval()
with torch.no_grad():
    train_encodings, _ = model(train_dataset[:][0])
    test_encodings, _ = model(test_dataset[:][0])

# Replace the raw-feature datasets by (encoding, label) datasets. The names are
# rebound, so this cell cannot be re-run without re-creating the raw datasets first.
train_dataset = TensorDataset(train_encodings, train_dataset[:][1])
test_dataset = TensorDataset(test_encodings, test_dataset[:][1])

# Mini-batch iterators over the encoded datasets.
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
class lr(nn.Module):
    """Logistic-regression classifier: BatchNorm -> Linear -> Sigmoid."""

    def __init__(self, in_features):
        super().__init__()
        layers = [
            nn.BatchNorm1d(in_features),   # normalise the inputs
            nn.Linear(in_features, 1),
            nn.Sigmoid(),
        ]
        self.linear = nn.Sequential(*layers)

    def forward(self, x):
        """Return one sigmoid output per row of `x`, with the trailing size-1 dimension removed."""
        return self.linear(x).squeeze(-1)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        dataloader: iterable of (X, y) batches exposing `len()` and `.dataset`.
        model: callable mapping a batch X to predictions.
        loss_fn: callable (pred, y) -> scalar loss tensor.
        optimizer: optimiser holding the parameters to update.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss = 0
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both logging and accumulation,
        # instead of evaluating loss_fn a second time on the same tensors.
        batch_loss = loss.item()
        if batch % 5 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:.3f}  [{current:>5d}/{size:>5d}]")
        train_loss += batch_loss
    return train_loss / num_batches

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (torch.round(pred)==y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:.3f} \n")
    return test_loss

In [None]:
# Fix the random seeds so that the network initialisation is reproducible.
torch.manual_seed(2)
torch.cuda.manual_seed(2)

# The classifier's input width is the length of one encoding vector.
feature_size = train_dataset[0][0].shape[0]
lr_model = lr(feature_size).to(device)

loss_fn = torch.nn.BCELoss()
learning_rate = 1e-3
optimizer = torch.optim.Adam(lr_model.parameters(), lr=learning_rate)
train_loss = np.array([])
test_loss = np.array([])

lr_model.train()
if lr_model.training:
    print('Under training mode')
epochs = 30
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    epoch_train_loss = train_loop(train_dataloader, lr_model, loss_fn, optimizer)
    epoch_test_loss = test_loop(test_dataloader, lr_model, loss_fn)
    # Keep one entry per epoch for the loss-curve plot.
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")


In [None]:
# Plot the per-epoch training loss and test loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)
plt.plot(np.linspace(1,epochs,epochs), test_loss)

In [None]:
lr_model.eval()
with torch.no_grad():
    prob = lr_model(test_dataset[:][0])
    y_pred = (prob>0.5).type(torch.float)           # threshold the predicted probability at 0.5
print(f"Predicted class: {y_pred}")

# Accuracy = number of predictions equal to the label / number of test samples,
# computed with one vectorised comparison instead of a per-element map.
accuracy = (y_pred == test_dataset[:][1]).sum().item() / len(y_pred)
print(f"accuracy: {accuracy:.3f}")

## Example for Finetune

### 1. Load Model

In [None]:
# Reload the fully pickled model written by torch.save(model, 'model.pth').
# map_location places the tensors on the current `device`, so a file saved on a
# GPU machine also loads on a CPU-only one.
# NOTE(review): unpickling a whole model executes pickle code and needs the
# model's class to be defined in this session; only load trusted files. Recent
# PyTorch releases may additionally require weights_only=False here - confirm
# against the installed version.
model = torch.load('model.pth', map_location=device)
print(model) 

### 2. 将指定层设为参数更新，其余层参数不更新

In [None]:
# Freeze every parameter of the model ...
model.requires_grad_(False)
# ... then re-enable gradients for the layer at index 4 of `linear_relu` only.
model.linear_relu[4].requires_grad_(True)

In [None]:
# Inspect which parameters of each child module will receive gradient updates.
for child in model.children():
    print(child)
    # Iterate the child's own parameters (not the whole model's), so the flags
    # printed under each child actually belong to that child.
    for param in child.parameters():
        print(param.requires_grad)
        # print(param)

### 3. 将优化器设置为只更新需要更新的部分参数

In [None]:
# Hand the optimiser only the parameters that are still trainable.
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=1e-3,
)
optimizer

### 4. 重新训练模型

In [None]:
# Incremental training set: the training samples whose target lies strictly between 4 and 5.
# NOTE(review): this needs `train_dataset` to be the California-Housing split
# (8 features, real-valued targets). The sequence-model and autoencoder sections
# rebind `train_dataset`, so re-create it before running this cell after them.
all_x, all_y = train_dataset[:]
in_range = (all_y > 4) & (all_y < 5)
# One boolean-mask selection replaces the per-sample loop with repeated np.append;
# the selected tensors stay on the device the dataset already lives on.
new_x, new_y = all_x[in_range], all_y[in_range]
new_train_dataset = TensorDataset(new_x, new_y)

batch_size = 64
new_train_dataloader = DataLoader(new_train_dataset, batch_size, shuffle=True)

In [None]:
loss_fn = nn.MSELoss()
train_loss, test_loss = np.array([]), np.array([])

model.train()
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # Train on the incremental set only; evaluate on the current test dataloader.
    epoch_train_loss = train_loop(new_train_dataloader, model, loss_fn, optimizer)
    # test_loop prints the test MSE itself, so it is not printed a second time here.
    epoch_test_loss = test_loop(test_dataloader, model, loss_fn)
    # Keep one entry per epoch for the loss-curve plot.
    train_loss = np.append(train_loss, epoch_train_loss)
    test_loss = np.append(test_loss, epoch_test_loss)
print("Done!")

In [None]:
# Plot the per-epoch training loss and test loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)
plt.plot(np.linspace(1,epochs,epochs), test_loss)

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Switch the model to evaluation mode before predicting on the whole test split.
model.eval()
with torch.no_grad():
    y_pred = model(test_dataset[:][0]).cpu().numpy()
y_test = test_dataset[:][1].cpu().numpy()
print(f"R2 score: {r2_score(y_test, y_pred):.3f}")
print(f"MSE: {mean_squared_error(y_pred, y_test):.3f}")

# Predicted-vs-true scatter; points on the black diagonal are perfect predictions.
fig, ax = plt.subplots(figsize=(3, 3), dpi=300)
ax.scatter(y_test, y_pred, s=2, color='mediumblue', alpha=0.2)
x = np.linspace(0, 5, 5)
ax.plot(x, x, 'k')
ax.set_xlabel('true', fontsize=15)
ax.set_ylabel('predict', fontsize=15)
ax.grid(True)

## Example for Embedding (MF)

In [None]:
import numpy as np
import torch
from torch import nn 
from scipy.sparse import rand as sprand

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # uncomment to inspect GPU status


### 1. 生成测试数据

In [None]:
n_users = 100
n_items = 100
# Random sparse user-by-item matrix in which about 10% of the entries are set.
ratings = sprand(n_users, n_items, density=0.1, format="csr")
# Overwrite the stored entries with random integer ratings.
# NOTE(review): randint's upper bound is exclusive, so the ratings are 1..4 -
# confirm whether a 1..5 scale was intended.
ratings.data = np.random.randint(1, 5, size=ratings.nnz).astype(np.float64)
# Dense array; entries that were never set are 0.
ratings = ratings.toarray()

### 2. 定义模型

In [None]:
class MF(nn.Module):
    """Matrix-factorisation model: a rating is the dot product of a user and an item embedding."""

    def __init__(self, n_users, n_items, embedding_dim=8):
        super().__init__()
        # One dense embedding_dim-dimensional vector per user and per item.
        self.user_emb = nn.Embedding(n_users, embedding_dim, sparse=True)
        self.item_emb = nn.Embedding(n_items, embedding_dim, sparse=True)

    def forward(self, user, item):
        """Predict the rating for each (user, item) index pair.

        Args:
            user: user index or indices - a Python/NumPy integer, a sequence of
                integers, or an integer tensor.
            item: item index or indices, in the same forms as `user`.

        Returns:
            1-D tensor with one predicted rating per pair (length 1 for scalar inputs).
        """
        # Place the indices on the embeddings' own device instead of relying on
        # a module-level `device`, and accept scalars, sequences and tensors alike.
        target = self.user_emb.weight.device
        user = torch.as_tensor(user, dtype=torch.long, device=target).reshape(-1)
        item = torch.as_tensor(item, dtype=torch.long, device=target).reshape(-1)
        return (self.user_emb(user) * self.item_emb(item)).sum(1)

### 3. 设置模型参数

In [None]:
# Fix the random seeds so that the embedding initialisation is reproducible.
torch.manual_seed(2)
torch.cuda.manual_seed(2)

mf_model = MF(n_users, n_items, embedding_dim=10).to(device)

loss_fn = torch.nn.MSELoss()
learning_rate = 1e-2
optimizer = torch.optim.SGD(mf_model.parameters(), lr=learning_rate)
epochs = 10
train_loss = np.array([])
test_loss = np.array([])

### 4. 训练模型

In [None]:
# Coordinates of the non-zero ratings, shuffled once into a random visiting order.
rows, cols = ratings.nonzero()
p = np.random.permutation(len(rows))
rows, cols = rows[p], cols[p]

mf_model.train()
for t in range(epochs):
    train_epoch_loss = 0
    # One SGD step per observed (user, item) rating.
    for row, col in zip(rows, cols):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Wrap the rating and the two indices as tensors on `device`.
        rating = torch.Tensor([ratings[row, col]]).to(device)
        row = torch.LongTensor([row]).to(device)
        col = torch.LongTensor([col]).to(device)

        # Forward pass and loss.
        prediction = mf_model(row, col)
        loss = loss_fn(prediction, rating)
        train_epoch_loss += loss.item()

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

    # Mean loss per rating for this epoch.
    train_loss = np.append(train_loss, train_epoch_loss / len(rows))

In [None]:
# Number of recorded epoch losses (the training cell appends one per epoch).
len(train_loss)

In [None]:
# Plot the per-epoch training loss against the epoch number (1..epochs).
import matplotlib.pyplot as plt
plt.plot(np.linspace(1,epochs,epochs), train_loss)

In [None]:
# Predicted rating of user 0 for item 13.
mf_model(0,13)

## Example for Factorization Machine

In [2]:
import numpy as np
import torch
from torch import nn 
from sklearn.feature_extraction import DictVectorizer

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')
# !nvidia-smi      # uncomment to inspect GPU status

Using cuda device


### 1. 定义模型

In [6]:
class FM(nn.Module):
    """Factorization machine: linear term plus factorised pairwise interactions, squashed by a sigmoid."""

    def __init__(self, feature_dim, embedding_dim=10):
        super().__init__()
        # Latent factor matrix V, drawn from a standard normal distribution and
        # registered as a parameter so that it receives gradients.
        self.V = nn.Parameter(torch.randn(feature_dim, embedding_dim), requires_grad=True)
        self.linear = nn.Linear(feature_dim, 1)

    def forward(self, x):
        """Return one sigmoid output per row of the (batch, feature_dim) input `x`."""
        # Square of the sum, (x V)^2, summed over the embedding dimension.
        square_of_sum = (x @ self.V).pow(2).sum(1, keepdim=True)
        # Sum of the squares, x^2 V^2, summed over the embedding dimension.
        sum_of_square = (x.pow(2) @ self.V.pow(2)).sum(1, keepdim=True)

        interaction = 0.5 * (square_of_sum - sum_of_square)
        score = interaction + self.linear(x)
        return torch.sigmoid(score.squeeze(1))


## To be continued

- self-defined nn.module   
<https://pytorch.org/docs/stable/notes/modules.html>
- sequence model(RNN, LSTM, Transformer...)   
<https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html?highlight=lstm#torch.nn.LSTM>   
<https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html?highlight=transformer#torch.nn.Transformer>
- Autoencoder  
<https://github.com/L1aoXingyu/pytorch-beginner/blob/master/08-AutoEncoder/simple_autoencoder.py>
- finetune  
<https://anchorety.github.io/2019/11/07/pytorch%E2%80%94%E2%80%94finetune%E5%B8%B8%E7%94%A8%E4%BB%A3%E7%A0%81/>
- pytorch lightning  
<https://www.pytorchlightning.ai/tutorials>