# Linear_regression
step by step

In [10]:
import random
import matplotlib.pyplot as plt
import matplotlib as mpl

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset,TensorDataset,DataLoader
from torch.utils.tensorboard import SummaryWriter

from torchLearning import *

## Generating datasets

- For example: $y = 2x_1 - x_2 + 1$

In [11]:
# Builder for synthetic regression datasets.
def tensorGenReg(num_examples=1000, w=(2, -1, 1), bias=True, delta=0.01, deg=1):
    """Create a synthetic regression dataset.

    Features are drawn from a standard normal distribution and the labels are
    ``(features ** deg) @ coefficients [+ intercept] + delta * noise`` where
    ``noise`` is standard normal as well.

    :param num_examples: number of samples to generate
    :param w: coefficient sequence; when ``bias`` is truthy its last entry is
        the intercept and the preceding entries are the feature coefficients
    :param bias: whether the generating equation has an intercept
    :param delta: factor multiplied onto the standard-normal noise term
    :param deg: power applied element-wise to the features
    :return: ``(features, labels)``. With ``bias`` the feature tensor gets an
        extra trailing column of ones, so its shape is
        ``(num_examples, len(w))`` in both modes; labels have shape
        ``(num_examples, 1)``.
    """
    if bias:
        coefficients, intercept = w[:-1], w[-1]
    else:
        coefficients, intercept = w, None

    num_inputs = len(coefficients)
    # First random draw: the raw features (without any column of ones).
    raw_features = torch.randn(num_examples, num_inputs)
    w_true = torch.tensor(coefficients).reshape(-1, 1).float()

    # A (n, k) x (k, 1) matrix product also covers the single-feature case,
    # so no separate element-wise branch is needed.
    labels_true = torch.mm(torch.pow(raw_features, deg), w_true)

    if bias:
        labels_true = labels_true + torch.tensor(intercept).float()
        # Append the all-ones column that pairs with the intercept weight.
        features = torch.cat((raw_features, torch.ones(num_examples, 1)), 1)
    else:
        features = raw_features

    # Second random draw: the perturbation added to the exact labels.
    labels = labels_true + torch.randn(size=labels_true.shape) * delta
    return features, labels

In [13]:
# Fix the torch RNG seed so the generated dataset is reproducible.
torch.manual_seed(420)
# Default arguments: 1000 samples generated from w = [2, -1, 1] with an intercept.
features, labels = tensorGenReg()

## Module

### Select model

In [14]:
def linreg(X, w):
    """Linear model: return the matrix product of inputs ``X`` and weights ``w``."""
    predictions = torch.mm(X, w)
    return predictions

### Select Loss function

In [15]:
def squared_loss(y_hat, y):
    """Return the mean of the squared differences between ``y_hat`` and ``y``.

    Both tensors are reshaped to column vectors before subtracting, and the
    summed squared error is divided by the total number of elements in ``y``.
    """
    residual = y_hat.reshape(-1, 1) - y.reshape(-1, 1)
    return residual.pow(2).sum() / y.numel()

### Select optimizer

In [16]:
def sgd(params, lr):
    """Apply one in-place gradient-descent step and reset the gradients.

    :param params: a single tensor, or an iterable of tensors, whose ``.grad``
        has been populated by ``backward()``
    :param lr: learning rate multiplied onto each gradient
    """
    if isinstance(params, torch.Tensor):
        # Keep supporting the original single-tensor call style.
        params = [params]
    # no_grad() lets us update leaf tensors in place without recording the
    # update in the autograd graph (replaces the discouraged ``.data`` access).
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Nothing was back-propagated to this parameter; leave it alone.
                continue
            param -= lr * param.grad
            param.grad.zero_()

## Train

In [17]:
# TensorBoard writer shared by the training cells below; logs go to the 'reg_loss' directory.
write = SummaryWriter(log_dir='reg_loss')

In [18]:
def data_iter(batch_size, features, labels):
    """Split a dataset into randomly shuffled mini-batches.

    :param batch_size: number of samples per batch (the last batch may be smaller)
    :param features: feature tensor, selected along dimension 0
    :param labels: label tensor, selected along dimension 0
    :return: list with one ``[batch_features, batch_labels]`` entry per batch
    """
    order = list(range(len(features)))
    # The shuffle uses Python's ``random`` module, not the torch RNG.
    random.shuffle(order)

    def take(start):
        # Slicing clamps at the end of the list, so the final batch is just shorter.
        picked = torch.tensor(order[start:start + batch_size])
        return [features.index_select(0, picked), labels.index_select(0, picked)]

    return [take(start) for start in range(0, len(order), batch_size)]


In [20]:
# Hyperparameters for the from-scratch training run.
batch_size = 10
lr = 0.03
num_epochs = 3
# One weight per feature column; the features carry a trailing all-ones
# column, so the third weight plays the role of the intercept.
w = torch.zeros(3, 1, requires_grad=True)

net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w), y)
        l.backward()
        sgd(w, lr)
    # Evaluation only: no gradients are needed for the full-dataset loss, so
    # skip building an autograd graph for it.
    with torch.no_grad():
        train_l = loss(net(features, w), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l))

epoch 1, loss 0.000127
epoch 2, loss 0.000102
epoch 3, loss 0.000101


In [21]:
# Same from-scratch training run as above, but the per-epoch loss is logged
# to TensorBoard instead of being printed.
batch_size = 10
lr = 0.03
num_epochs = 3
# One weight per feature column; the features carry a trailing all-ones
# column, so the third weight plays the role of the intercept.
w = torch.zeros(3, 1, requires_grad=True)

net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w), y)
        l.backward()
        sgd(w, lr)
    # Evaluation only: compute the full-dataset loss without tracking
    # gradients so the logged value is not attached to an autograd graph.
    with torch.no_grad():
        train_l = loss(net(features, w), labels)
    write.add_scalar('mul', train_l, epoch)

# Using built-in PyTorch functions

## Define parameters

In [25]:
# Hyperparameters for the nn.Module-based training run.
batch_size = 10
lr = 0.03
# Must be spelled `num_epochs`: that is the name the fit(...) call reads.
num_epochs = 3

## Data preparation

In [26]:
# Re-seed with the same value as the manual section so tensorGenReg() draws the same data.
torch.manual_seed(420)

features,labels = tensorGenReg()
# Drop the trailing all-ones column appended by tensorGenReg.
features = features[:,:-1]
# Pair features with labels and serve them as shuffled mini-batches.
data = TensorDataset(features,labels)
batchData = DataLoader(data,batch_size=batch_size,shuffle = True)

## Module

### Model

In [27]:
class LR(nn.Module):
    """Linear-regression model consisting of a single ``nn.Linear`` layer."""

    def __init__(self, in_features=2, out_features=1):
        """Build the layer mapping ``in_features`` inputs to ``out_features`` outputs."""
        super().__init__()
        self.linear = nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, x):
        """Return the linear layer's output for the input batch ``x``."""
        return self.linear(x)


# Model instance with the default sizes (2 inputs, 1 output).
LR_model = LR()

### Loss function

In [28]:
# Mean-squared-error loss from torch.nn, constructed with its default settings.
criterion = nn.MSELoss()

### Optimizer

In [29]:
# Plain SGD over the model's parameters; the step size comes from the `lr`
# hyperparameter defined earlier instead of a duplicated literal.
optimizer = optim.SGD(LR_model.parameters(), lr=lr)

### Train

In [32]:
def fit(net, criterion, optimizer, batchdata, epochs):
    """Train ``net`` on mini-batches and log the loss once per epoch.

    After each epoch the loss of the last processed batch is written to the
    module-level TensorBoard writer ``write`` under the tag ``'loss'``.

    :param net: model, invoked as ``net(X)``
    :param criterion: loss callable taking ``(prediction, target)``
    :param optimizer: optimizer providing ``zero_grad()`` and ``step()``
    :param batchdata: iterable yielding ``(X, y)`` batches
    :param epochs: number of passes over ``batchdata``
    """
    for epoch in range(epochs):
        loss = None
        for X, y in batchdata:
            # Call the module itself rather than .forward() so that any
            # registered hooks run as well.
            yhat = net(X)
            loss = criterion(yhat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # An epoch without batches has no loss to report.
        if loss is not None:
            write.add_scalar('loss', loss.item(), global_step=epoch)

In [33]:
# Seed the torch RNG right before training so the run is repeatable.
torch.manual_seed(420)

# Arguments in order: model, loss, optimizer, batch loader, epoch count.
fit(LR_model, criterion, optimizer, batchData, num_epochs)

### Result

In [34]:
# Display the model's layer structure.
LR_model

LR(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

In [35]:
# Show the trained parameters yielded by the model.
list(LR_model.parameters())

[Parameter containing:
 tensor([[ 1.9992, -1.0003]], requires_grad=True),
 Parameter containing:
 tensor([0.9994], requires_grad=True)]

In [36]:
# Loss of the trained model evaluated on the full feature/label tensors.
criterion(LR_model(features),labels)

tensor(0.0001, grad_fn=<MseLossBackward>)

In [37]:
# Record the model graph in TensorBoard, passing the feature tensor as the example input.
write.add_graph(LR_model,(features,))