### Logistic回归的代码实现

- 参考课本P45

In [None]:
# 导包
import torch
from torch import nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Fix the RNG seed so the randomly initialised parameters are reproducible.
torch.manual_seed(2019)

# Read the samples from data.txt, split them by label and show them as a
# scatter plot: label 0 in red, label 1 in blue.
with open('data/data.txt', 'r') as f:
    # Drop the trailing newline of every line, then split on commas.
    data_list = [line.split('\n')[0].split(',') for line in f.readlines()]
    # Keep the first three fields of every row as floats
    # (the original notes give the overall size as (100, 3)).
    data = [(float(row[0]), float(row[1]), float(row[2])) for row in data_list]

# Partition the samples by their label, i.e. the last field of each tuple.
x0 = [sample for sample in data if sample[-1] == 0.0]
x1 = [sample for sample in data if sample[-1] == 1.0]

# First field goes on the horizontal axis, second field on the vertical axis.
plot_x0_0 = [sample[0] for sample in x0]
plot_x0_1 = [sample[1] for sample in x0]
plot_x1_0 = [sample[0] for sample in x1]
plot_x1_1 = [sample[1] for sample in x1]

plt.plot(plot_x0_0, plot_x0_1, 'ro', label='x_0')
plt.plot(plot_x1_0, plot_x1_1, 'bo', label='x_1')
plt.legend(loc='best')   # loc='best' lets matplotlib choose the legend position

**接下来将数据转成numpy类型，接着转到Tensor为之后的训练作准备**


In [None]:
# Pack the samples into a float32 NumPy array, then expose the two feature
# columns and the label column as tensors.
np_data = np.asarray(data, dtype=np.float32)
x_data = torch.from_numpy(np_data[:, :2])    # features; [100, 2] per the original notes
y_data = torch.from_numpy(np_data[:, -1]).view(-1, 1)  # labels as a single column
print('y_data:',y_data.shape)

# Logistic sigmoid function.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so large
    negative inputs do not overflow ``np.exp`` the way the direct formula does.

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like *x*.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always within [0, 1], cannot overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x).  Same value, no overflow.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap 0-d results to a scalar; arrays pass through

# Plot the sigmoid curve.
# Sample x from -10 towards 10.01 in steps of 0.01 and evaluate sigmoid there.
plot_x = np.arange(-10, 10.01, 0.01)
plot_y = sigmoid(plot_x)

# 'r' draws the curve in red.
plt.plot(plot_x, plot_y, 'r')

In [None]:
# Wrap the data tensors in autograd Variables.
# NOTE(review): Variable looks like the legacy autograd wrapper -- confirm
# whether it is still needed with the PyTorch version in use.
x_data = Variable(x_data)   # (100, 2) per the original notes
y_train = Variable(y_data)   # (100, 1) per the original notes; not referenced again in the visible cells, which use y_data

import torch.nn.functional as F   # 后续需要用到函数，不用自己写

In [None]:
# Logistic-regression model with hand-managed parameters.
# The tensors are created with requires_grad=True directly; wrapping them in
# the deprecated Variable class is not needed for autograd to track them.
w = torch.randn(2, 1, requires_grad=True)  # one weight per input feature
b = torch.zeros(1, requires_grad=True)     # bias, broadcast over all rows


def logistic_regression(x):
    """Return sigmoid(x @ w + b) for every row of *x*.

    Args:
        x: 2-D tensor with two columns (torch.mm multiplies it with the
            (2, 1) weight matrix ``w``).

    Returns:
        Tensor with one column holding the sigmoid output for each row.
    """
    return torch.sigmoid(torch.mm(x, w) + b)

# Draw the decision boundary given by the parameters before any update.
w0 = w.data[0, 0].numpy()
w1 = w.data[1, 0].numpy()
b0 = b.data[0].numpy()

# On the boundary w0 * x + w1 * y + b0 = 0, hence y = (-w0 * x - b0) / w1.
plot_x = np.arange(0.2, 1, 0.01)
plot_y = (-w0 * plot_x - b0) / w1

# Boundary in green, class 0 in red, class 1 in blue.
for xs, ys, fmt, name in (
    (plot_x, plot_y, 'g', 'cutting line'),
    (plot_x0_0, plot_x0_1, 'ro', 'x_0'),
    (plot_x1_0, plot_x1_1, 'bo', 'x_1'),
):
    plt.plot(xs, ys, fmt, label=name)
plt.legend(loc='best')   # loc='best' lets matplotlib choose the legend position

In [None]:
# Binary cross-entropy loss.
def binary_loss(y_pred, y):
    """Return the mean binary cross-entropy of *y_pred* against *y*.

    Both ``y_pred`` and ``1 - y_pred`` are clamped from below at 1e-12 before
    the logarithm is taken, so ``log`` never receives zero.

    Args:
        y_pred: Tensor of predictions (used as probabilities in the formula).
        y: Tensor of targets, combined element-wise with ``y_pred``.

    Returns:
        Scalar tensor: the negated mean of
        ``y * log(y_pred) + (1 - y) * log(1 - y_pred)``.
    """
    floor = 1e-12
    log_p = y_pred.clamp(min=floor).log()
    log_not_p = (1 - y_pred).clamp(min=floor).log()
    log_likelihood = y * log_p + (1 - y) * log_not_p
    return -(log_likelihood.mean())

In [None]:
# Forward pass with the current (not yet trained) parameters, then show the
# resulting loss value.
y_pred = logistic_regression(x_data)
loss = binary_loss(y_pred, y_data)
print(loss)

In [None]:
# Let torch.optim update the parameters: redefine them as nn.Parameter.
from torch import nn
w = nn.Parameter(torch.randn(2, 1))
b = nn.Parameter(torch.zeros(1))


def logistic_regression(x):
    """Return sigmoid(x @ w + b) for every row of the 2-column tensor *x*."""
    scores = torch.mm(x, w) + b
    return torch.sigmoid(scores)


# Plain SGD over both parameters with a learning rate of 1.
optimizer = torch.optim.SGD([w, b], lr=1.0)

In [None]:
# Run 1000 full-batch updates and measure how long they take.
import time

start = time.time()
for e in range(1000):
    # Forward pass and loss.
    y_pred = logistic_regression(x_data)
    loss = binary_loss(y_pred, y_data)
    # Backward pass: clear old gradients, backpropagate, update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Accuracy of the predictions made in this forward pass:
    # outputs >= 0.5 count as class 1, everything else as class 0.
    mask = (y_pred >= 0.5).float()
    acc = mask.eq(y_data).sum().item() / y_data.shape[0]
    if (e + 1) % 200 != 0:
        continue
    print('epoch: {}, Loss: {:.5f}, Acc: {:.5f}'.format(e+1, loss.item(), acc))
during = time.time() - start
print()
print('During Time: {:.3f} s'.format(during))

In [None]:
# Draw the decision boundary given by the parameters after the updates.
w0 = w.data[0, 0].numpy()
w1 = w.data[1, 0].numpy()
b0 = b.data[0].numpy()

# On the boundary w0 * x + w1 * y + b0 = 0, hence y = (-w0 * x - b0) / w1.
plot_x = np.arange(0.2, 1, 0.01)
plot_y = (-w0 * plot_x - b0) / w1

# Boundary in green, class 0 in red, class 1 in blue.
for xs, ys, fmt, name in (
    (plot_x, plot_y, 'g', 'cutting line'),
    (plot_x0_0, plot_x0_1, 'ro', 'x_0'),
    (plot_x1_0, plot_x1_1, 'bo', 'x_1'),
):
    plt.plot(xs, ys, fmt, label=name)
plt.legend(loc='best')   # loc='best' lets matplotlib choose the legend position

In [None]:
# Use the built-in loss. Per the original notes, BCEWithLogitsLoss puts the
# sigmoid and the loss in one layer for better speed and stability.
criterion = nn.BCEWithLogitsLoss()

w = nn.Parameter(torch.randn(2, 1))
b = nn.Parameter(torch.zeros(1))


def logistic_reg(x):
    """Return the raw scores x @ w + b; no sigmoid is applied here."""
    scores = torch.mm(x, w)
    return scores + b


# Plain SGD over both parameters with a learning rate of 1.
optimizer = torch.optim.SGD([w, b], lr=1.)

In [None]:
# Forward pass through logistic_reg, then show the loss the criterion
# computes from its output and the labels.
y_pred = logistic_reg(x_data)
loss = criterion(y_pred, y_data)
print(loss.data)

In [None]:
# Run another 1000 updates, this time with the built-in criterion.

start = time.time()
for e in range(1000):
    # Forward pass: logistic_reg returns raw scores (no sigmoid); the
    # criterion consumes them directly.
    y_pred = logistic_reg(x_data)
    loss = criterion(y_pred, y_data)
    # Backward pass: clear old gradients, backpropagate, update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Accuracy. y_pred holds logits here, so the "probability >= 0.5" rule
    # corresponds to "logit >= 0" (sigmoid(0) == 0.5); thresholding the
    # logits at 0.5 would misclassify scores between 0 and 0.5.
    mask = y_pred.ge(0).float()
    acc = (mask == y_data).sum().item() / y_data.shape[0]
    if (e + 1) % 200 == 0:
        print('epoch: {}, Loss: {:.5f}, Acc: {:.5f}'.format(e+1, loss.item(), acc))

during = time.time() - start
print()
print('During Time: {:.3f} s'.format(during))

# 整理上述代码，如下所示

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

In [None]:
class LogisticRegression(nn.Module):
    """Logistic-regression model: a 2-input linear layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        # lr maps the two input features to a single score; sm squashes it.
        self.lr = nn.Linear(in_features=2, out_features=1)
        self.sm = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for *x*."""
        return self.sm(self.lr(x))

    
def file_open(path='data/data.txt'):
    """Load the samples stored in a comma-separated text file.

    Every non-blank line must hold at least three numeric fields; the first
    three are kept. Blank lines (for example a trailing empty line) are
    skipped instead of aborting the load.

    Args:
        path: File to read. Defaults to the location that was previously
            hard-coded, so existing ``file_open()`` calls behave as before.

    Returns:
        A tensor of shape (number_of_rows, 3) in the default floating dtype;
        shape (0, 3) when the file contains no data rows.

    Raises:
        ValueError: If one of the first three fields is not a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rows = []
    # The encoding is given explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(path, 'r', encoding='utf8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            rows.append((float(fields[0]), float(fields[1]), float(fields[2])))
    # reshape pins the column count so an empty file still yields (0, 3).
    return torch.tensor(rows).reshape(len(rows), 3)

def train(n_epoch, data):
    """Train the module-level ``logistic_model`` with full-batch updates.

    Relies on the globals ``logistic_model``, ``criterion`` and ``optimizer``
    being defined before the call. Every 1000th epoch the scalar loss and the
    accuracy of that epoch's forward pass are printed.

    Args:
        n_epoch: Number of optimisation steps to run.
        data: 2-D tensor; columns 0-1 are the features, column 2 the label.
            (Both device branches now read column 2; previously the CPU
            branch read the last column, which is the same for 3 columns.)
    """
    # The batch is identical in every epoch, so slice it and move it to the
    # GPU once instead of repeating that work inside the loop.
    x = data[:, 0:2]
    y = data[:, 2].view(-1, 1)
    if torch.cuda.is_available():
        x, y = x.cuda(), y.cuda()

    for epoch in range(n_epoch):
        # forward
        out = logistic_model(x)
        loss = criterion(out, y)
        print_loss = loss.item()
        mask = out.ge(0.5).float()  # outputs >= 0.5 count as class 1
        correct = (mask == y).sum()
        acc = correct.item() / x.size(0)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 1000 == 0:
            # Print the Python float, not the tensor repr with its grad_fn.
            print("epoch{} ----- loss:{}-------acc:{}".format(epoch+1, print_loss, acc))

def plot(logistic_model):
    """Plot the model's decision boundary together with the labelled samples.

    The samples are read from the module-level ``data`` (it is not a
    parameter of this function).

    Args:
        logistic_model: Model exposing an ``lr`` layer whose first weight row
            has two entries and whose bias is a single value.
    """
    # Decision boundary: w0 * x + w1 * y + b = 0  =>  y = (-w0 * x - b) / w1.
    w0, w1 = (weight.item() for weight in logistic_model.lr.weight[0])
    b = logistic_model.lr.bias.item()
    plot_x = np.arange(30, 100, 0.1)
    plt.plot(plot_x, (-w0 * plot_x - b) / w1)

    # Split the rows by their last entry (the label) before drawing anything.
    x0 = [row for row in data if row[-1] == 0.0]
    x1 = [row for row in data if row[-1] == 1.0]

    # Label 0 in red, label 1 in blue.
    for rows, fmt, name in ((x0, 'ro', 'x_0'), (x1, 'bo', 'x_1')):
        first = [row[0] for row in rows]
        second = [row[1] for row in rows]
        plt.plot(first, second, fmt, label=name)
    plt.legend(loc='best')
    plt.show()

    
if __name__ == '__main__':

    # Load the samples.
    data = file_open()
    print("data shape",data.shape)

    # Build the model and move it to the GPU when one is available.
    logistic_model = LogisticRegression()
    if torch.cuda.is_available():
        logistic_model.cuda()

    # Loss function and optimiser.
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(
        logistic_model.parameters(),
        lr=1e-3,
        momentum=0.9,
    )

    # Train for n_epoch full-batch steps.
    n_epoch = 3000
    train(n_epoch=n_epoch, data=data)

    # Show the boundary learned by the model.
    plot(logistic_model=logistic_model)

In [None]:
"""Logistic 回归的代码实现"""

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np


class LogisticRegression(nn.Module):
    """Two-feature logistic regression: linear layer, then sigmoid."""

    def __init__(self):
        super().__init__()
        self.lr = nn.Linear(2, 1)  # two input features -> one score
        self.sm = nn.Sigmoid()     # score -> value in (0, 1)

    def forward(self, x):
        """Apply the linear layer to *x* and squash the result with sigmoid."""
        score = self.lr(x)
        return self.sm(score)


if __name__ == '__main__':
    # Load the samples: every non-blank line holds comma-separated fields, of
    # which the first three are kept. Blank lines (e.g. a trailing empty line)
    # are skipped instead of crashing float().
    with open('data/data.txt', 'r', encoding='utf8') as f:
        data_list = [line.strip().split(',') for line in f if line.strip()]
    data = [(float(i[0]), float(i[1]), float(i[2])) for i in data_list]
    data = torch.Tensor(data)

    # Build the model and move it to the GPU when one is available.
    logistic_model = LogisticRegression()
    if torch.cuda.is_available():
        logistic_model.cuda()

    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(logistic_model.parameters(), lr=1e-3, momentum=0.9)

    # The batch is identical in every epoch, so slice it and move it to the
    # GPU once instead of repeating that work inside the loop.
    x = data[:, 0:2]
    y = data[:, 2].unsqueeze(1)
    if torch.cuda.is_available():
        x, y = x.cuda(), y.cuda()

    for epoch in range(10000):
        # forward
        out = logistic_model(x)
        loss = criterion(out, y)
        print_loss = loss.item()
        mask = out.ge(0.5).float()  # outputs >= 0.5 count as class 1
        correct = (mask == y).sum()
        acc = correct.item() / x.size(0)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 1000 == 0:
            print('*' * 10)
            # Print the Python float, not the tensor repr with its grad_fn.
            print("epoch{} ----- loss:{}-------acc:{}".format(epoch+1, print_loss, acc))


    # Decision boundary: w0 * x + w1 * y + b = 0  =>  y = (-w0 * x - b) / w1.
    w0, w1 = logistic_model.lr.weight[0]
    w0 = w0.item()
    w1 = w1.item()
    b = logistic_model.lr.bias.item()
    plot_x = np.arange(30, 100, 0.1)
    plot_y = (-w0 * plot_x - b) / w1
    plt.plot(plot_x, plot_y)

    # Split the rows by their last entry (the label): 0 in red, 1 in blue.
    x0 = list(filter(lambda x: x[-1] == 0.0, data))
    x1 = list(filter(lambda x: x[-1] == 1.0, data))
    plot_x0_0 = [i[0] for i in x0]
    plot_x0_1 = [i[1] for i in x0]
    plot_x1_0 = [i[0] for i in x1]
    plot_x1_1 = [i[1] for i in x1]

    plt.plot(plot_x0_0, plot_x0_1, 'ro', label='x_0')
    plt.plot(plot_x1_0, plot_x1_1, 'bo', label='x_1')
    plt.legend()
    plt.show()
