# 数据处理方式：
- 直接用 torch 生成 tensor -> Variable
- 从 numpy -> torch tensor -> Variable
- 采用自带的 dataset
- 自定义的数据集

注：自 PyTorch 0.4 起 Variable 已与 Tensor 合并，`Variable(x)` 直接返回 Tensor，下文中的 Variable 包装仅为兼容旧写法而保留。

# 直接用torch生成tensor -> Variable

In [2]:
import torch 
from torch.autograd import Variable

# Synthetic regression data: 100 evenly spaced inputs in [-1, 1] laid out as a
# (100, 1) column, with targets x^2 plus uniform noise scaled by 0.2.
x = torch.linspace(-1, 1, 100).unsqueeze(1)
y = x.pow(2) + torch.rand(x.size()) * 0.2

# Wrap both tensors in Variable -- the conversion this section demonstrates.
x = Variable(x)
y = Variable(y)

class Net(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        n_feature: Size of each input sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # Layers owned by this module.
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Run x through the hidden layer with ReLU, then the output layer."""
        # How the layers are wired together.
        activated = torch.relu(self.hidden(x))
        return self.predict(activated)

# Regressor with 1 input feature, 10 hidden units and 1 output.
net = Net(1, 10, 1)

# Mean-squared-error objective, minimised with plain SGD.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# Every iteration feeds the whole of x and y through the network.
for t in range(1000):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    prediction = net(x)
    loss = loss_func(prediction, y)
    loss.backward()
    optimizer.step()
    if not t % 200:
        # Report the loss tensor once every 200 iterations.
        print('loss:', loss)

loss: tensor(0.4932, grad_fn=<MseLossBackward>)
loss: tensor(0.0168, grad_fn=<MseLossBackward>)
loss: tensor(0.0036, grad_fn=<MseLossBackward>)
loss: tensor(0.0039, grad_fn=<MseLossBackward>)
loss: tensor(0.0040, grad_fn=<MseLossBackward>)


# 从numpy -> torch tensor -> Variable

In [21]:
import torch 
from torch.autograd import Variable
import numpy as np

# Synthetic regression data built with NumPy: a (100, 1) float32 column of
# inputs in [-1, 1] and targets x^2 plus uniform noise scaled by 0.2.
x = np.linspace(-1, 1, 100).astype(np.float32).reshape(100, 1)
y = x ** 2 + 0.2 * np.random.random(x.shape).astype(np.float32)

# NumPy array -> torch tensor -> Variable.
x = Variable(torch.from_numpy(x))
y = Variable(torch.from_numpy(y))

class Net(torch.nn.Module):
    """Small regression network: one ReLU hidden layer and a linear output.

    Args:
        n_feature: Size of each input sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # Layers owned by this module.
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Return predict(relu(hidden(x)))."""
        # How the layers are wired together.
        hidden_out = self.hidden(x)
        return self.predict(torch.relu(hidden_out))

# Regressor with 1 input feature, 10 hidden units and 1 output.
net = Net(1, 10, 1)

# Mean-squared-error objective, minimised with plain SGD.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# Every iteration feeds the whole of x and y through the network.
for t in range(1000):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    prediction = net(x)
    loss = loss_func(prediction, y)
    loss.backward()
    optimizer.step()
    if not t % 200:
        # Report the loss tensor once every 200 iterations.
        print('loss:', loss)

loss: tensor(0.4710, grad_fn=<MseLossBackward>)
loss: tensor(0.0611, grad_fn=<MseLossBackward>)
loss: tensor(0.0610, grad_fn=<MseLossBackward>)
loss: tensor(0.0610, grad_fn=<MseLossBackward>)
loss: tensor(0.0610, grad_fn=<MseLossBackward>)


# 采用自带的 dataset

In [29]:
import torch
from torch import nn
from torch.autograd import Variable
import torchvision.datasets as dsets

import torchvision.transforms as transforms
import matplotlib.pyplot as plt
%matplotlib inline

torch.manual_seed(1)  # seed torch's global RNG so runs are repeatable

# Hyper Parameters
EPOCH = 1               # train the training data n times, to save time, we just train 1 epoch
BATCH_SIZE = 64
TIME_STEP = 28          # rnn time step / image height
INPUT_SIZE = 28         # rnn input size / image width
LR = 0.01               # learning rate
DOWNLOAD_MNIST = True   # set to True if haven't download the data

# NOTE(review): machine-specific absolute path -- point it at a local directory.
mnist_base_path="/Users/zhouwencheng/Desktop/Grass/data/picture/mnist"
train_data = dsets.MNIST(
    root=mnist_base_path,
    train=True,
    transform=transforms.ToTensor(),  # (0, 1)
    download=DOWNLOAD_MNIST,
)

# `.data` is the current attribute name; `.train_data` is a deprecated alias.
print(train_data.data.size())
print(train_data.targets.size())

train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_data = dsets.MNIST(
    root=mnist_base_path,
    train=False,
    transform=transforms.ToTensor(),
)

# Evaluation subset: the first 2000 test images and labels. The raw `.data`
# tensor is read directly (the transform above is not applied to it), so the
# pixels are scaled by 1/255 by hand. Slicing happens before the float
# conversion so only the 2000 images that are used get converted.
# `volatile=True` is dropped: it has had no effect since PyTorch 0.4; use
# `torch.no_grad()` around inference instead.
test_x = Variable(test_data.data[:2000].type(torch.FloatTensor)) / 255.
test_y = test_data.targets.numpy().squeeze()[:2000]

class RNN(nn.Module):
    """Single-layer LSTM followed by a linear layer producing 10 scores.

    The per-step feature width is read from the module-level INPUT_SIZE.
    """

    def __init__(self):
        super().__init__()
        # LSTM is used here; with nn.RNN() the model hardly learns.
        self.rnn = nn.LSTM(
            input_size=INPUT_SIZE,
            hidden_size=64,      # rnn hidden units
            num_layers=1,        # number of stacked rnn layers
            batch_first=True,    # tensors are (batch, time_step, input_size)
        )
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        """Map x of shape (batch, time_step, input_size) to (batch, 10) scores."""
        # No initial state is passed, so the LSTM starts from a zero state.
        # r_out:    (batch, time_step, hidden_size)
        # h_n, h_c: (n_layers, batch, hidden_size) -- not used below
        r_out, _ = self.rnn(x)

        # Classify from the LSTM output at the last time step only.
        last_step = r_out[:, -1, :]
        return self.out(last_step)
    
rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all rnn parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

# train and testing
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        # Reshape each batch to (batch, time_step, input_size) for the LSTM.
        b_x = Variable(x.view(-1, TIME_STEP, INPUT_SIZE))
        b_y = Variable(y)

        output = rnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 200 == 0:
            # Periodic evaluation on the held-out subset. no_grad() stops
            # autograd from recording a graph for the 2000-image forward pass.
            with torch.no_grad():
                test_output = rnn(test_x)
            pred_y = torch.max(test_output, 1)[1].numpy().squeeze()
            # Vectorised mean of the boolean match mask == fraction correct.
            accuracy = (pred_y == test_y).mean()
            print(f'Epoch:  {epoch} | step:{step} | train loss: {loss.item()} | test accuracy: {accuracy}')
    print("OK")
            

torch.Size([60000, 28, 28])
torch.Size([60000])
RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)




Epoch:  0 | step:0 | train loss: 2.2883260250091553 | test accuracy: 0.1025
Epoch:  0 | step:200 | train loss: 0.4038448631763458 | test accuracy: 0.8375
Epoch:  0 | step:400 | train loss: 0.21243950724601746 | test accuracy: 0.913
Epoch:  0 | step:600 | train loss: 0.05201313644647598 | test accuracy: 0.9405
Epoch:  0 | step:800 | train loss: 0.12530261278152466 | test accuracy: 0.942
OK


# 自定义的数据集

In [55]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms 
import numpy as np


class CustomDataset(Dataset):  # must subclass torch.utils.data.Dataset
    """Placeholder dataset that fabricates a random sample on every access.

    Each item is a float32 tensor of 10 random values paired with a random
    integer label (0 or 1); the requested index is ignored.
    """

    def __init__(self):
        """Nothing to set up yet."""
        # TODO
        # 1. Initialize file path or list of file names.

    def __getitem__(self, index):
        """Return one (data, label) pair; `index` is currently unused."""
        # TODO
        # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.Transform).
        # 3. Return a data pair (e.g. image and label).
        # Note: step 1 reads exactly ONE sample, not a batch.
        features = np.random.random(10).astype(np.float32)
        label = np.random.randint(0, 2)

        return Variable(torch.from_numpy(features)), label

    def __len__(self):
        """Report a fixed dataset size of 1000 items."""
        # Replace with the real number of samples once real data is loaded.
        return 1000
    
# NOTE(review): `a` is not referenced anywhere else in the visible notebook.
a = 100

# Serve the synthetic dataset in order, 20 samples per mini-batch.
train_data = CustomDataset()
train_loader = DataLoader(
    train_data,
    shuffle=False,
    batch_size=20,
)


class Net(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        n_feature: Size of each input sample.
        n_hidden: Number of units in the hidden layer.
        n_output: Size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # Layers owned by this module.
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Run x through the hidden layer with ReLU, then the output layer."""
        # How the layers are wired together.
        activated = torch.relu(self.hidden(x))
        return self.predict(activated)

# Classifier for the 10-feature samples: 10 inputs, 10 hidden units, 2 classes.
net = Net(10, 10, 2)

print(net)

# The optimizer must own the parameters of `net`, the model trained below.
# (It was previously built over `rnn.parameters()`, so `net` was never updated.)
# LR and EPOCH are the hyper-parameters defined in the MNIST cell.
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

# train and testing
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        x = Variable(x)
        y = Variable(y)
        output = net(x)
        loss = loss_func(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % 10 == 0:
            # Report the loss tensor every 10 mini-batches.
            print(loss)
    


Net(
  (hidden): Linear(in_features=10, out_features=10, bias=True)
  (predict): Linear(in_features=10, out_features=2, bias=True)
)
tensor(0.6767, grad_fn=<NllLossBackward>)
tensor(0.6240, grad_fn=<NllLossBackward>)
tensor(0.6816, grad_fn=<NllLossBackward>)
tensor(0.7057, grad_fn=<NllLossBackward>)
tensor(0.6983, grad_fn=<NllLossBackward>)


In [27]:
# Scratch cell exercising the two NumPy calls used by CustomDataset.__getitem__.
# Draws 10 random floats; the result is discarded.
np.random.random(10)
# Draws a random integer that is 0 or 1; as the last expression in the cell,
# its value is what the notebook displays.
np.random.randint(0, 2)

1