In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset
import classification_models
from classification_models import GetLoader

In [2]:
#导入训练数据
a=np.load("new_train_data.npy")
b=a.reshape([a.shape[0],a.shape[1]*a.shape[2]])
c=b[~np.isnan(b).any(axis=1),:]
train_data=c.reshape([c.shape[0], a.shape[1],a.shape[2]])
d=np.load("new_train_label.npy").reshape(-1,1)
train_label=d[~np.isnan(b).any(axis=1),:].reshape(-1,)

In [3]:
#导入valid数据
a=np.load("new_valid_data.npy")
b=a.reshape([a.shape[0],a.shape[1]*a.shape[2]])
c=b[~np.isnan(b).any(axis=1),:]
valid_data=c.reshape([c.shape[0], a.shape[1],a.shape[2]])
d=np.load("new_valid_label.npy").reshape(-1,1)
valid_label=d[~np.isnan(b).any(axis=1),:].reshape(-1,)

In [4]:
train_data.shape,train_label.shape,valid_data.shape,valid_label.shape

((397353, 10, 42), (397353,), (48055, 10, 42), (48055,))

In [5]:
def predict_precision(model,images,labels,device,predict_type):
    with torch.no_grad():
        correct=0
        total=0
        images=images.type(torch.FloatTensor)
        labels=labels.type(torch.FloatTensor)
        images=images.to(device)
        labels=labels.to(device)
        outputs=model(images)
        _,predicted=torch.max(outputs.data,1)
        total+=sum(predicted)
        correct+=(sum(predicted*labels))
        print('precision of the model on the'+predict_type+'data: {}%'.format(100*correct/total))
    return predicted, 100*correct/total

In [10]:
#TRIAL1
from classification_models import LSTM
# Hyper Parameters
sequence_length = 10  # 序列长度，将图像的每一列作为一个序列
input_size = 42  # 输入数据的维度
hidden_size = 128  # 隐藏层的size
num_layers =  4 # 有多少层

num_classes = 2
batch_size = 1024
num_epochs = 800
learning_rate = 0.001

device = torch.device("cuda:1")

In [7]:
lstm = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes,device=device)
lstm.to(device)

LSTM(
  (lstm): LSTM(42, 128, num_layers=4, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [11]:
# Loss and Optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

In [12]:
train=GetLoader(train_data,train_label)
valid=GetLoader(valid_data,valid_label)
train_loader=torch.utils.data.DataLoader(dataset=train,batch_size=batch_size,shuffle=True,num_workers=8)
valid_loader=torch.utils.data.DataLoader(dataset=valid,batch_size=valid_data.shape[0],shuffle=False,num_workers=2)

In [15]:
total_step=0
train_precision=[]
valid_precision=[]

In [17]:
#训练过程
for epoch in range(500):
    for data in train_loader:
        images, labels = data
        images=images.type(torch.FloatTensor)
        labels=labels.type(torch.FloatTensor)
        images=images.to(device)
        labels=labels.to(device)
        #forward pass
        outputs=lstm(images)
        loss=loss_function(outputs,labels.long())
        
        #Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_step+=1
        if (total_step)%1000==0:#each 10 iterations is one epoch
            print("Epoch [{}/{}],step[{}] Loss:{:.4f}".format(epoch+1,num_epochs,total_step,loss.item()))
            _,train_pre=predict_precision(lstm,images,labels,device,predict_type='training')
            train_precision.append(train_pre)
            for images, labels in valid_loader:
                _,valid_pre=predict_precision(lstm,images,labels,device,predict_type='validation')
            valid_precision.append(valid_pre)

Epoch [3/800],step[2000] Loss:0.6810
precision of the model on thetrainingdata: 58.00865936279297%
precision of the model on thevalidationdata: 49.09107208251953%
Epoch [6/800],step[3000] Loss:0.6682
precision of the model on thetrainingdata: 64.70587921142578%
precision of the model on thevalidationdata: 49.0029411315918%
Epoch [8/800],step[4000] Loss:0.6618
precision of the model on thetrainingdata: 60.7843132019043%
precision of the model on thevalidationdata: 49.33353042602539%
Epoch [11/800],step[5000] Loss:0.6623
precision of the model on thetrainingdata: 62.910797119140625%
precision of the model on thevalidationdata: 49.13975143432617%
Epoch [13/800],step[6000] Loss:0.6621
precision of the model on thetrainingdata: 61.47186279296875%
precision of the model on thevalidationdata: 48.804813385009766%
Epoch [16/800],step[7000] Loss:0.6442
precision of the model on thetrainingdata: 66.92607116699219%
precision of the model on thevalidationdata: 47.887638092041016%
Epoch [18/800],ste

KeyboardInterrupt: 

In [None]:
plt.plot(train_precision,label="training precision")
plt.plot(valid_precision,label="validation precision")
plt.title("LSTM on stock")
plt.xlabel("1000*x training step")
plt.ylabel("precision")

In [None]:
batch_size2 = 4096
lstm2 = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes,device=device)
lstm2.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm2.parameters(), lr=learning_rate)


In [None]:
train=GetLoader(train_data,train_label)
valid=GetLoader(valid_data,valid_label)
train_loader=torch.utils.data.DataLoader(dataset=train,batch_size=batch_size2,shuffle=True,num_workers=0)
valid_loader=torch.utils.data.DataLoader(dataset=valid,batch_size=valid_data.shape[0],shuffle=False,num_workers=0)

In [None]:
total_step=0
train_precision2=[]
valid_precision2=[]

In [None]:
# Loss and Optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm2.parameters(), lr=learning_rate)

In [None]:
#训练过程
for epoch in range(2000):
    for data in train_loader:
        images, labels = data
        images=images.type(torch.FloatTensor)
        labels=labels.type(torch.FloatTensor)
        images=images.to(device)
        labels=labels.to(device)
        #forward pass
        outputs=lstm2(images)
        loss=loss_function(outputs,labels.long())
        
        #Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_step+=1
        if (total_step)%1000==0:#each 10 iterations is one epoch
            print("Epoch [{}/{}],step[{}] Loss:{:.4f}".format(epoch+1,num_epochs,total_step,loss.item()))
            _,train_pre=predict_precision(lstm2,images,labels,device,predict_type='training')
            train_precision2.append(train_pre)
            for images, labels in valid_loader:
                _,valid_pre=predict_precision(lstm2,images,labels,device,predict_type='validation')
            valid_precision2.append(valid_pre)

In [None]:
plt.plot(train_precision2,label="training precision")
plt.plot(valid_precision2,label="validation precision")
plt.title("LSTM on stock(with bigger batch_size)")
plt.xlabel("1000*x training step")
plt.ylabel("precision")

In [None]:
# Hyper Parameters
sequence_length = 10  # 序列长度，将图像的每一列作为一个序列
in_channel = 42
out_channel=32
hidden_size = 64  # 隐藏层的size
num_layers =  1 # 有多少层

num_classes = 2
batch_size = 512
num_epochs = 10000
learning_rate = 0.001

device = torch.device("cuda:1")

In [None]:
class CONV1D_LSTM(nn.Module):
    def __init__(self ,in_channel,out_channel, hidden_size, num_layers, num_classes,device=torch.device("cuda:1")):
        super(CONV1D_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.conv1 = nn.Conv1d(in_channels=in_channel, out_channels=out_channel,kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channel)
        self.conv2 = nn.Conv1d(in_channels=out_channel, out_channels=out_channel,kernel_size=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channel)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(out_channel, hidden_size, num_layers, batch_first=True)  # batch_first=True仅仅针对输入而言
        self.fc = nn.Linear(hidden_size, num_classes)
        
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight.to(device), mode='fan_out', nonlinearity='relu')


    def forward(self, x):
        
        #forward prop
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        
        out=torch.transpose(out,2,1)
        #因为pytorch里lstm和conv1d的input sequence位置不一样，需要调整。
        
        # 设置初始状态h_0与c_0的状态是初始的状态，一般设置为0，尺寸是,x.size(0)
        h0 = Variable(torch.zeros(self.num_layers, out.size(0), self.hidden_size)).to(device)
        c0 = Variable(torch.zeros(self.num_layers, out.size(0), self.hidden_size)).to(device)

        # Forward propagate RNN
        out, (h_n, c_n) = self.lstm(out, (h0, c0))  # 送入一个初始的x值，作为输入以及(h0, c0)
        # Decode hidden state of last time step
        out = self.fc(out[:, -1, :])  # output也是batch_first, 实际上h_n与c_n并不是batch_first
        return out

In [None]:
#from classification_models import CONV1D_LSTM
conv1d_lstm = CONV1D_LSTM(in_channel=in_channel, out_channel=out_channel,
                   hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes,device=device)
conv1d_lstm.to(device)

In [None]:
train_data=np.transpose(train_data,(0,2,1))
valid_data=np.transpose(valid_data,(0,2,1))
train=GetLoader(train_data,train_label)
valid=GetLoader(valid_data,valid_label)
train_loader=torch.utils.data.DataLoader(dataset=train,batch_size=batch_size,shuffle=True,num_workers=0)
valid_loader=torch.utils.data.DataLoader(dataset=valid,batch_size=valid_data.shape[0],shuffle=False,num_workers=0)

In [None]:
train_data.shape

In [None]:
total_step=0
train_precision=[]
valid_precision=[]

In [None]:
# Loss and Optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(conv1d_lstm.parameters(), lr=learning_rate)

In [None]:
#训练过程
#出现的问题：把learning rate设为0.1和0.01，都会出现diverge问题
#把learning_rate设为0.0001时在2000epoch左右（其他超参同上方超参表）时会出现loss无法进一步缩小的问题
total_step=0
for epoch in range(num_epochs):
    for data in train_loader:
        images, labels = data
        images=images.type(torch.FloatTensor)
        labels=labels.type(torch.FloatTensor)
        images=images.to(device)
        labels=labels.to(device)
        #forward pass
        outputs=conv1d_lstm(images)
        loss=loss_function(outputs,labels.long())
        
        #Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_step+=1
        if (total_step)%1000==0:
            print("Epoch [{}/{}],step[{}] Loss:{:.4f}".format(epoch+1,num_epochs,total_step,loss.item()))
            _,train_pre=predict_precision(conv1d_lstm,images,labels,device,predict_type='training')
            train_precision.append(train_pre)
            for images, labels in valid_loader:
                _,valid_pre=predict_precision(conv1d_lstm,images,labels,device,predict_type='validation')
            valid_precision.append(valid_pre)

In [None]:
plt.plot(train_precision,label="training precision")
plt.plot(valid_precision,label="validation precision")
plt.title("conv1d_lstm on stock")
plt.xlabel("1000*x training step")
plt.ylabel("precision")

训练过程总结：
使用以下超参时：

$Hyper Parameters
sequence_length = 10  # 序列长度，将图像的每一列作为一个序列
in_channel = 42
out_channel=32
hidden_size = 64  # 隐藏层的size
num_layers =  2 # 有多少层
num_classes = 2
batch_size = 1024
num_epochs = 10000
learning_rate = 0.0001
device = torch.device("cuda:1")$


出现的问题：把learning rate设为0.1和0.01，都会出现diverge问题
把learning_rate设为0.0001时在2000epoch左右（其他超参同上方超参表）激活函数为leakyrelu,lstm两层时会出现loss无法进一步缩小的问题(pic3),初步认为是因为梯度消失导致的问题，对此想出的解决办法为：


（1）调整激活参数为relu。若有效，可进一步考虑使用更深层次的网络结构


$Hyper Parameters
sequence_length = 10  # 序列长度，将图像的每一列作为一个序列
in_channel = 42
out_channel=32
hidden_size = 64  # 隐藏层的size
num_layers =  1 # 有多少层
num_classes = 2
batch_size = 512
num_epochs = 10000
learning_rate = 0.001
device = torch.device("cuda:1")$
结果无效，比leakyrelu变差了。见pic4

（2）使用不同的optimizer

（1）调整激活参数为relu。若有效，可进一步考虑使用更深层次的网络结构
$Hyper Parameters
sequence_length = 10  # 序列长度，将图像的每一列作为一个序列
in_channel = 42
out_channel=32
hidden_size = 64  # 隐藏层的size
num_layers =  1 # 有多少层
num_classes = 2
batch_size = 512
num_epochs = 10000
learning_rate = 0.001
device = torch.device("cuda:1")$
结果无效，比leakyrelu变差了。见pic4

（2）使用不同的optimizer