## 导包

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 


### 导入数据

In [2]:
#use numpy to load dataset
dataset =np.loadtxt('./NFSYSU-Wu-Zhan-master/dataset/stock_2.csv',delimiter=",",dtype="str")

In [3]:
print(dataset.shape)

(6110, 10)


In [4]:
dataset[:5,:]

array([['index_code', 'date', 'open', 'close', 'low', 'high', 'volume',
        'money', 'change', 'label'],
       ['sh000001', '1990/12/20', '104.3', '104.39', '99.98', '104.39',
        '197000', '85000', '0.044108822', '109.13'],
       ['sh000001', '1990/12/21', '109.07', '109.13', '103.73', '109.13',
        '28000', '16100', '0.045406648', '114.55'],
       ['sh000001', '1990/12/24', '113.57', '114.55', '109.13', '114.55',
        '32000', '31100', '0.049665537', '120.25'],
       ['sh000001', '1990/12/25', '120.09', '120.25', '114.55', '120.25',
        '15000', '6500', '0.04975993', '125.27']], dtype='<U12')

In [5]:
# pandas 导入数据
dataset_pd=pd.read_csv('./NFSYSU-Wu-Zhan-master/dataset/stock_2.csv')

In [6]:
dataset_pd.head(5)

Unnamed: 0,index_code,date,open,close,low,high,volume,money,change,label
0,sh000001,1990/12/20,104.3,104.39,99.98,104.39,197000.0,85000.0,0.044109,109.13
1,sh000001,1990/12/21,109.07,109.13,103.73,109.13,28000.0,16100.0,0.045407,114.55
2,sh000001,1990/12/24,113.57,114.55,109.13,114.55,32000.0,31100.0,0.049666,120.25
3,sh000001,1990/12/25,120.09,120.25,114.55,120.25,15000.0,6500.0,0.04976,125.27
4,sh000001,1990/12/26,125.27,125.27,120.25,125.27,100000.0,53700.0,0.041746,125.28


In [7]:
#pandas 的 数据 转成numpy的矩阵
dataset_np = dataset_pd.values

In [8]:
dataset_np.shape

(6109, 10)

In [9]:
dataset_np[:5,:]

array([['sh000001', '1990/12/20', 104.3, 104.39, 99.98, 104.39, 197000.0,
        85000.0, 0.044108822, 109.13],
       ['sh000001', '1990/12/21', 109.07, 109.13, 103.73, 109.13,
        28000.0, 16100.0, 0.045406648, 114.55],
       ['sh000001', '1990/12/24', 113.57, 114.55, 109.13, 114.55,
        32000.0, 31100.0, 0.049665537, 120.25],
       ['sh000001', '1990/12/25', 120.09, 120.25, 114.55, 120.25,
        15000.0, 6500.0, 0.04975993, 125.27],
       ['sh000001', '1990/12/26', 125.27, 125.27, 120.25, 125.27,
        100000.0, 53700.0, 0.041746362, 125.28]], dtype=object)

### 切出X和y

In [10]:
X = dataset_np[:,2:-1].astype(np.float32)
y = dataset_np[:,-1:].astype(np.float32)

In [11]:
print(X.shape)
print(X.dtype)

(6109, 7)
float32


In [12]:
print(y.shape)
print(X.dtype)

(6109, 1)
float32


### 切分训练集 和测试集

In [13]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=19)

### 标准化

In [14]:
from sklearn.preprocessing import StandardScaler
standard = StandardScaler()
standard.fit(X_train)

X_train_standard =standard.transform(X_train)
X_test_standard =standard.transform(X_test)

### 设定超参数

In [15]:
input_size = X.shape[1]
ouput_size = y.shape[1]
hidden_size = 32 # 神经元的个数为32
learning_rate = 1e-2
EPOCH = 10000

### 搭建模型——全连接神经网络

In [16]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.hidden1 = nn.Linear(input_size,hidden_size) #[-1,7]*[7,32]==>[-1,32]
        self.hidden2 = nn.Linear(hidden_size,hidden_size) #[-1,32]*[32,32]
        
        self.predict = nn.Linear(hidden_size,ouput_size) #[-1,32]*[32,1 ]
        
    def forward(self,x):
        #x(X_train 和 X_test) == [-1,7]
        #真正搭建网络的地方
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        
        out = self.predict(x)
        
        return out

In [17]:
net = Net()
net

Net(
  (hidden1): Linear(in_features=7, out_features=32, bias=True)
  (hidden2): Linear(in_features=32, out_features=32, bias=True)
  (predict): Linear(in_features=32, out_features=1, bias=True)
)

### 优化器，损失函数

In [18]:
optimizer = torch.optim.Adam(net.parameters(),lr=learning_rate)
loss_func =  nn.MSELoss()

In [19]:
y_train.shape

(4887, 1)

In [20]:
y_train.dtype

dtype('float32')

### 开始训练我们的模型

In [21]:
Train_losses = []

for epoch in range(EPOCH):
        input_x = Variable(torch.FloatTensor(X_train_standard))  #特征 array -->tensor-->Variable
        input_y = Variable(torch.FloatTensor(y_train))  #标签 array -->tensor-->Variable

        y_pred = net(input_x)

        loss = loss_func(input_y,y_pred)
        Train_losses.append(loss.data.numpy())#将Tensor 转成 numpy 数值

        #非常标准的一套流程
        optimizer.zero_grad() #梯度归0
        loss.backward() #求梯度
        optimizer.step() #把梯度传进去

        #--------------------------#
        if(epoch+1)%50 == 0:
            print('Training...Epoch:',(epoch+1),'Loss:',loss.data.numpy())
        #-------------使用 测试集 去查看 测试集的 效果-----------#
        #每隔 500个epoch，就导入 测试集 去查看 模型的效果
        if(epoch+1)%500 == 0:
                test_x = Variable(torch.FloatTensor(X_test_standard))#特征 array -->tensor-->Variable
                test_y = Variable(torch.FloatTensor(y_test))#标签 array -->tensor-->Variable

                test_pred = net(test_x)

                test_loss = loss_func(test_y,test_pred)

                #非常标准的一套流程
                optimizer.zero_grad() #梯度归0
                test_loss.backward() #求梯度
                optimizer.step() #把梯度传进去
                print('-----------------------------')
                print('Testing...Epoch:',(epoch+1),'Loss:',test_loss.data.numpy())
                print('-----------------------------')

Training...Epoch: 50 Loss: 3003117.5
Training...Epoch: 100 Loss: 610202.25
Training...Epoch: 150 Loss: 147015.45
Training...Epoch: 200 Loss: 81208.28
Training...Epoch: 250 Loss: 40363.613
Training...Epoch: 300 Loss: 16359.588
Training...Epoch: 350 Loss: 7221.7354
Training...Epoch: 400 Loss: 3858.0903
Training...Epoch: 450 Loss: 2075.6875
Training...Epoch: 500 Loss: 1450.6141
-----------------------------
Testing...Epoch: 500 Loss: 1633.3112
-----------------------------
Training...Epoch: 550 Loss: 1252.4307
Training...Epoch: 600 Loss: 1118.5753
Training...Epoch: 650 Loss: 1050.0193
Training...Epoch: 700 Loss: 1010.1191
Training...Epoch: 750 Loss: 978.7086
Training...Epoch: 800 Loss: 954.46216
Training...Epoch: 850 Loss: 940.5181
Training...Epoch: 900 Loss: 931.4474
Training...Epoch: 950 Loss: 924.60754
Training...Epoch: 1000 Loss: 919.64526
-----------------------------
Testing...Epoch: 1000 Loss: 1324.4283
-----------------------------
Training...Epoch: 1050 Loss: 915.09717
Training..

Training...Epoch: 8600 Loss: 644.49536
Training...Epoch: 8650 Loss: 644.46265
Training...Epoch: 8700 Loss: 645.9451
Training...Epoch: 8750 Loss: 644.47437
Training...Epoch: 8800 Loss: 644.3791
Training...Epoch: 8850 Loss: 645.26825
Training...Epoch: 8900 Loss: 644.4875
Training...Epoch: 8950 Loss: 644.2994
Training...Epoch: 9000 Loss: 644.2712
-----------------------------
Testing...Epoch: 9000 Loss: 1075.2711
-----------------------------
Training...Epoch: 9050 Loss: 644.80347
Training...Epoch: 9100 Loss: 644.22107
Training...Epoch: 9150 Loss: 644.1935
Training...Epoch: 9200 Loss: 644.1673
Training...Epoch: 9250 Loss: 644.1426
Training...Epoch: 9300 Loss: 644.12366
Training...Epoch: 9350 Loss: 646.0694
Training...Epoch: 9400 Loss: 644.10065
Training...Epoch: 9450 Loss: 644.058
Training...Epoch: 9500 Loss: 644.0343
-----------------------------
Testing...Epoch: 9500 Loss: 1075.2535
-----------------------------
Training...Epoch: 9550 Loss: 644.2483
Training...Epoch: 9600 Loss: 644.0018

## 让模型能够 收敛 的方法
### 1.加大EPOCH
### 2.减小学习率
### 3.增加模型的复杂度，增加多一些网路层次，增加神经元个数


### 验证模型的 好坏，可视化

In [23]:
# 1.把tensor 转为 array
prediction = test_pred.data.numpy()

In [24]:
type(prediction)

numpy.ndarray

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

def caculate(y_true,y_predict):
    print''