<a href="https://colab.research.google.com/github/SherlockHems/AI-Financial-Time-Series-Prediction/blob/main/BTC_AVAP_LSTM_1TM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 手搓AI股价预测模型-从入门到入街
## 黑铁段位：LSTM单变量单步长+单步长roll预测篇
### 问题描述：BTC未来价格预测（0 Order Difference）
### 使用模型：Simple LSTM

In [28]:
# 导入tensorflow
import tensorflow as tf
# 导入tensorflow.keras
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# 导入数据科学三件套
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 导入sklearn的scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

### 数据处理

In [None]:
# Load the raw BTC price table from the CSV file
series_unscaled_unsplit = pd.read_csv('BTC_AVAP_1TM.csv')
# Preview the first rows to confirm the layout
series_unscaled_unsplit.head()

In [30]:
# Keep every column except the first one (the date column, per the original
# author's note) and expose the result as a plain NumPy array
value_columns = series_unscaled_unsplit.iloc[:, 1:]
series_unscaled = value_columns.values
# Display (n_rows, n_columns) as a sanity check
series_unscaled.shape

(4230, 1)

In [None]:
# Inspect the first rows of the raw (unscaled) values
df = pd.DataFrame(data=series_unscaled)
df.head(n=5)

In [None]:
# Inspect the last rows of the raw (unscaled) values
df = pd.DataFrame(data=series_unscaled)
df.tail(n=5)

In [None]:
# Rescale the values into the [0, 1] range.
# MinMaxScaler works column by column, so each feature gets its own min/max.
# NOTE(review): the scaler is fitted on the whole series, including the tail
# that the training cell later holds out for validation, so validation-period
# min/max leak into the scaling. Consider fitting on the training portion only.
sc = MinMaxScaler(feature_range=(0, 1))
# fit_transform returns a fresh array, so no pre-allocated buffer is needed
series = sc.fit_transform(series_unscaled)
# Inspect the last rows of the scaled values
df = pd.DataFrame(series)
df.tail()

### 模型建立

In [37]:
# Build sliding-window samples: each input is n_timesteps consecutive rows of
# the scaled series and its target is the row right after that window
n_timesteps = 50
n_features = series.shape[1]
window_starts = range(series.shape[0] - n_timesteps)
X = [series[start : start + n_timesteps] for start in window_starts]
Y = [series[start + n_timesteps] for start in window_starts]

In [38]:
# Turn the window lists into arrays; the LSTM input must be laid out as
# (n_sample, n_timesteps, n_features)
n_sample = len(X)
X = np.reshape(np.array(X), (n_sample, n_timesteps, n_features))
Y = np.array(Y)
# Print both shapes as a sanity check
print("X.shape", X.shape, "Y.shape", Y.shape)

X.shape (4180, 50, 1) Y.shape (4180, 1)


In [39]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling are repeatable between runs (some GPU kernels may
# still be nondeterministic)
tf.keras.utils.set_random_seed(42)

# Build a simple single-layer LSTM regressor:
# (n_timesteps, n_features) window -> 64-unit LSTM -> one linear output
n_input = Input(shape=(n_timesteps, n_features))
x = LSTM(64, dropout=0.1)(n_input)
x = Dense(1)(x)
model = Model(n_input, x)
# Mean squared error loss, optimised with Adam
model.compile(
  loss='mse',
  optimizer=Adam(learning_rate=0.01),
)

### 训练模型

In [None]:
# Fit on the leading windows and validate on the trailing quarter
val_start = -n_sample//4  # negative index where the validation tail begins
X_train, Y_train = X[:val_start], Y[:val_start]
X_val, Y_val = X[val_start:], Y[val_start:]
r = model.fit(
  X_train, Y_train,
  epochs=50,
  validation_data=(X_val, Y_val),
)

### 检查结果

In [None]:
# Plot training vs. validation loss per epoch
# (matplotlib.pyplot is already imported as plt in the notebook's import cell)
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.title('Training history')
plt.legend()
# Render the figure explicitly so the cell does not echo the Legend repr
plt.show()

In [None]:
# One-step-ahead prediction: every input window comes from the true series
outputs = model.predict(X)
print(outputs.shape)
# Take the first output column to get a 1-D curve
predictions = outputs[:, 0]

# Overlay targets and predictions (both still in scaled units)
for label, curve in (('targets', Y), ('predictions', predictions)):
  plt.plot(curve, label=label)
plt.legend()
plt.show()

In [None]:
# Multi-step (autoregressive) forecast over the validation range: after the
# first window, the model is fed its own predictions instead of true values
validation_target = Y[-n_sample//4:]
validation_predictions = []

# Seed window: the first validation sample
last_x = X[-n_sample//4]

while len(validation_predictions) < len(validation_target):
  # verbose=0 stops Keras from printing one progress bar per forecast step
  p = model.predict(
    last_x.reshape(1, n_timesteps, n_features), verbose=0
  )[0, 0]  # 1x1 array -> scalar

  # Record the forecast for this step
  validation_predictions.append(p)

  # Slide the window one step along the time axis (np.roll returns a new
  # array) and overwrite the newest slot with the forecast so it becomes part
  # of the next input
  last_x = np.roll(last_x, -1, axis=0)
  last_x[-1] = p

plt.plot(validation_target, label='forecast target')
plt.plot(validation_predictions, label='forecast prediction')
plt.legend()
plt.show()