<a href="https://colab.research.google.com/github/Hank-Cui/otis2019/blob/master/Short_term_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker

from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import autocorrelation_plot

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

%pip install --upgrade xgboost
import xgboost as xgb

import os
import time
import glob
import PIL
import imageio
from IPython import display

Requirement already up-to-date: xgboost in /usr/local/lib/python3.6/dist-packages (0.90)


In [0]:
import torch
import torch.nn

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
print(tf.__version__)

1.15.0


In [0]:
from google.colab import drive
drive.mount('/gdrive')

In [0]:
file_list = glob.glob("/gdrive/My Drive/Blair/10/KWHS2019/hist_data/*")
df = pd.read_csv("/gdrive/My Drive/Blair/10/KWHS2019/KWHS_list.csv")
stock_list = df['TICKER']
technology = df[df['SECTOR'] == "Technology"]
energy = df[df['SECTOR'] == "Energy"]

In [0]:
comp_tic = "GOOG"
df = pd.read_csv("/gdrive/My Drive/Blair/10/KWHS2019/hist_data/"+comp_tic+".csv")
df.head()

In [0]:
def plot_stock(company_name, days):
    comp_tic = company_name

    df = pd.read_csv("/gdrive/My Drive/Blair/10/KWHS2019/hist_data/"+comp_tic+".csv")
    df = df[-days:]

    df['Date'] = pd.to_datetime(df.Date,format='%Y-%m-%d')
    df.index = df['Date']

    #plot
    df = tech_indictor(df)

    plt.figure(figsize=(16,9), dpi=80)
    plt.plot(df['Close'], label='Close Price history')
    plt.plot(df['macd'], label='MACD', color='orange')
    plt.plot(df['dea'], label='DEA', color='Magenta')
    plt.plot(df['ma21'], label='MA 21', color='r', linestyle='--')
    plt.plot(df['upper_band'], label='Upper Band', color='c')
    plt.plot(df['lower_band'], label='Lower Band', color='c')
    plt.plot(df['ma7'], label='MA 7', color='g',linestyle='--')


    plt.title(comp_tic+' Stock Price') # 添加标题
    plt.xlabel('Date') 
    plt.ylabel('USD')
    plt.legend(loc='upper left')
    plt.show()

In [0]:
def tech_indictor(data):

    # Simple Moving Average
    data['ma7'] = data['Close'].rolling(window=7).mean()
    data['ma21'] = data['Close'].rolling(window=21).mean()

    # MACD indicator
    data['ema12'] = data.Close.ewm(span=12, adjust=False).mean()
    data['ema26'] = data.Close.ewm(span=26, adjust=False).mean()
    data['dea'] = data.Close.ewm(span=9, adjust=False).mean()
    data['macd'] = data['ema12']-data['ema26']

    # Bollinger Band
    data['md'] = data.Close.rolling(window=20).std()
    data['upper_band'] = data['ma21'] + (data['md'] * 2)
    data['lower_band'] = data['ma21'] - (data['md'] * 2)

    # Momentum
    data['momentum'] = data['Close'] - 1

    # Exponential Moving Average
    data['ema'] = data['Close'].ewm(com=0.5).mean()

    return data

In [0]:
def fundamental_analysis:
  #TODO:

In [0]:
 def show_fourier_transform(comp_tic, days):
    df = pd.read_csv("/gdrive/My Drive/Blair/10/KWHS2019/hist_data/"+comp_tic+".csv")
    df = df[-days:]

    data_FT = df[['Date', 'Close']]
    fft_list = np.fft.fft(np.array(data_FT['Close']))

    plt.figure(figsize=(14, 7), dpi=100)

    for i in [3, 6, 9, 30]:
        fft_a = np.copy(fft_list)
        fft_a[i:-i] = 0
        plt.plot(np.fft.ifft(fft_a), label = 'Fourier Transform of %i components' % i)

    plt.plot(np.array(data_FT['Close']),  label='Real')
    plt.xlabel('Days')
    plt.ylabel('USD')
    plt.title('{} close stock prices + Fourier transforms of recent {} days'.format(comp_tic, days))
    plt.legend()
    plt.show()

In [0]:
def show_arima_predictions(comp_tic, days):

    df_1 = pd.read_csv("/gdrive/My Drive/Blair/10/KWHS2019/hist_data/"+comp_tic+".csv")
    # df_1 = pd.read_csv("/content/AAPL.csv")
    data = np.array(df_1[-days:]['Close'])

    model = ARIMA(data, order=(5, 1, 0))
    model_fit = model.fit(disp=0)
    print(model_fit.summary())
    autocorrelation_plot(data)  
    plt.figure(figsize=(10, 7), dpi=80)
    plt.show() 

    train, test = train_test_split(data, test_size=0.33, shuffle=False)

    history = [x for x in train]
    predictions = []

    # train data
    for i in range(len(test)): 
        model = ARIMA(history, order=(5,1,0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        result = output[0]
        predictions.append(result)
        obs = test[i]
        history.append(obs)

    error = mean_squared_error(test, predictions) # calculate MSE
    print('Test MSE: %.3f' % error)

    plt.figure(figsize=(12, 6), dpi=100)
    plt.plot(test, label='Real')
    plt.plot(predictions, color='red', label='Predicted')
    plt.xlabel('Days')
    plt.ylabel('USD')
    plt.title('ARIMA model on {} stock'.format(comp_tic))
    plt.legend()
    plt.show()

In [0]:
def get_Weight(comp_tic, days):
    a = tech_indictor(df)[-days:]
    b = df[['ma7','ma21','ema12', 'ema26', 'md', 'macd','upper_band','lower_band']][-days:]
    data = b.copy()
    
    y = a['Close']
    X = data

    X_train_FI, X_test_FI, y_train_FI, y_test_FI = train_test_split(X, y, test_size=0.33, shuffle=False)

    regressor = xgb.XGBRegressor(gamma=0.0,
                                n_estimators=1500,
                                base_score=0.7,
                                colsample_bytree=1,
                                learning_rate=0.03)

    xgbModel = regressor.fit(X_train_FI,y_train_FI, \
                            eval_set = [(X_train_FI, y_train_FI), (X_test_FI, y_test_FI)], \
                            verbose=False)

    eval_result = regressor.evals_result()
    training_rounds = range(len(eval_result['validation_0']['rmse']))

    plt.scatter(x=training_rounds,y=eval_result['validation_0']['rmse'],label='Training Error')
    plt.scatter(x=training_rounds,y=eval_result['validation_1']['rmse'],label='Validation Error')
    plt.xlabel('Iterations')
    plt.ylabel('RMSE')
    plt.title('Training Vs Validation Error')
    plt.legend()
    plt.show()

    fig = plt.figure(figsize=(8,8))
    plt.xticks(rotation='vertical')
    plt.bar([i for i in range(len(xgbModel.feature_importances_))], xgbModel.feature_importances_.tolist(), tick_label=X_test_FI.columns)
    plt.title('Figure 6: Feature importance of the technical indicators.')
    plt.show()

In [0]:
#NN
#TODO:

In [0]:
#PCA
pca = PCA(n_components=.8)
x_pca = StandardScaler().fit_transform(vae_added_df)
principalComponents = pca.fit_transform(x_pca)
print(principalComponents.n_components_)

In [0]:
#neural network

def gpu_exists():
    try:
        mx.nd.zeros((1,), ctx=mx.gpu(0))
    except:
        return False
    return True

data_ctx = mx.cpu()
if gpu_exists():
    print('Using GPU for model_ctx')
    model_ctx = mx.gpu(0)
else:
    print('Using CPU for model_ctx')
    model_ctx = mx.cpu()


df = pd.read_csv("/content/AAPL.csv")
num_training_days = 365
data = np.array(df[-1000:]['Close'])

#------------------------------------这一段什么意思？？
VAE_data = data
batch_size = 64
n_batches = VAE_data.shape[0]/batch_size
VAE_data = VAE_data.values

train_iter = mx.io.NDArrayIter(data={'data': VAE_data[:num_training_days,:-1]}, \
                               label={'label': VAE_data[:num_training_days, -1]}, batch_size = batch_size)
test_iter = mx.io.NDArrayIter(data={'data': VAE_data[num_training_days:,:-1]}, \
                              label={'label': VAE_data[num_training_days:,-1]}, batch_size = batch_size)
# #-----------------------------------

model_ctx =  mx.gpu() #cpu()

class VAE(gluon.HybridBlock):
    def __init__(self, n_hidden=400, n_latent=2, n_layers=1, n_output=784, \
                 batch_size=100, act_type='relu', **kwargs):
        self.soft_zero = 1e-10
        self.n_latent = n_latent
        self.batch_size = batch_size
        self.output = None
        self.mu = None
        super(VAE, self).__init__(**kwargs)
        
        with self.name_scope():
            self.encoder = nn.HybridSequential(prefix='encoder')
            
            for i in range(n_layers):
                self.encoder.add(nn.Dense(n_hidden, activation=act_type))
            self.encoder.add(nn.Dense(n_latent*2, activation=None))

            self.decoder = nn.HybridSequential(prefix='decoder')
            for i in range(n_layers):
                self.decoder.add(nn.Dense(n_hidden, activation=act_type))
            self.decoder.add(nn.Dense(n_output, activation='sigmoid'))

    def hybrid_forward(self, F, x):
        h = self.encoder(x)
        #print(h)
        mu_lv = F.split(h, axis=1, num_outputs=2)
        mu = mu_lv[0]
        lv = mu_lv[1]
        self.mu = mu

        eps = F.random_normal(loc=0, scale=1, shape=(self.batch_size, self.n_latent), ctx=model_ctx)
        z = mu + F.exp(0.5*lv)*eps
        y = self.decoder(z)
        self.output = y

        KL = 0.5*F.sum(1+lv-mu*mu-F.exp(lv),axis=1)
        logloss = F.sum(x*F.log(y+self.soft_zero)+ (1-x)*F.log(1-y+self.soft_zero), axis=1)
        loss = -logloss-KL

        return loss

n_hidden=400 # neurons in each layer
n_latent=2 
n_layers=3 # num of dense layers in encoder and decoder respectively
n_output=VAE_data.shape[1]-1 

net = VAE(n_hidden=n_hidden, n_latent=n_latent, n_layers=n_layers, n_output=n_output, batch_size=batch_size, act_type='gelu')




AttributeError: ignored