In [2]:
# 1. Install libraries
!pip install numpy pandas tensorflow scikit-learn
!pip install finnhub-python

Collecting tensorflow
  Using cached tensorflow-2.16.1-cp311-cp311-macosx_10_15_x86_64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.5.4-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow)
  Using cached h5py-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (2.5 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl.metadata (5.2 kB)
Collecting ml-dtypes~=0.3.1 (from tens

In [7]:
# 2. Import libraries
from numpy.random import seed
seed(1)
import tensorflow
tensorflow.random.set_seed(2)
import os
import sys
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
from keras.models import *
from keras.layers import *
import pickle


if not sys.warnoptions:
    import warnings

    warnings.simplefilter("ignore")

In [12]:
# 3. Read data
def read_data():
    """Load the S&P 500, Dow Jones and Nasdaq CSV files and join them by date.

    Each file is indexed by its 'Date' column and its remaining columns get an
    index-specific suffix ('_spy', '_dowjones', '_nasdaq'). The three frames
    are inner-joined on the date index, so only dates present in all three
    files survive.

    Returns:
        DataFrame with a fresh integer index and 15 columns: Open, Close,
        Volume, Low and High for each of the three indices, in that order.
    """
    # (csv path, column suffix) for SP500 + DOW30 + Nasdaq
    sources = (
        ('../../data/sp500_max.csv', '_spy'),
        ('../../data/dowjones_max.csv', '_dowjones'),
        ('../../data/nasdaq_max.csv', '_nasdaq'),
    )
    frames = [
        pd.read_csv(path).set_index('Date').add_suffix(suffix)
        for path, suffix in sources
    ]

    merged = frames[0]
    for other in frames[1:]:
        merged = merged.merge(other, left_index=True, right_index=True, how='inner')
    merged = merged.reset_index()

    # Keep only the price/volume fields; 'Date' and any other columns are dropped.
    fields = ('Open', 'Close', 'Volume', 'Low', 'High')
    wanted = [field + suffix for _, suffix in sources for field in fields]
    return merged[wanted]

def TA_analysis(df):
    """Add one column per finta indicator to `df`, best effort.

    Every non-dunder attribute of `finta.TA` is called with `df` as its only
    argument and the result is stored in `df[<attribute name>]`. Indicators
    that raise are reported on stdout and skipped.

    Args:
        df: DataFrame passed unchanged to each indicator. It is modified in
            place (new columns are added).

    Returns:
        The same `df` object, with the indicator columns added.
    """
    # TA Analysis #1: finta
    from finta import TA
    # Snapshot the attribute names so the loop never iterates a live mapping.
    for method in list(vars(TA)):
        if method.startswith('__'):
            continue
        try:
            df[method] = getattr(TA, method)(df)
        except Exception:
            # Deliberate best-effort: one failing indicator must not abort the
            # rest. Only `Exception` is caught so that KeyboardInterrupt and
            # SystemExit still stop the notebook.
            print(method, 'cannot be called')

    # TA Analysis #2: pandas_ta
    # The import is presumably what makes the `df.ta` accessor available.
    import pandas_ta as ta
    # The return value is discarded; no pandas_ta columns are appended here.
    df.ta.indicators()
    # Disabled experiments:
    # df.ta.log_return(cumulative=True, append=True)
    # df.ta.percent_return(cumulative=True, append=True)

    return df

In [13]:
# 4. Feature Engineering
# Get data in the Keras format
def series_to_supervised(data, n_in=1, n_out=1, lead_time=0, dropnan=True, target_col=1):
    """Reframe a multivariate time series as a supervised-learning table.

    Each output row holds `n_in` lagged copies of every variable followed by
    `n_out` future values of a single target variable.

    Args:
        data: 2-D array-like (NumPy array, DataFrame, or list of rows), one
            column per variable.
        n_in: Number of lag steps used as inputs (t-n_in ... t-1).
        n_out: Number of target steps to append; 0 appends no target column.
        lead_time: Offset of the first target step relative to t.
        dropnan: Drop rows that contain NaN introduced by shifting.
        target_col: Positional index of the variable to forecast. Defaults to
            1, the column this function has always used.

    Returns:
        DataFrame whose lag columns are named 'var<j>(t-<i>)' and whose target
        columns are named 'var<target_col + 1>(t)' / 'var<target_col + 1>(t+<i>)'.
        Target columns were previously labelled with the variable count, which
        did not match the column actually used.

    Raises:
        IndexError: If targets are requested and `target_col` is out of range.
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself so list-of-rows input is
    # labelled correctly.
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n) for the target variable only
    if n_out > 0:
        # Normalises negative indices and raises IndexError when out of range.
        target_idx = range(n_vars)[target_col]
        target_label = target_idx + 1
        for i in range(lead_time, lead_time + n_out):
            cols.append(df.iloc[:, target_idx].shift(-i))
            if i == 0:
                names.append('var%d(t)' % target_label)
            else:
                names.append('var%d(t+%d)' % (target_label, i))

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

def feature_engineering(df, stage='TRAIN', model=None, feature_window=60, target_window=1, lead_time_window=0):
    """Scale the raw frame to [0, 1] and reframe it as a supervised table.

    Args:
        df: DataFrame of numeric features; its values are cast to float32.
        stage: 'TRAIN' fits a new MinMaxScaler on `df`; any other value reuses
            `model['scaler']`.
        model: Dict holding a fitted scaler under 'scaler'. Only read when
            `stage` is not 'TRAIN'.
        feature_window: Number of lagged steps per sample.
        target_window: Number of target steps (used in the 'TRAIN' stage only).
        lead_time_window: Offset of the first target step (used in the 'TRAIN'
            stage only).

    Returns:
        Tuple `(scaler, df_reframed)`. Outside the 'TRAIN' stage the reframed
        table has lag columns only, no target columns.
    """
    is_training = stage == 'TRAIN'

    # get time series data and ensure all of it is float
    raw = df.values.astype('float32')

    # normalize features
    if is_training:
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaler.fit(raw)
    else:
        scaler = model['scaler']
    scaled = scaler.transform(raw)
    print('scaled: ', scaled.shape)

    # frame as supervised learning; prediction requests no target columns
    if is_training:
        n_out, lead = target_window, lead_time_window
    else:
        n_out, lead = 0, 0
    df_reframed = series_to_supervised(scaled, feature_window, n_out, lead, True)

    print(df_reframed.head(10))
    print(len(df_reframed))
    return scaler, df_reframed

def split_data(df_reframed, train_ratio=1.0, target_window=1):
    """Split a reframed table into train/test rows and input/target columns.

    Rows are split chronologically: the first `int(len * train_ratio)` rows
    are the training set and the remainder the test set. The last
    `target_window` columns are the targets; everything before them is input.

    Args:
        df_reframed: DataFrame whose trailing `target_window` columns are targets.
        train_ratio: Fraction of rows assigned to the training set.
        target_window: Number of trailing target columns. 0 means the table has
            no targets: every column is returned as input and the target
            arrays have zero columns.

    Returns:
        Tuple `(train_X, train_y, test_X, test_y)` of 2-D NumPy arrays.
    """
    # split into train and test sets
    dataset = df_reframed.values
    train_size = int(len(dataset) * train_ratio)
    train, test = dataset[:train_size], dataset[train_size:]

    # Split on an explicit column position instead of a negative slice:
    # `[:, :-0]` would be empty and `[:, -0:]` would be every column, which
    # swaps inputs and targets when target_window == 0.
    n_inputs = max(dataset.shape[1] - target_window, 0)

    # split into input and outputs
    train_X, train_y = train[:, :n_inputs], train[:, n_inputs:]
    test_X, test_y = test[:, :n_inputs], test[:, n_inputs:]
    return train_X, train_y, test_X, test_y


def reshape(X, feature_window=60):
    """Fold a flat 2-D sample matrix into 3-D [samples, timesteps, features].

    Args:
        X: 2-D array whose second dimension is `feature_window` blocks of
            per-step features laid out consecutively.
        feature_window: Number of timesteps per sample.

    Returns:
        Array of shape `(X.shape[0], feature_window, X.shape[1] // feature_window)`.
    """
    n_samples, flat_width = X.shape[0], X.shape[1]
    features_per_step = flat_width // feature_window
    return X.reshape((n_samples, feature_window, features_per_step))

In [14]:
# 5. Modeling
def LSTM_auto_encoder(train_X):
    """Train an LSTM sequence autoencoder and return its encoder half.

    The network is Input -> LSTM(128) -> LSTM(32) -> TimeDistributed(Dense),
    trained to reconstruct `train_X` from itself with Adam and MSE loss for
    50 epochs at batch size 1024. Both LSTM layers are called with
    `training=True`, which in Keras keeps their dropout active outside of
    `fit` as well.

    Args:
        train_X: 3-D array; dimensions 1 and 2 are used as the input shape.

    Returns:
        A Keras `Model` mapping the input to the output sequence of the first
        (128-unit) LSTM layer. It shares layers with the trained autoencoder.
    """
    timesteps, n_features = train_X.shape[1], train_X.shape[2]

    ae_input = Input(shape=(timesteps, n_features))
    encoded_seq = LSTM(128, return_sequences=True, dropout=0.3)(ae_input, training=True)
    decoded_seq = LSTM(32, return_sequences=True, dropout=0.3)(encoded_seq, training=True)
    reconstruction = TimeDistributed(Dense(n_features))(decoded_seq)

    autoencoder = Model(ae_input, reconstruction)
    autoencoder.compile(optimizer='adam', loss='mse', metrics=['mse'])
    autoencoder.summary()

    # Input and target are the same tensor: the model learns to reconstruct it.
    autoencoder.fit(train_X, train_X, batch_size=1024, epochs=50, verbose=2, shuffle=True)

    return Model(ae_input, encoded_seq)


def LSTM_forecaster(train_X, train_y, target_window=15):
    """Build and fit the LSTM forecasting network.

    The network is Input -> LSTM(128) -> LSTM(32) -> Dense(50) ->
    Dense(target_window), trained with Adam and MSE loss for 50 epochs at
    batch size 1024. Both LSTM layers are called with `training=True`, which
    in Keras keeps their dropout active outside of `fit` as well.

    Args:
        train_X: 3-D array; dimensions 1 and 2 are used as the input shape.
        train_y: Training targets passed to `fit`.
        target_window: Number of units in the output layer.

    Returns:
        The fitted Keras `Model`.
    """
    timesteps, n_features = train_X.shape[1], train_X.shape[2]

    fc_input = Input(shape=(timesteps, n_features))
    hidden = LSTM(128, return_sequences=True, dropout=0.3)(fc_input, training=True)
    # The second LSTM returns only its final state rather than a sequence.
    hidden = LSTM(32, return_sequences=False, dropout=0.3)(hidden, training=True)
    hidden = Dense(50)(hidden)
    fc_output = Dense(target_window)(hidden)

    forecaster = Model(fc_input, fc_output)
    forecaster.compile(loss='mse', optimizer='adam', metrics=['mse'])
    forecaster.fit(train_X, train_y, epochs=50, batch_size=1024, verbose=2, shuffle=True)
    return forecaster

def train(train_X, train_y, target_window=15):
    """Train the encoder, then train the forecaster on the encoded inputs.

    Args:
        train_X: 3-D training inputs for the autoencoder.
        train_y: Training targets for the forecaster.
        target_window: Output width forwarded to `LSTM_forecaster`.

    Returns:
        Tuple `(encoder, forecaster)`.
    """
    encoder = LSTM_auto_encoder(train_X)
    # The forecaster never sees the raw inputs, only the encoder's output.
    encoded_X = encoder.predict(train_X)
    return encoder, LSTM_forecaster(encoded_X, train_y, target_window)


def predict(model, df):
    """Return `model.predict(df)`; a thin wrapper used by the workflows."""
    return model.predict(df)


def evaluate(y_pred, y_true):
    """Print and return per-output RMSE and overall mean absolute error.

    Args:
        y_pred: Predictions, shape (n_samples, n_outputs) or (n_samples,).
        y_true: Ground truth with the same shape as `y_pred`.

    Returns:
        Tuple `(y_rmse, y_mae)`: `y_rmse` is a list with one RMSE per output
        column and `y_mae` is the mean absolute error over all elements.
        (Previously nothing was returned, so existing callers are unaffected.)
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # Treat 1-D input as a single output column so the two arrays line up
    # element-wise instead of broadcasting against each other.
    if y_pred.ndim == 1:
        y_pred = y_pred.reshape(-1, 1)
    if y_true.ndim == 1:
        y_true = y_true.reshape(-1, 1)

    y_diff = y_pred - y_true
    y_diff_square = y_diff * y_diff
    # RMSE must average the squared errors; averaging the signed errors lets
    # them cancel and yields NaN whenever their mean is negative.
    y_rmse = [float(np.sqrt(np.mean(y_diff_square[:, i]))) for i in range(y_true.shape[1])]
    # This is a mean absolute error, not a percentage error, so it is
    # reported as 'mae'.
    y_mae = float(np.mean(np.abs(y_diff)))
    print('Evaluation result - rmse: ', y_rmse, ' mae: ', y_mae)
    return y_rmse, y_mae

In [15]:
# 6. Workflow
def workflow_train(data_freq='60s', train_ratio=1.0, feature_window=60, target_window=1, lead_time_window=0, result_path=None):
    """Run the full training pipeline and optionally persist the result.

    Steps: reseed NumPy and TensorFlow, read the data, scale and reframe it,
    split it, train the encoder and forecaster, evaluate on the held-out rows
    when there are any, and pickle the model bundle.

    Args:
        data_freq: Accepted for interface compatibility; not used by this function.
        train_ratio: Fraction of rows used for training. Evaluation runs only
            when this is below 1.0.
        feature_window: Number of lagged steps per sample.
        target_window: Number of target steps to predict.
        lead_time_window: Offset of the first target step.
        result_path: Directory in which 'model.pkl' is written. When None
            (the default) nothing is written to disk.

    Returns:
        Dict with keys 'scaler', 'encoder' and 'forecaster'.
    """
    from numpy.random import seed
    seed(1)
    import tensorflow
    tensorflow.random.set_seed(2)

    df = read_data()
    scaler, df_reframed = feature_engineering(df, stage='TRAIN', model=None, feature_window=feature_window, target_window=target_window, lead_time_window=lead_time_window)
    train_X, train_y, test_X, test_y = split_data(df_reframed, train_ratio=train_ratio, target_window=target_window)
    train_X = reshape(train_X, feature_window)
    encoder, forecaster = train(train_X, train_y, target_window=target_window)
    if train_ratio < 1.0:
        test_X = reshape(test_X, feature_window)
        y_pred_encoder = predict(encoder, test_X)
        y_pred_forecaster = predict(forecaster, y_pred_encoder)
        evaluate(y_pred_forecaster, test_y)
    model = {'scaler': scaler, 'encoder': encoder, 'forecaster': forecaster}

    # With the default result_path=None, os.path.join would raise only after
    # the whole training run had finished, so saving is skipped in that case.
    if result_path is not None:
        os.makedirs(result_path, exist_ok=True)
        # Context manager so the file handle is flushed and closed.
        with open(os.path.join(result_path, 'model.pkl'), 'wb') as model_file:
            pickle.dump(model, model_file)

    return model


def workflow_predict(df, model, feature_window=60):
    """Run a trained model bundle on new data and return the forecasts.

    Args:
        df: DataFrame of raw features, passed to `feature_engineering` in the
            'PREDICT' stage.
        model: Dict with keys 'scaler', 'encoder' and 'forecaster'.
        feature_window: Number of lagged steps per sample; must match the
            value the model was trained with. Defaults to 60, the value that
            was previously hard-coded here.

    Returns:
        The forecaster's predictions for every complete window in `df`.
    """
    from numpy.random import seed
    seed(1)
    import tensorflow
    tensorflow.random.set_seed(2)

    # The same window is passed to both framing and reshaping so the two
    # steps cannot drift apart.
    _, test_X = feature_engineering(df, stage='PREDICT', model=model, feature_window=feature_window)
    test_X = reshape(test_X.values, feature_window=feature_window)
    print("after reshape: ", test_X)
    y_pred_encoder = predict(model['encoder'], test_X)
    print("After encoder: ", y_pred_encoder)
    y_pred_forecaster = predict(model['forecaster'], y_pred_encoder)
    print("After forecaster: ", y_pred_forecaster)
    return y_pred_forecaster

In [16]:
# 7. Parameters
# Fraction of reframed rows used for training; the remainder is held out for evaluation.
train_ratio=0.99
# Number of lagged time steps fed to the model per sample.
feature_window=60
# Number of future steps to predict.
target_window=1
# Offset of the first predicted step relative to the current step.
lead_time_window=0
# Directory where workflow_train writes model.pkl.
# NOTE(review): absolute, machine-specific path; it will not exist on other machines.
result_path='/Users/zhang_family_mac/Yongqiang/stock_prediction/result'

In [17]:
# 8. Main function
def main():
    """Run the training workflow with the notebook-level parameters."""
    settings = dict(
        train_ratio=train_ratio,
        feature_window=feature_window,
        target_window=target_window,
        lead_time_window=lead_time_window,
        result_path=result_path,
    )
    workflow_train(**settings)

In [None]:
# Start training only when this code runs as the top-level module, not when it is imported.
if __name__ == '__main__':
    main()

scaled:  (8881, 15)
    var1(t-60)  var2(t-60)  var3(t-60)  var4(t-60)  var5(t-60)  var6(t-60)  \
60    0.000272    0.000826    0.008802    0.000016    0.000476    0.001206   
61    0.000827    0.000891    0.013548    0.000787    0.000812    0.001903   
62    0.000893    0.000966    0.010271    0.000634    0.000675    0.001402   
63    0.000968    0.000654    0.007902    0.000596    0.000613    0.001179   
64    0.000655    0.001190    0.008628    0.000381    0.000836    0.001006   
65    0.001192    0.001271    0.011267    0.001105    0.001203    0.001786   
66    0.001273    0.001215    0.011014    0.001184    0.001194    0.001460   
67    0.001217    0.001648    0.011949    0.001218    0.001337    0.001464   
68    0.001651    0.001763    0.008872    0.001605    0.001471    0.001645   
69    0.001767    0.001240    0.007780    0.001118    0.001409    0.001570   

    var7(t-60)  var8(t-60)  var9(t-60)  var10(t-60)  ...  var7(t-1)  \
60    0.001786    0.005040    0.001113     0.00156

Epoch 1/50
9/9 - 18s - 2s/step - loss: 0.0657 - mse: 0.0657
Epoch 2/50
9/9 - 10s - 1s/step - loss: 0.0231 - mse: 0.0231
Epoch 3/50
9/9 - 10s - 1s/step - loss: 0.0142 - mse: 0.0142
Epoch 4/50
9/9 - 9s - 1s/step - loss: 0.0111 - mse: 0.0111
Epoch 5/50
9/9 - 10s - 1s/step - loss: 0.0097 - mse: 0.0097
Epoch 6/50
9/9 - 9s - 1s/step - loss: 0.0086 - mse: 0.0086
Epoch 7/50
9/9 - 10s - 1s/step - loss: 0.0082 - mse: 0.0082
Epoch 8/50
9/9 - 11s - 1s/step - loss: 0.0077 - mse: 0.0077
Epoch 9/50
9/9 - 11s - 1s/step - loss: 0.0073 - mse: 0.0073
Epoch 10/50
9/9 - 11s - 1s/step - loss: 0.0068 - mse: 0.0068
Epoch 11/50
9/9 - 20s - 2s/step - loss: 0.0068 - mse: 0.0068
Epoch 12/50
9/9 - 10s - 1s/step - loss: 0.0065 - mse: 0.0065
Epoch 13/50
9/9 - 11s - 1s/step - loss: 0.0061 - mse: 0.0061
Epoch 14/50
9/9 - 10s - 1s/step - loss: 0.0057 - mse: 0.0057
Epoch 15/50
9/9 - 11s - 1s/step - loss: 0.0055 - mse: 0.0055
Epoch 16/50
9/9 - 10s - 1s/step - loss: 0.0051 - mse: 0.0051
Epoch 17/50
9/9 - 10s - 1s/step - l

In [None]:
import finnhub

# Setup client
# NOTE: "YOUR API KEY" is a placeholder and must be replaced with a real key
# before running; prefer loading the key from an environment variable over
# pasting it into the notebook.
finnhub_client = finnhub.Client(api_key="YOUR API KEY")

# Stock candles
res = finnhub_client.stock_candles('AAPL', 'D', 1590988249, 1591852249)
print(res)

#Convert to Pandas Dataframe
import pandas as pd
print(pd.DataFrame(res))

# Aggregate Indicators
print(finnhub_client.aggregate_indicator('AAPL', 'D'))

# Basic financials
print(finnhub_client.company_basic_financials('AAPL', 'all'))

# Earnings surprises
print(finnhub_client.company_earnings('TSLA', limit=5))

# EPS estimates
print(finnhub_client.company_eps_estimates('AMZN', freq='quarterly'))

# Company Executives
print(finnhub_client.company_executive('AAPL'))

# Company News
# Need to use _from instead of from to avoid conflict
print(finnhub_client.company_news('AAPL', _from="2020-06-01", to="2020-06-10"))

# Company Peers
print(finnhub_client.company_peers('AAPL'))

# Company Profile
print(finnhub_client.company_profile(symbol='AAPL'))
print(finnhub_client.company_profile(isin='US0378331005'))
print(finnhub_client.company_profile(cusip='037833100'))

# Company Profile 2
print(finnhub_client.company_profile2(symbol='AAPL'))

# Revenue Estimates
print(finnhub_client.company_revenue_estimates('TSLA', freq='quarterly'))

# List country
print(finnhub_client.country())

# Crypto Exchange
print(finnhub_client.crypto_exchanges())

# Crypto symbols
print(finnhub_client.crypto_symbols('BINANCE'))

# Economic data
print(finnhub_client.economic_data('MA-USA-656880'))

# Filings
print(finnhub_client.filings(symbol='AAPL', _from="2020-01-01", to="2020-06-11"))

# Financials
print(finnhub_client.financials('AAPL', 'bs', 'annual'))

# Financials as reported
print(finnhub_client.financials_reported(symbol='AAPL', freq='annual'))

# Forex exchanges
print(finnhub_client.forex_exchanges())

# Forex all pairs
print(finnhub_client.forex_rates(base='USD'))

# Forex symbols
print(finnhub_client.forex_symbols('OANDA'))

# Fund Ownership
print(finnhub_client.fund_ownership('AMZN', limit=5))

# General news
print(finnhub_client.general_news('forex', min_id=0))

# Investors ownership
print(finnhub_client.ownership('AAPL', limit=5))

# IPO calendar
print(finnhub_client.ipo_calendar(_from="2020-05-01", to="2020-06-01"))

# Major developments
print(finnhub_client.press_releases('AAPL', _from="2020-01-01", to="2020-12-31"))

# News sentiment
print(finnhub_client.news_sentiment('AAPL'))

# Pattern recognition
print(finnhub_client.pattern_recognition('AAPL', 'D'))

# Price target
print(finnhub_client.price_target('AAPL'))

# Quote
print(finnhub_client.quote('AAPL'))

# Recommendation trends
print(finnhub_client.recommendation_trends('AAPL'))

# Stock dividends
print(finnhub_client.stock_dividends('KO', _from='2019-01-01', to='2020-01-01'))

# Stock dividends 2
print(finnhub_client.stock_basic_dividends("KO"))

# Stock symbols
print(finnhub_client.stock_symbols('US')[0:5])

# Transcripts
print(finnhub_client.transcripts('AAPL_162777'))

# Transcripts list
print(finnhub_client.transcripts_list('AAPL'))

# Earnings Calendar
print(finnhub_client.earnings_calendar(_from="2020-06-10", to="2020-06-30", symbol="", international=False))

# Covid-19
print(finnhub_client.covid19())

# Upgrade downgrade
print(finnhub_client.upgrade_downgrade(symbol='AAPL', _from='2020-01-01', to='2020-06-30'))

# Economic code
print(finnhub_client.economic_code()[0:5])

# Economic calendar
print(finnhub_client.calendar_economic('2021-01-01', '2021-01-07'))

# Support resistance
print(finnhub_client.support_resistance('AAPL', 'D'))

# Technical Indicator
print(finnhub_client.technical_indicator(symbol="AAPL", resolution='D', _from=1583098857, to=1584308457, indicator='rsi', indicator_fields={"timeperiod": 3}))

# Stock splits
print(finnhub_client.stock_splits('AAPL', _from='2000-01-01', to='2020-01-01'))

# Forex candles
print(finnhub_client.forex_candles('OANDA:EUR_USD', 'D', 1590988249, 1591852249))

# Crypto Candles
print(finnhub_client.crypto_candles('BINANCE:BTCUSDT', 'D', 1590988249, 1591852249))

# Tick Data
print(finnhub_client.stock_tick('AAPL', '2020-03-25', 500, 0))

# BBO Data
print(finnhub_client.stock_nbbo("AAPL", "2020-03-25", 500, 0))

# Indices Constituents
print(finnhub_client.indices_const(symbol = "^GSPC"))

# Indices Historical Constituents
print(finnhub_client.indices_hist_const(symbol = "^GSPC"))

# ETFs Profile
print(finnhub_client.etfs_profile('SPY'))
print(finnhub_client.etfs_profile(isin="US78462F1030"))

# ETFs Holdings
print(finnhub_client.etfs_holdings('SPY'))
print(finnhub_client.etfs_holdings(isin="US00214Q1040", skip=2))
print(finnhub_client.etfs_holdings("IPO", date='2022-03-10'))

# ETFs Sector Exposure
print(finnhub_client.etfs_sector_exp('SPY'))

# ETFs Country Exposure
print(finnhub_client.etfs_country_exp('SPY'))

# International Filings
print(finnhub_client.international_filings('RY.TO'))
print(finnhub_client.international_filings(country='GB'))

# SEC Sentiment Analysis
print(finnhub_client.sec_sentiment_analysis('0000320193-20-000052'))

# SEC similarity index
print(finnhub_client.sec_similarity_index('AAPL'))

# Bid Ask
print(finnhub_client.last_bid_ask('AAPL'))

# FDA Calendar
print(finnhub_client.fda_calendar())

# Symbol lookup
print(finnhub_client.symbol_lookup('apple'))

# Insider transactions
print(finnhub_client.stock_insider_transactions('AAPL', '2021-01-01', '2021-03-01'))

# Mutual Funds Profile
print(finnhub_client.mutual_fund_profile("VTSAX"))
print(finnhub_client.mutual_fund_profile(isin="US9229087286"))

# Mutual Funds Holdings
print(finnhub_client.mutual_fund_holdings("VTSAX"))
print(finnhub_client.mutual_fund_holdings(isin="US9229087286", skip=2))

# Mutual Funds Sector Exposure
print(finnhub_client.mutual_fund_sector_exp("VTSAX"))

# Mutual Funds Country Exposure
print(finnhub_client.mutual_fund_country_exp("VTSAX"))

# Revenue breakdown
print(finnhub_client.stock_revenue_breakdown('AAPL'))

# Social sentiment
print(finnhub_client.stock_social_sentiment('GME'))

# Investment Themes
print(finnhub_client.stock_investment_theme('financialExchangesData'))

# Supply chain
print(finnhub_client.stock_supply_chain('AAPL'))

# Company ESG
print(finnhub_client.company_esg_score("AAPL"))

# Earnings Quality Score
print(finnhub_client.company_earnings_quality_score('AAPL', 'quarterly'))

# Crypto Profile
print(finnhub_client.crypto_profile('BTC'))

# EBITDA Estimates
print(finnhub_client.company_ebitda_estimates("TSLA", freq="quarterly"))

# EBIT Estimates
print(finnhub_client.company_ebit_estimates("TSLA", freq="quarterly"))

# USPTO Patent
print(finnhub_client.stock_uspto_patent("AAPL", "2021-01-01", "2021-12-31"))

# Visa application
print(finnhub_client.stock_visa_application("AAPL", "2021-01-01", "2022-06-15"))

# Insider sentiment
print(finnhub_client.stock_insider_sentiment('AAPL', '2021-01-01', '2022-03-01'))

# Bond Profile
print(finnhub_client.bond_profile(isin='US912810TD00'))

# Bond price
print(finnhub_client.bond_price('US912810TD00', 1590988249, 1649099548))

# Lobbying
print(finnhub_client.stock_lobbying("AAPL", "2021-01-01", "2022-06-15"))

# USA Spending
print(finnhub_client.stock_usa_spending("LMT", "2021-01-01", "2022-06-15"))

# Sector metrics
print(finnhub_client.sector_metric('NA'))

## Fund's EET data
print(finnhub_client.mutual_fund_eet('LU2036931686'))
print(finnhub_client.mutual_fund_eet_pai('LU2036931686'))

# Symbol & ISIN change
print(finnhub_client.isin_change(_from='2022-10-01', to='2022-10-07'))
print(finnhub_client.symbol_change(_from='2022-10-01', to='2022-10-07'))

# 13-F data
print(finnhub_client.institutional_profile())
print(finnhub_client.institutional_portfolio(cik='1000097', _from='2022-01-01', to='2022-10-07'))
print(finnhub_client.institutional_ownership('TSLA', '', _from='2022-01-01', to='2022-10-07'))

# Bond yield and FINRA Trace tick
print(finnhub_client.bond_yield_curve('10y'))
print(finnhub_client.bond_tick('US693475BF18', '2022-08-19', 500, 0, 'trace'))

# Congressional Trading
print(finnhub_client.congressional_trading('AAPL', '2020-01-01', '2023-03-31'))

# Price metrics with historical data
print(finnhub_client.price_metrics(symbol="AAPL", date="2022-01-01"))

## Market Holiday / Status
print(finnhub_client.market_holiday(exchange='US'))
print(finnhub_client.market_status(exchange='US'))

In [2]:
import os

import finnhub

# Setup client
# The API key is read from the FINNHUB_API_KEY environment variable so that no
# secret is stored in the notebook. A KeyError here means the variable is not
# set in the kernel's environment.
# SECURITY: a literal key was previously committed in this cell; treat that
# key as compromised and rotate it.
finnhub_client = finnhub.Client(api_key=os.environ["FINNHUB_API_KEY"])

In [4]:
print(finnhub_client.quote('AAPL'))

{'c': 171.48, 'd': -1.83, 'dp': -1.0559, 'h': 172.23, 'l': 170.51, 'o': 171.7, 'pc': 173.31, 't': 1711656000}


In [5]:
print(finnhub_client.financials('AAPL', 'bs', 'annual'))

FinnhubAPIException: FinnhubAPIException(status_code: 403): You don't have access to this resource.

In [6]:
res = finnhub_client.stock_candles('AAPL', 'D', 1590988249, 1591852249)
print(res)

FinnhubAPIException: FinnhubAPIException(status_code: 403): You don't have access to this resource.

In [10]:
# Load each index, key it by date, and tag its columns with the index name
# so the three frames can later be joined without column clashes.
df_spy = pd.read_csv('../../data/sp500_max.csv').set_index('Date').add_suffix('_spy')
df_dowjones = pd.read_csv('../../data/dowjones_max.csv').set_index('Date').add_suffix('_dowjones')
df_nasdaq = pd.read_csv('../../data/nasdaq_max.csv').set_index('Date').add_suffix('_nasdaq')

In [11]:
df_spy

Unnamed: 0_level_0,Open_spy,High_spy,Low_spy,Close_spy,Adj Close_spy,Volume_spy
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1927-12-30,17.660000,17.660000,17.660000,17.660000,17.660000,0
1928-01-03,17.760000,17.760000,17.760000,17.760000,17.760000,0
1928-01-04,17.719999,17.719999,17.719999,17.719999,17.719999,0
1928-01-05,17.549999,17.549999,17.549999,17.549999,17.549999,0
1928-01-06,17.660000,17.660000,17.660000,17.660000,17.660000,0
...,...,...,...,...,...,...
2020-04-17,2842.429932,2879.219971,2830.879883,2874.560059,2874.560059,5792140000
2020-04-20,2845.620117,2868.979980,2820.429932,2823.159912,2823.159912,5220160000
2020-04-21,2784.810059,2785.540039,2727.100098,2736.560059,2736.560059,5075830000
2020-04-22,2787.889893,2815.100098,2775.949951,2799.310059,2799.310059,5049660000


import math

# Only SP500
df = pd.read_csv('/Users/zhang_family_mac/Yongqiang/stock_prediction/data/sp500_max.csv')
# Column names are lower-cased here, so every later lookup must use lower case.
df.columns = df.columns.str.lower()
df = df[['open', 'close', 'volume', 'low', 'high']]

# Undo min-max scaling on the predictions using the close-price range.
# The names say "high" but the values come from the close column.
# NOTE(review): `y_pred` is not defined in this fragment; it presumably holds
# scaled forecaster output from an earlier run. Confirm before relying on it.
high_max = df['close'].max()
high_min = df['close'].min()
y_pred = y_pred * (high_max - high_min) + high_min
# Rows before index 60 are skipped; presumably they have no prediction because
# the first 60 rows only serve as the initial feature window. TODO confirm.
df_final = pd.DataFrame({'y_pred': y_pred.ravel(), 'y_true': df.loc[60:, 'close'].values})
df_final['diff'] = df_final['y_pred'] - df_final['y_true']
rmse = math.sqrt((df_final['diff'] * df_final['diff']).mean())
print('RMSE: ', rmse)
# RMSE relative to the mean true close price.
error_ratio = rmse / df_final['y_true'].mean()
print('error ratio: ', error_ratio)