# Time Series Prediction

Objectives
 - Learn how to build a Keras model for time series prediction and deploy it on AI Platform
 
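In this lab we will build a custom Keras model for time series prediction. We start by querying stock-market data from BigQuery and writing train, validation, and test splits to CSV files.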

In [None]:
# Google Cloud settings for this notebook; replace the values with your own.
PROJECT = "munn-sandbox"  # Replace with your PROJECT
BUCKET = "munn-sandbox"  # Replace with your BUCKET
REGION = "us-east1"  # Choose an available region for AI Platform (Cloud MLE)

In [None]:
import os
# Export the notebook settings as environment variables so that child
# processes started from this notebook (e.g. `!` shell commands) inherit them.
os.environ.update({"PROJECT": PROJECT, "BUCKET": BUCKET, "REGION": REGION})

## Create data and make train/valid/test split

In [None]:
def create_query(phase, sample_size):
    """Build the BigQuery SQL that selects one data split.

    Rows are assigned to a split by hashing ``symbol`` with
    ``FARM_FINGERPRINT``, so all rows of a given symbol land in the same
    split. The hash bucket ``MOD(ABS(hash), EVERY_N * 100)`` is compared
    against per-phase bounds expressed in units of ``EVERY_N``:
    TRAIN is [0, 70), VALID is [70, 85) and TEST is [85, 100).

    Args:
        phase: Split to select; one of "TRAIN", "VALID" or "TEST"
            (case-sensitive).
        sample_size: Value substituted for every ``EVERY_N`` placeholder in
            the query. May be a string (e.g. "50") or an int (e.g. 50).

    Returns:
        The complete query text as a string.

    Raises:
        ValueError: If ``phase`` is not one of the supported split names.
    """
    # Half-open [low, high) bounds of each split, in units of EVERY_N.
    split_bounds = {
        "TRAIN": (0, 70),
        "VALID": (70, 85),
        "TEST": (85, 100),
    }
    if phase not in split_bounds:
        # Without this check an unknown phase would only surface later as an
        # UnboundLocalError on the split-specific clause.
        raise ValueError(
            "phase must be one of {}, got {!r}".format(
                sorted(split_bounds), phase))
    low, high = split_bounds[phase]

    basequery = """
    SELECT
      symbol,
      Date,
      Open,
      Close,
      tomorrow_close,
      tomo_close_m_close,
      close_MIN_prior_5_days,
      close_MIN_prior_20_days,
      close_MIN_prior_260_days,
      close_MAX_prior_5_days,
      close_MAX_prior_20_days,
      close_MAX_prior_260_days,
      close_AVG_prior_5_days,
      close_AVG_prior_20_days,
      close_AVG_prior_260_days,
      close_STDDEV_prior_5_days,
      close_STDDEV_prior_20_days,
      close_STDDEV_prior_260_days,
      direction
    FROM
      `munn-sandbox.stock_market.percent_change_sp500`
    WHERE
      tomorrow_close IS NOT NULL
      AND tomo_close_m_close IS NOT NULL
      AND MOD(ABS(FARM_FINGERPRINT(symbol)), EVERY_N) = 1
    """

    # One template for all phases; only the two bounds differ.
    subsample = """
        AND MOD(ABS(FARM_FINGERPRINT(symbol)), EVERY_N * 100) >= (EVERY_N * {low})
        AND MOD(ABS(FARM_FINGERPRINT(symbol)), EVERY_N * 100) <  (EVERY_N * {high})
        """.format(low=low, high=high)

    query = basequery + subsample
    # str() lets callers pass the sample size as an int as well as a string.
    return query.replace("EVERY_N", str(sample_size))

In [None]:
from google.cloud import bigquery
# A single BigQuery client bound to PROJECT is reused for every split.
bq = bigquery.Client(project=PROJECT)

# Export each split to its own CSV file, e.g. "stock_market-train.csv".
for phase in ("TRAIN", "VALID", "TEST"):
    csv_name = "stock_market-{}.csv".format(phase.lower())

    # Build the split's SQL, run it, and pull the result into a DataFrame.
    query_string = create_query(phase, "50")
    df = bq.query(query_string).to_dataframe()

    # Write the rows without the DataFrame index, then report the row count.
    df.to_csv(csv_name, index_label=False, index=False)
    print("Wrote {} lines to {}".format(len(df), csv_name))

In [None]:
# Show the first lines of every CSV file in the working directory.
!head *.csv