In [3]:
%load_ext autoreload
%autoreload 2

from FP567_Lib import *

import tensorflow as tf
from tensorflow import keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import pathlib
import os
import glob
import json
import numpy as np
import pandas as pd

# Plot appearance: set the label font sizes for the axes and for both tick axes
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [66]:
'''
All the files in resources/market_item_data/ hold item info in the form of:
{
    item_id :
    [
        [date/time_n+1, price_at_time_n+1, amount_sold_at_time_n+1], [date/time_n+2, price_at_time_n+2, amount_sold_at_time_n+2], ..., [date/time_n+m, price_at_time_n+m, amount_sold_at_time_n+m]
    ]
}
covering some span of time.
The span of time varies between items, as not all items have existed as long as others.

Let's make a Market object, which computes a bunch of the info we want.
'''
# Market is not defined in this notebook; presumably it is provided by the
# `from FP567_Lib import *` star import -- confirm. It is constructed with no arguments.
market = Market()

In [3]:
'''
So, let's extend every item that has less than highest_unit_time worth of info backwards in time,
so that each of those items ends up with highest_unit_time worth of info: the added days keep
their time stamp, but just get 0, 0.

To do that, we can call the balance method of the Market object, using the
longest time span of unix times, and 0, 0 as the default amount sold and price.
'''
# Pad with the defaults 0 and 0 (both are zero, so the argument order does not matter here).
market.balance_as_is(0, 0)
# Last expression of the cell, so its return value is what the cell displays.
market.is_balanced()

True

In [114]:
'''
Now we have a balanced market and want to include updates into a matrix in the below form.
Notice how it is essentially m matrices, where each matrix represents a unit of time,
appended onto one another from left to right, and is n rows by k+2 cols = amount_sold col + price col + k embedded update cols
        | unix_time_0                                                                                                       | unix_time_1                                                                                                       |     | unix_time_m 
________| amount_sold_0 | price_0 | update_unix_time_0_feat_1 | update_unix_time_0_feat_2 | ... | update_unix_time_0_feat_k | amount_sold_1 | price_1 | update_unix_time_1_feat_1 | update_unix_time_1_feat_2 | ... | update_unix_time_1_feat_k | ... | amount_sold_m | price_m | update_unix_time_m_feat_1 | update_unix_time_m_feat_2 | ... | update_unix_time_m_feat_k |
item_1  |               |         |                           |                           | ... |                           |               |         |                           |                           | ... |                           | ... |               |         |                           |                           | ... |                           |
item_2  |               |         |                           |                           | ... |                           |               |         |                           |                           | ... |                           | ... |               |         |                           |                           | ... |                           |
.       |       .           .                   .                       .                   ...               .                    .             .                .                            .                ...               .                           .           .                  .                           .                ...               .             | 
.       |       .           .                   .                       .                   ...               .                    .             .                .                            .                ...               .                           .           .                  .                           .                ...               .             | 
.       |       .           .                   .                       .                   ...               .                    .             .                .                            .                ...               .                           .           .                  .                           .                ...               .             | 
item_n  |               |         |                           |                           | ... |                           |               |         |                           |                           | ... |                           |     |               |         |                           |                           | ... |                           |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

A lot of the entries will be zeros.
There will be zeros for
    update_unix_time_i_feat_j for all j, when there was no update for unix_time_i.
    an item's amount_sold_i and price_i, when that item was not being sold at unix_time_i.

So, we need to build a matrix of all the items' info and tack on the embedded update.
Let's build it so that it goes embedded update cols, amount sold col, price col, so that later,
when we do forecasting and a day's worth of cols is forecasted, the price will be the last col
of the output and thus the forecasted price is easier to spot quickly.
'''
# NOTE(review): the note above describes two different per-day column orders -- the table
# shows amount_sold, price, then the k update features, while the closing paragraph says
# update features first, then amount sold, then price. Confirm which one
# build_features_matrix actually produces.
market.build_features_matrix()

In [None]:
'''
Save the features matrix so we don't have to keep rebuilding it.
'''
# PATH_TO_ASSEMBLED_FORCASTING_MATRIX is not defined in this notebook; presumably it comes
# from the `from FP567_Lib import *` star import -- confirm.
market.save_market_with_updates_rep_as_csv(PATH_TO_ASSEMBLED_FORCASTING_MATRIX)

In [5]:
'''
Train a forecasting model.

First step: load the market matrix (df) together with forcasted_day_len, the number of
columns that make up one day (the next cell divides the column count by it to get the
number of days).
'''
# NOTE(review): the meaning of the positional True is not visible from here -- confirm
# against get_forcasting_market_df.
df, forcasted_day_len = get_forcasting_market_df(True)

In [13]:
# Split the market matrix column-wise into a training block (the first ~80% of the days)
# and a validation block (the remaining days). Every day occupies forcasted_day_len
# consecutive columns, so the column count must be an exact multiple of it.
if df.shape[1] % forcasted_day_len != 0:
    # Format one readable message (passing several args to Exception yields a tuple repr).
    raise ValueError(
        f"The market matrix is malformed. Total days = {df.shape[1] // forcasted_day_len} "
        f"and forcasted day length = {forcasted_day_len} "
        f"and total cols in the market = {df.shape[1]}")

# Integer day count: keeps every derived count/width an int usable as a slice bound.
total_days = int(df.shape[1] // forcasted_day_len)

num_training_days = int(total_days*0.8)
num_validation_days = total_days - num_training_days

num_training_cols = num_training_days*forcasted_day_len
num_validation_cols = num_validation_days*forcasted_day_len

# Positional, end-exclusive slicing: .loc would slice by label and include the end column,
# leaking the first validation column into the training block.
training_days_df = df.iloc[:, :num_training_cols]
# num_validation_cols is a width, so the slice ends at start + width.
validation_days_df = df.iloc[:, num_training_cols:num_training_cols + num_validation_cols]

In [None]:
# Scratch check: concatenate two tensors built from the same column slice of df.
# tf.concat takes a list of tensors plus an axis.
# NOTE(review): the original call was left unfinished, so axis=0 is an assumption -- confirm.
tf.concat(
    [
        tf.convert_to_tensor(df.loc[:, :100], dtype=np.float16),
        tf.convert_to_tensor(df.loc[:, :100], dtype=np.float16),
    ],
    axis=0,
)

In [18]:
# Two separately built float16 tensors from the same column slice of df
# (.loc slices by label, so the column labelled 100 is included).
t1, t2 = (
    tf.convert_to_tensor(df.loc[:, :100], dtype=np.float16)
    for _ in range(2)
)

In [33]:
# Stack the two equally shaped tensors along a new leading axis.
t_stack = tf.stack(values=[t1, t2], axis=0)

In [None]:
# Bare last expression: displays the stacked tensor as the cell output.
t_stack

In [None]:
'''
Find a days-at-a-time-to-predict value that divides evenly into both the validation
and training days (hopefully).
'''
window_size_to_days_to_predict_scale = 12
days_at_a_time_to_predict = 7  # fallback when no candidate in 3..9 divides both day counts
for i in range(3, 10):
    # A candidate fits when both day counts are multiples of it.
    if num_validation_days % i == 0 and num_training_days % i == 0:
        days_at_a_time_to_predict = i
        break

# Each training sample maps days_window_size columns of history (X) to the predict_size
# columns that immediately follow it (Y); the window then slides forward by predict_size
# columns and the pairing repeats.
# NOTE(review): the Y side was left unfinished in the original cell -- confirm that
# "the next predict_size columns" is the intended target.
days_window_size = days_at_a_time_to_predict * window_size_to_days_to_predict_scale * forcasted_day_len
predict_size = days_at_a_time_to_predict*forcasted_day_len

# Collect the windows in lists and stack once at the end; re-stacking the running stack
# with each new window nests tensors and fails on the shape mismatch.
X_training_windows = []
Y_training_windows = []
window_start = 0
# Stop as soon as a full X window plus its Y target no longer fits.
while window_start + days_window_size + predict_size <= training_days_df.shape[1]:
    window_end = window_start + days_window_size
    # .iloc slices by position with an exclusive end, so X and Y never overlap.
    # NOTE(review): float16 tops out at 65504 -- confirm prices/amounts stay below that.
    X_training_windows.append(tf.convert_to_tensor(
        training_days_df.iloc[:, window_start:window_end], dtype=np.float16))
    Y_training_windows.append(tf.convert_to_tensor(
        training_days_df.iloc[:, window_end:window_end + predict_size], dtype=np.float16))
    window_start = window_start + predict_size

if not X_training_windows:
    raise ValueError(
        f"Not enough training columns ({training_days_df.shape[1]}) for one window of "
        f"{days_window_size} input columns plus {predict_size} target columns")

# Resulting shapes: (num_windows, rows, days_window_size) and (num_windows, rows, predict_size).
X_training_tensor_stack = tf.stack(X_training_windows, axis=0)
Y_training_tensor_stack = tf.stack(Y_training_windows, axis=0)