In [None]:
import os
import csv
import glob
import json
import numpy as np
np.set_printoptions(threshold = np.inf)
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers, metrics, callbacks

In [None]:
# Number of time steps in each sliding window cut from the standardized series;
# also the largest window length tried by the training loop.
MAX_SPLIT_SIZE = 24
# Root directory that receives one sub-directory per training run.
SAVE_DIR = "results/"
# makedirs(exist_ok=True) is idempotent: it avoids the check-then-create race of
# `if not exists: mkdir` and also works if SAVE_DIR is later given a nested path.
os.makedirs(SAVE_DIR, exist_ok = True)

In [None]:
def read_csv(csv_path):
    """Load a per-month game CSV and derive the change in peak players.

    The first row is treated as a header and skipped. The first data row only
    supplies the "next" peak for the row that follows it and produces no
    record of its own. Every later row is compared with the row read just
    before it.
    NOTE(review): this implies the file is ordered newest-first (the result is
    reversed before returning) - confirm against the datasets.

    Columns read by index: 0 = year, 1 = month, 2 = peak, 4 = price,
    5 = steam_online. Column 3 is not used.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list of dicts in reversed file order, each with the keys "year",
        "month", "peak", "price", "steam_online", "gain" (peak of the
        previously read row minus this row's peak) and "gain_percent" (that
        gain as a percentage of this row's peak; 0.0 when the peak is 0).

    Raises:
        ValueError: If a required field cannot be parsed as a number.
        IndexError: If a non-blank row has fewer than six columns.
    """
    datas = list()

    # The csv module documentation asks for newline="" so that the reader, not
    # the file object, interprets line endings (matters for quoted newlines
    # and "\r\n" files).
    with open(csv_path, "r", newline = "") as f:
        rows = csv.reader(f)
        next(rows, None)  # skip the header row

        next_peak = None
        for R in rows:
            if not R:
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue

            peak = int(float(R[2]))

            if next_peak is None:
                # First data row: nothing to compare against yet.
                next_peak = peak
                continue

            gain = next_peak - peak
            # A percentage of zero players is undefined; report 0.0 instead of
            # raising ZeroDivisionError so one empty month does not abort the load.
            gain_percent = gain / peak * 100 if peak != 0 else 0.0

            datas.append({
                "year" : int(R[0]),
                "month" : int(R[1]),
                "peak" : peak,
                "price" : int(R[4]),
                "steam_online" : int(R[5]),
                "gain" : gain,
                "gain_percent" : gain_percent
            })

            next_peak = peak

    return list(reversed(datas))

def standardize_data(datas):
    """Z-score every field of a list of record dicts.

    The field names are taken from the last record. For each field the mean
    (`np.average`) and population standard deviation (`np.std`) are computed
    over all records, and each value is mapped to (value - mean) / std.
    Fields whose standard deviation is 0 are mapped to the integer 0.

    Args:
        datas: Non-empty list of dicts that share the same numeric fields.

    Returns:
        A tuple `(sd_datas, mean_items, std_items)`: the standardized records
        (same order as `datas`), the per-field means and the per-field
        standard deviations.
    """
    fields = list(datas[-1].keys())

    mean_items = dict()
    for field in fields:
        mean_items[field] = np.average([record[field] for record in datas])

    std_items = dict()
    for field in fields:
        std_items[field] = np.std([record[field] for record in datas])

    def _scaled(record, field):
        # Constant fields carry no information; emit 0 rather than dividing by 0.
        spread = std_items[field]
        if spread == 0:
            return 0
        return (record[field] - mean_items[field]) / spread

    sd_datas = [
        {field : _scaled(record, field) for field in fields}
        for record in datas
    ]

    return sd_datas, mean_items, std_items

In [None]:
class MyMetric(metrics.Metric):
    """Mean squared error evaluated on the final time step only.

    For every sample the value at index -1 of axis 1 is taken from `y_true`
    and `y_pred`; the squared differences are accumulated in `sum` and the
    number of compared values in `count`. `result()` returns `sum / count`.

    Args:
        name: Metric name shown in logs and history (default "MyMetric").
        **kwargs: Forwarded to `metrics.Metric` (e.g. `dtype`).
    """
    def __init__(self, name = "MyMetric", **kwargs):
        super().__init__(name = name, **kwargs)
        self.sum = self.add_weight(name = "sum", initializer = "zeros")
        self.count = self.add_weight(name = "count", initializer = "zeros")

    def update_state(self, y_true, y_pred, sample_weight = None):
        """Accumulate the squared last-step errors of one batch.

        Args:
            y_true: Targets, indexed as `y_true[:, -1]`.
            y_pred: Predictions, indexed as `y_pred[:, -1]`; may carry an
                extra trailing axis of size 1 compared with `y_true`.
            sample_weight: Accepted for API compatibility; not used.
        """
        y_true = tf.cast(y_true, self.sum.dtype)
        y_pred = tf.cast(y_pred, self.sum.dtype)

        # During fit the targets are (batch, steps) while the model output is
        # (batch, steps, 1). Without flattening, (batch,) - (batch, 1)
        # broadcasts to (batch, batch) and sums batch**2 spurious terms.
        last_true = tf.reshape(y_true[:, -1], [-1])
        last_pred = tf.reshape(y_pred[:, -1], [-1])

        self.sum.assign_add(tf.math.reduce_sum(tf.math.square(last_true - last_pred)))
        self.count.assign_add(tf.cast(tf.shape(last_true)[0], self.count.dtype))

    def result(self):
        """Return the mean squared last-step error seen so far (0 if no data)."""
        # Divide by the accumulated count (not a fixed batch size) so the value
        # is correct for any batch size and across multiple batches.
        return tf.math.divide_no_nan(self.sum, self.count)

    def reset_state(self):
        """Clear the accumulators (called by Keras between epochs/evaluations)."""
        self.sum.assign(0.0)
        self.count.assign(0.0)

    def reset_states(self):
        """Alias of `reset_state`, kept for Keras versions that call the plural name."""
        self.reset_state()

In [None]:
# Parse the three per-game CSV files into lists of record dicts.
csgo_datas, dota2_datas, rust_datas = [
    read_csv(dataset_path)
    for dataset_path in ("csgo_dataset.csv", "dota2_dataset.csv", "Rust_dataset.csv")
]

# Standardize each game separately, keeping the per-field mean and standard
# deviation of every dataset alongside the standardized records.
sd_csgo_datas, mean_csgo, std_csgo = standardize_data(csgo_datas)
sd_dota2_datas, mean_dota2, std_dota2 = standardize_data(dota2_datas)
sd_rust_datas, mean_rust, std_rust = standardize_data(rust_datas)

In [None]:
# Cut the standardized CS:GO series into overlapping windows of MAX_SPLIT_SIZE
# consecutive records, advancing one record at a time.
# NOTE(review): the range ends at len(sd_csgo_datas) - 1 and the slice end is
# exclusive, so the final record never appears in any window - confirm that
# this is intended.
windows = [
    sd_csgo_datas[end - MAX_SPLIT_SIZE : end]
    for end in range(MAX_SPLIT_SIZE, len(sd_csgo_datas))
]

# Model inputs: the three features of every record in every window.
inputs = np.array([
    [[record["peak"], record["price"], record["steam_online"]] for record in window]
    for window in windows
])

# Targets: the standardized "gain_percent" of every record in every window.
teacher_signals = np.array([
    [record["gain_percent"] for record in window]
    for window in windows
])

In [None]:
# Fixed seed so the train/test partition (and everything trained on it) is the
# same after every "Restart Kernel -> Run All".
SPLIT_SEED = 42

# Split window indices rather than the arrays themselves so inputs and teacher
# signals stay aligned; 10% of the windows are held out.
# NOTE(review): the windows overlap (built with a stride of one record), so a
# random split puts near-duplicate windows on both sides - confirm this is
# acceptable for the evaluation.
train_index, test_index = train_test_split(list(range(len(inputs))), test_size = 0.1, random_state = SPLIT_SEED)
train_inputs, train_teacher_signals = inputs[train_index], teacher_signals[train_index]
test_inputs, test_teacher_signals = inputs[test_index], teacher_signals[test_index]

# Show the shapes of the four resulting arrays as the cell output.
train_inputs.shape, train_teacher_signals.shape, test_inputs.shape, test_teacher_signals.shape

((85, 24, 3), (85, 24), (10, 24, 3), (10, 24))

return_sequence : True

batch_size : フルバッチ

中間層 : 1層

In [None]:
# Passed to SimpleRNN as `return_sequences`: emit an output for every time step.
return_sequence = True
# Full batch: one batch contains every training window.
batch_size = len(train_index)
# Number of hidden ("middle") layers; in this cell group it is only used to
# name the results directory.
num_middle = 1

In [None]:
def build_model(input_shape = None):
    """Build the SimpleRNN regressor trained for one window length.

    Args:
        input_shape: Per-sample input shape given to `layers.Input`. When
            omitted, the shape of the notebook-level `tmp_train_inputs`
            (without the batch axis) is used, matching the earlier
            zero-argument form of this function.

    Returns:
        An uncompiled `models.Model`: a 1024-unit ReLU `SimpleRNN` whose
        `return_sequences` comes from the notebook-level `return_sequence`,
        followed by a 1-unit linear `Dense` layer.
    """
    if input_shape is None:
        input_shape = tmp_train_inputs.shape[1:]

    # Named `model_input` so the builtin `input` is not shadowed.
    model_input = layers.Input(shape = input_shape)
    x = layers.SimpleRNN(units = 1024, activation = "relu", return_sequences = return_sequence)(model_input)
    output = layers.Dense(units = 1, activation = "linear")(x)

    return models.Model(model_input, output)


# Train one model per window length, from MAX_SPLIT_SIZE down to 1 time step.
for i in tqdm(range(MAX_SPLIT_SIZE, 0, -1)):

    target_dir = f"{SAVE_DIR}RNN{num_middle}_{return_sequence}_{batch_size}_{i}/"
    history_path = os.path.join(target_dir, "history.json")
    # A run counts as finished only once its history file exists. Checking the
    # directory instead would permanently skip a run whose training crashed
    # after the directory had been created.
    if os.path.exists(history_path):
        continue
    os.makedirs(target_dir, exist_ok = True)

    # Keep only the last `i` time steps of every window.
    tmp_train_inputs = train_inputs[:, -i:, :]
    tmp_train_teacher_signals = train_teacher_signals[:, -i:]
    tmp_test_inputs = test_inputs[:, -i:, :]
    tmp_test_teacher_signals = test_teacher_signals[:, -i:]

    # Save the model whenever the validation loss reaches a new minimum.
    mcp = callbacks.ModelCheckpoint(filepath = os.path.join(target_dir, "model.keras"),
                                    monitor = "val_loss",
                                    mode = "min",
                                    save_best_only = True)
    model = build_model(tmp_train_inputs.shape[1:])
    # `metrics` is given as a list, the form `compile` documents.
    model.compile(loss = losses.MeanSquaredError(),
                  optimizer = optimizers.Adam(learning_rate = 0.0001),
                  metrics = [MyMetric()])
    history = model.fit(x = tmp_train_inputs, y = tmp_train_teacher_signals, batch_size = batch_size, epochs = 1,
                        validation_data = (tmp_test_inputs, tmp_test_teacher_signals), callbacks = [mcp])

    # Written last: its presence is the "run finished" marker checked above.
    with open(history_path, "w") as f:
        json.dump(history.history, f)

    # NOTE(review): this unconditional break stops after the first window
    # length that is actually trained; together with `epochs = 1` it looks like
    # a debugging setting - confirm before running the full sweep.
    break

  0%|          | 0/24 [00:00<?, ?it/s]

TensorShape([85, 19]) TensorShape([85, 19, 1])
85
10


 21%|██        | 5/24 [00:01<00:05,  3.78it/s]


In [None]:
# Show the per-epoch values recorded by the `fit` call of the training loop
# (only defined if the loop trained at least one model in this session).
history.history

{'loss': [0.8092679977416992],
 'MyMetric': [448.5855407714844],
 'val_loss': [0.45748835802078247],
 'val_MyMetric': [4.928160190582275]}

In [None]:
# Run the model on the held-out windows and index away the trailing output
# axis (`[:, :, 0]`) so `p` has the same rank as the teacher signals.
p = model(tmp_test_inputs)[:, :, 0]
# a / b: teacher signal and prediction at the final time step of each window.
a, b = tmp_test_teacher_signals[:, -1], p[:, -1]
# A / B: the complete teacher-signal and prediction arrays.
A, B = tmp_test_teacher_signals, p

In [None]:
# Static shape of the predictions after the trailing axis was indexed away.
p.shape

TensorShape([10, 19])

In [None]:
# The same shape obtained as a tensor via `tf.shape`.
tf.shape(p)

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([10, 19], dtype=int32)>

In [None]:
# Reference value: Keras mean squared error between the final-step targets `a`
# and the final-step predictions `b`.
losses.MeanSquaredError()(a, b)

<tf.Tensor: shape=(), dtype=float32, numpy=0.44242358>

In [None]:
# The same quantity computed by hand: sum of squared differences divided by
# the number of values in `a`.
tf.math.reduce_sum(tf.math.pow(a - b, 2)) / len(a)

<tf.Tensor: shape=(), dtype=float32, numpy=0.44242358>

In [None]:
# Feed the metric the full arrays directly (outside `fit`) and read its value
# back, for comparison with the two mean-squared-error computations on `a`
# and `b`. Here both arguments have the same rank because `p` already had its
# trailing axis indexed away.
c = MyMetric()
c.update_state(A, B)
c.result()

(10, 19) TensorShape([10, 19])
10


<tf.Tensor: shape=(), dtype=float32, numpy=0.44242358>

In [None]:
# Evaluate the compiled loss and metric on the held-out batch.
# `test_on_batch` is used instead of `train_on_batch` so that inspecting these
# values does not apply a gradient step on test data and the cell can be
# re-run without changing the model.
model.test_on_batch(tmp_test_inputs, tmp_test_teacher_signals)

TensorShape([None, 19]) TensorShape([None, 19, 1])
10


[0.45748835802078247, 4.928160190582275]