In [1]:
import pandas as pd
import numpy as np
from loguru import logger
import torch
import random
from creme import utils
from torch.autograd import Variable

In [2]:
import math
from torch import nn
import torch.nn.functional as F
from loguru import logger

In [3]:
sarsa_frame = pd.DataFrame({
    "state": [np.random.uniform(size=(1, 34)) for x in range(10)],
    "action": [np.array([random.randint(0, 2), random.uniform(0, 1)]) for x in range(10)],
    "reward": [random.uniform(0, 100) for x in range(10)],
})

In [4]:
loss_frame = pd.DataFrame({
    "reward_hat": [random.uniform(0, 100) for x in range(10)],
    "reward": [random.uniform(0, 100) for x in range(10)],
})

In [5]:
prediction_frame = pd.DataFrame({
    "state": [np.random.uniform(size=(1, 34)) for x in range(10)],
    "action": [np.array([random.randint(0, 2), random.uniform(0, 1)]) for x in range(10)],
})

In [6]:
loss_frame

Unnamed: 0,reward_hat,reward
0,90.083318,8.553471
1,72.243942,5.524772
2,92.082176,49.882096
3,92.845944,85.445681
4,28.382103,39.459658
5,22.780694,31.145685
6,99.68695,12.982061
7,18.362692,9.395369
8,81.874773,18.665016
9,83.417835,54.672237


In [7]:
sarsa_frame

Unnamed: 0,state,action,reward
0,"[[0.5469649930347463, 0.34557819127701916, 0.7...","[0.0, 0.6088514671258236]",89.857153
1,"[[0.018427252349868528, 0.9847573885495392, 0....","[2.0, 0.41370929877425944]",42.467335
2,"[[0.4565701830232022, 0.6528485240346409, 0.40...","[0.0, 0.25727442881168183]",0.28624
3,"[[0.6893865368979512, 0.9237895398209094, 0.70...","[1.0, 0.9617725482372111]",65.347457
4,"[[0.0031736393497587834, 0.8433774081312341, 0...","[0.0, 0.5652516928110436]",70.111248
5,"[[0.28686701386724, 0.5340788317986376, 0.9295...","[0.0, 0.800461228278516]",73.173202
6,"[[0.4396440672567813, 0.24486094213544274, 0.1...","[1.0, 0.9718301808990636]",93.04158
7,"[[0.035089443825667255, 0.9597712091909386, 0....","[0.0, 0.9110745152539236]",71.975529
8,"[[0.3001283225072773, 0.879137409907724, 0.631...","[2.0, 0.5825325633380334]",94.268932
9,"[[0.2542767568132376, 0.8336136192354718, 0.41...","[0.0, 0.9294824846994743]",43.169529


In [8]:
prediction_frame

Unnamed: 0,state,action
0,"[[0.9321896405860199, 0.2090376845478461, 0.30...","[2.0, 0.7451409696663028]"
1,"[[0.669969573300711, 0.21918374692935216, 0.30...","[2.0, 0.4336799577129017]"
2,"[[0.26756694632039923, 0.17451233510070907, 0....","[1.0, 0.021396058979906152]"
3,"[[0.4315431330994898, 0.8750624140959447, 0.67...","[2.0, 0.8765005069050824]"
4,"[[0.44229514384623825, 0.22235317024128853, 0....","[1.0, 0.5341199625132242]"
5,"[[0.8323055793703098, 0.26318446551648245, 0.8...","[1.0, 0.9552905394892822]"
6,"[[0.9739394656640668, 0.755013992433295, 0.100...","[1.0, 0.0663412472763314]"
7,"[[0.6724519668389645, 0.7129238364138337, 0.41...","[2.0, 0.27321508984929455]"
8,"[[0.19848761811581306, 0.19883483863690965, 0....","[1.0, 0.22846359511182657]"
9,"[[0.505924791560785, 0.5236816069685634, 0.631...","[2.0, 0.04236716852257205]"


In [9]:
def squared_reshape(arr:np.array):
  """ 
    Dynamically reshapes the array to a square. 
    It adds zero to certain locations if the number of assets isn't a perfect square.
  """
  sqr = np.sqrt(arr.size)
  sqr_int = sqr.astype(int)

  if sqr != sqr_int:
    sqr_int += 1
  y = np.resize(arr, (sqr_int, sqr_int))
  return y

In [10]:
def loss_split(frame:pd.DataFrame, y_name="reward", y_hat_name="reward_hat"):
  _y = frame[y_name].to_numpy()
  _y_hat = frame[y_hat_name].to_numpy()
  return _y, _y_hat

In [11]:
def sa_split(frame:pd.DataFrame, state_name:str="state", action_name:str="action",):
  """ Get a state action pair the RL agent. Use to get stacked sequence of decisions and their associated states."""
  X_s = []
  for row in frame.iterrows():
    _states =  np.array(row[1][state_name]).reshape(-1)
    _actions = np.array(row[1][action_name]).reshape(-1)
    _combined = np.hstack((_states, _actions))
    
    X_s.append(_combined)
  return np.array(X_s)

In [12]:
def sarsa_split(frame:pd.DataFrame, state_name:str="state", action_name:str="action", reward_name:str="reward"):
  """Split the sarsa"""
  X_s = []
  Y_s = []
  for row in frame.iterrows():
    _states =  np.array(row[1][state_name]).reshape(-1)
    _actions = np.array(row[1][action_name]).reshape(-1)
    _rewards = np.array(row[1][reward_name])
    _combined = squared_reshape(np.hstack((_states, _actions)))
    Y_s.append(_combined)
    X_s.append(_combined)
    
  return X_s, np.array(Y_s)

In [13]:
def window_stack(a, stepsize=1, width=3):
    n = a.shape[0]
    return np.stack( a[i:1+n+i-width:stepsize] for i in range(0,width))

In [14]:
def experiment_window_stack(a, stepsize=1, width=3):
    n = a.shape[0]
    return np.stack( a[i:1+n+i-width:stepsize] for i in range(0,width))

In [15]:
loss_split(loss_frame)

(array([ 8.55347134,  5.52477176, 49.88209622, 85.4456809 , 39.45965795,
        31.1456847 , 12.9820611 ,  9.39536884, 18.66501552, 54.67223734]),
 array([90.08331803, 72.24394232, 92.0821757 , 92.84594363, 28.38210288,
        22.78069411, 99.6869504 , 18.36269208, 81.87477259, 83.41783461]))

In [18]:
X, y = sarsa_split(sarsa_frame)
X.shape

AttributeError: 'numpy.ndarray' object has no attribute 'dstack'

In [17]:
stacked_x = np.dstack((X[0], X[1]))

NameError: name 'X' is not defined

In [19]:
X[0], X[1]

NameError: name 'X' is not defined

In [20]:
torch.tensor(stacked_x).unsqueeze(0).shape

NameError: name 'stacked_x' is not defined

In [23]:
z = [torch.randn(size=(6, 6)) for x in range(6)]

In [None]:
z