In [1]:
from rl import RL, State
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import pickle as pk

In [2]:
# Build the RL agent that the training, export, evaluation and plotting cells
# below operate on. The keyword arguments are passed straight to rl.RL, whose
# implementation is not visible from this notebook.
# NOTE(review): eps/min_eps/delta look like an exploration schedule and lr is
# later reported as the learning rate in the plot title — confirm against rl.RL.
agent = RL(epochs=1000, eps=1, delta=0.01, m_ep=2, min_eps=0.1, lr=0.01)

In [22]:
# sorted([(k,v) for k,v in agent.q_matrix.items()], key=lambda x:x[1], reverse=True)

In [3]:
def make_dataset(q_table, n_actions=7):
    """Convert a Q-table into a (features, targets) pair for supervised learning.

    Each key of ``q_table`` is a sequence whose last element is the action
    index (convertible with ``int``) and whose preceding elements describe the
    state, one cell per element. All entries that share a state are merged
    into a single row.

    Args:
        q_table: Mapping from ``state + action`` keys to Q-values.
        n_actions: Length of the per-state target vector. Defaults to 7, the
            value that used to be hard-coded. Actions that never appear for a
            state keep a target of 0.

    Returns:
        A tuple ``(features, targets)`` of NumPy arrays with one row per
        distinct state, in first-seen order. In ``features`` every state cell
        is converted with ``int``, except the cell ``'2'`` which is encoded
        as -1. ``targets[i][a]`` holds the Q-value stored for action ``a``.

    Raises:
        IndexError: If a key carries an action index >= ``n_actions``.
    """
    # state -> list of Q-values indexed by action; dict order keeps the
    # first-seen order of states, which defines the row order of the output.
    action_values = {}
    for key, q_value in q_table.items():
        state, action = key[:-1], int(key[-1])
        action_values.setdefault(state, [0] * n_actions)[action] = q_value

    # '2' is remapped to -1; every other cell is taken at its integer value.
    features = [
        [-1 if cell == '2' else int(cell) for cell in state]
        for state in action_values
    ]
    targets = list(action_values.values())

    return np.array(features), np.array(targets)

In [37]:
# Call agent.train() 40 times in a row, printing a banner before each call so
# the per-iteration progress output can be told apart.
# NOTE(review): the saved output below stops at "Epoch 76/1000" of iteration 0,
# so this cell may have been interrupted — confirm a full run before trusting
# downstream results.
for i in range(40):
    print(f"\nIteration #{i}\n")
    agent.train()


Iteration #0

Epoch 76/1000  -  7.60%

In [None]:
# Flatten the learned Q-table into a supervised dataset and hold out a test split.
x, y = make_dataset(agent.q_matrix)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=43)

# Fit an MLP with hidden layers of 100, 512 and 256 units on the training split.
regr = MLPRegressor(
    hidden_layer_sizes=(100, 512, 256),
    random_state=42,
    max_iter=10000,
)
regr.fit(x_train, y_train)
# Quick sanity check, kept for reference: regr.predict(x_test[:2])

# Report the dataset shapes and the regressor's score on the held-out split.
print()
print(x.shape, y.shape)
print(regr.score(x_test, y_test))


(102827, 42) (102827, 7)
0.33156656493851294


In [5]:
# with open('rl_store/rl.ai', 'rb') as f:
#     regr = pk.load(f)
# with open('rl_store/qa.ai', 'rb') as f:
#     agent.q_matrix = pk.load(f)
# with open('rl_store/hist.ai', 'rb') as f:
#     agent.history = pk.load(f)

In [10]:
# Persist the fitted regressor, the Q-table and the training history as
# pickle files. The rl_store/ directory must already exist; open() does not
# create it. Only unpickle these files from a trusted source.
for store_path, artifact in (
    ('rl_store/rl.ai', regr),
    ('rl_store/qa.ai', agent.q_matrix),
    ('rl_store/hist.ai', agent.history),
):
    with open(store_path, 'wb') as f:
        pk.dump(artifact, f)

In [None]:
# Collect the return value of 100 agent.vs_random calls, each started from a
# fresh State() and using the fitted regressor.
# NOTE(review): presumably each call plays one game against a random opponent —
# confirm against rl.RL.vs_random.
res = [agent.vs_random(State(), regr) for _ in range(100)]

In [None]:
import plotly.express as px

def simplify_plot(hist, lr, max_display=30):
    """Plot a thinned-out version of an evaluation history as a line chart.

    Only every ``step``-th entry of ``hist`` is drawn, where
    ``step = int(len(hist) / max_display)``, so long histories stay readable.

    Args:
        hist: Sequence of history records; the first element of each record
            (``record[0]``) is used as the plotted score.
        lr: Value shown in the figure title as the learning rate.
        max_display: Approximate number of points to draw. Because the step is
            truncated, up to roughly twice this many points can appear.

    Returns:
        Whatever ``fig.show()`` returns for the created Plotly figure.
    """
    # When the history is shorter than max_display the quotient truncates to 0;
    # fall back to a step of 1 (plot every point) instead of letting the
    # modulo below raise ZeroDivisionError.
    step = int(len(hist) / max_display) or 1

    # Epochs are numbered from 1; keep the ones that are multiples of the step.
    kept = [
        (epoch, record[0])
        for epoch, record in enumerate(hist, start=1)
        if epoch % step == 0
    ]
    x = [epoch for epoch, _ in kept]
    y = [score for _, score in kept]

    fig = px.line(x=x, y=y)
    fig.update_layout(title=f'Evaluation history with learning rate {lr}', xaxis_title='Epoch #', yaxis_title='Score', width=1500)
    return fig.show()

In [None]:
# Plot the agent's recorded history, thinned to roughly 300 points, with the
# agent's lr shown in the figure title.
simplify_plot(agent.history, agent.lr, max_display=300)