In [2]:
import torch
import torch.nn as nn
import datetime
import json
import torch.nn.functional as F
import torch.nn.init as init
import numpy as np
from torch.profiler import profile, record_function, ProfilerActivity

In [3]:
class PolicyValueModel(nn.Module):
    def __init__(self, count_of_actions, n):
        super(PolicyValueModel, self).__init__()

        # self.conv1 = nn.Conv1d(1, 16, 3)
        # self.conv2 = nn.Conv1d(16, 32, 3)

        self.fc_p1 = nn.Linear(count_of_actions, n)
        self.fc_p2 = nn.Linear(n, count_of_actions)

        self.fc_v1 = nn.Linear(count_of_actions, n)
        self.fc_v2 = nn.Linear(n, 1)

        features_layers = []  # [self.conv1, self.conv2]
        for layer in features_layers:
            torch.nn.init.xavier_normal_(layer.weight)
            torch.nn.init.zeros_(layer.bias)

        output_layers = [self.fc_p1, self.fc_p2, self.fc_v1, self.fc_v2]
        for layer in output_layers:
            torch.nn.init.xavier_normal_(layer.weight)
            torch.nn.init.zeros_(layer.bias)

    def forward(self, x):
        # x = self.conv1(x)
        # x = self.conv2(x)

        # x = x.view(-1, 32)

        x_logit = F.relu(self.fc_p1(x))
        logit = self.fc_p2(x_logit)

        x_value = F.relu(self.fc_v1(x))
        value = self.fc_v2(x_value)

        return logit, value


In [4]:
#%load_ext autoreload
#%autoreload 2

In [10]:
import importlib
from env_parallel import Env
from ppo import Agent

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device: ', device)
start_date = datetime.datetime.now()

# CONFIG
env_candidates = 315
env_p = 63
env_count = 30
results_path = 'results/'
run_count = 1
# END


dataset_results = json.load(open('data/results.json'))

env = Env(env_p, env_candidates, env_count, f'data/{dataset_results[str(env_candidates)]["short"]}-{env_candidates}', device)

for i in range(run_count):
    net = PolicyValueModel(env_candidates, 64)
    # net = torch.load('models/save.net')

    agent = Agent(net, device=device, lr=0.01, name=f'p_med_{i}', results_path=results_path, epsilon = 0.3, td_steps=env_p, lr_decay=0.8)

    # with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True) as prof:
    agent.train(env=env, count_of_envs=env_count, input_dim=(env_candidates,),
                count_of_iterations=400, count_of_steps=3 * env_p, batch_size=3 * env_p)

    # prof.export_chrome_trace("trace.json")
    # agent.test(env)

print(datetime.datetime.now() - start_date)


device:  cuda
optimizer: Adam
Training is starting
iteration:        1 	epsiode:  150 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  244839 	Avg obj:   238119 	Best obj: 109786.0
iteration:        3 	epsiode:  300 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  220085 	Avg obj:   235169 	Best obj: 109786.0
iteration:        4 	epsiode:  450 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  171131 	Avg obj:   231986 	Best obj: 97463.0
iteration:        6 	epsiode:  600 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  287729 	Avg obj:   232571 	Best obj: 97463.0
iteration:        8 	epsiode:  750 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  244990 	Avg obj:   230347 	Best obj: 95802.0
iteration:        9 	epsiode:  900 	score:  -1.0000 	avg score:  -1.0000 	Best score:  -1.0000 Obj:  210208 	Avg obj:   238738 	Best obj: 95802.0
iteration:       11 	epsiode:  1050 	score:  -1.0000 	avg score:  -1.00

KeyboardInterrupt: 

In [None]:
# prof.export_stacks('stacks.txt')
# prof.export_chrome_trace("trace.json")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10
rcParams.update({'font.size': 18})

dataset_results = json.load(open('data/results.json'))

optimalne = dataset_results[str(env_candidates)][str(env_p)]
plt.title(f'c={env_candidates} p={env_p} env={env_count} Vektor')
plt.axhline(y=optimalne, color='r', label=f'Optimalne riesenie ({optimalne})')
for i in range(run_count):
    data = pd.read_csv(f'results/p_med_{i}.csv')
    d = data['avg_obj']
    plt.plot(data['episode'], d, linewidth=3, label=f'Trenovanie')

    plt.yticks(list(plt.yticks()[0]) + [optimalne])
    
    obj = json.load(open(f'results/p_med_{i}_result.json'))

    plt.scatter(obj['episode'], obj['best_obj'], linewidths=5, label=f'Min ({obj["best_obj"]})')
    plt.annotate(obj['best_obj'], (obj['episode'], obj['best_obj']), fontsize=12)

plt.legend()

KeyError: '8'

In [None]:
p = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0]
np.sum(p)

8

In [None]:
print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=2))

NameError: name 'prof' is not defined