In [1]:
import os
import sys

import support_functions

sys.path.append(os.path.join(os.path.abspath(os.getcwd()), "PEGNN"))
import json
import time
import myconfig_kcn as myconfig
import solver_kcn as solver
from datetime import datetime


def make_dir(path):
    """Create the single directory ``path`` on a best-effort basis.

    Any ``OSError`` raised by ``os.mkdir`` (for example because the
    directory already exists or its parent directory is missing) is
    suppressed. Parent directories are not created.

    Args:
        path: Directory to create.
    """
    try:
        os.mkdir(path)
    except OSError:
        # Deliberately best-effort: only filesystem errors are ignored, so
        # KeyboardInterrupt, SystemExit and programming errors (e.g. a
        # non-path argument) still propagate to the caller.
        pass


def build_folder_and_clean(path):
    """Ensure that ``path`` exists, creating missing parent folders too.

    Despite the name, nothing is cleaned: when ``path`` already exists it
    is left untouched, including any content it has.

    Args:
        path: Folder to create if it is missing.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # The path is already there. Attempting the creation and handling
        # the failure avoids the race between an existence check and the
        # actual creation when several jobs start at the same time.
        pass


def train(job_id, settings):
    """Train and evaluate one job, then write a JSON summary to disk.

    Calls ``solver.training`` followed by ``solver.evaluate`` and stores
    the summary in ``<settings['agent_dir']>/<job_id>.rtn``.

    The JSON object written has these keys:
        best_err:     arithmetic mean of the first value returned by
                      ``solver.evaluate``.
        r_squared:    arithmetic mean of the second value returned by
                      ``solver.evaluate``.
        list_total_0: first value returned by ``solver.training``.
        list_err_0:   second value returned by ``solver.training``.

    Args:
        job_id: Job identifier; forwarded to the solver and used as the
            name of the result file.
        settings: Configuration dict forwarded to the solver. Must contain
            the key ``'agent_dir'`` (folder receiving the result file).

    Raises:
        KeyError: If ``settings`` has no ``'agent_dir'`` entry.
        ZeroDivisionError: If ``solver.evaluate`` returns an empty sequence.
    """
    agent_dir = settings['agent_dir']
    # Create the output folder before the run starts so that a missing
    # folder cannot make the final write fail after training has finished.
    os.makedirs(agent_dir, exist_ok=True)

    print("Start training...")
    list_total, list_err = solver.training(settings=settings, job_id=job_id)
    print("Start evaluation...")
    best_err, r_squared = solver.evaluate(settings=settings, job_id=job_id)

    # Both evaluation results are averaged with sum()/len(), so they are
    # expected to be non-empty sequences of numbers.
    rtn = {
        "best_err": sum(best_err) / len(best_err),
        "r_squared": sum(r_squared) / len(r_squared),
        "list_total_0": list_total,
        "list_err_0": list_err,
    }

    # Serialize first: if a value is not JSON-serializable the error is
    # raised before the result file is opened (and truncated).
    json_dump = json.dumps(rtn)
    with open(os.path.join(agent_dir, f'{job_id}.rtn'), 'w') as fresult:
        fresult.write(json_dump)



# Known error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (2974x42 and 46x256)
# This is a problem with the dataset size, since I changed the dataset to its minimal size.
# If the same problem also occurs on the SSH server, then everything is all right.
if __name__ == '__main__':
    # Script entry point: assemble the settings for one hard-coded job,
    # make sure its working folder exists, then train and evaluate it.
    job_id = '000011'

    print('Init...')

    settings = {
        # NOTE(review): agent_id ('00011') differs from job_id ('000011');
        # confirm that this is intentional.
        'agent_id': '00011',
        # folder that receives the '<job_id>.rtn' result file (see train)
        'agent_dir': './logs',
        'origin_path': './Dataset_res250_reg4c/',

        # debug mode => data set
        'debug': True,
        'bp': False,

        # full_batch -> batch -> accumulation_steps double
        'batch': 16,
        'accumulation_steps': 128 // 16,
        'test_batch': 0,

        'es_mindelta': 0.5,

        # 'num_features_in': 2,
        'num_features_in': 10,

        'num_features_out': 1,
        'emb_hidden_dim': 256,

        'k': 20,
        'conv_dim': 256,

        'seed': 1,
        'model': 'PEGNN',
        'fold': 4,
        'holdout': 1,
        'lowest_rank': 1,

        'hp_marker': 'tuned',
        'nn_length': 3,
        'nn_hidden_dim': 32,
        'dropout_rate': 0.1,

        # for transformer
        'd_model': 32,
        'nhead': 2,

        'dim_feedforward': 128,
        'transformer_dropout': 0.1,
        'num_encoder_layers': 2,
        'env_features_in': 11,

        'transformer_dec_output': 32,
        'emb_dim': 32,

        'epoch': 1,
        'es_endure': 5,
        'nn_lr': 1e-5,
    }

    # Build the working folder for this job. build_folder_and_clean creates
    # the folder together with any missing parents, so a separate
    # single-level make_dir call beforehand is not needed.
    coffer_slot = myconfig.coffer_path + str(job_id) + '/'
    build_folder_and_clean(coffer_slot)
    settings['coffer_slot'] = coffer_slot
    settings['tgt_op'] = 'mcpm10'

    train(job_id, settings)

Init...
Start training...
{
  "agent_id": "00011",
  "agent_dir": "./logs",
  "origin_path": "./Dataset_res250_reg4c/",
  "debug": true,
  "bp": false,
  "batch": 16,
  "accumulation_steps": 8,
  "test_batch": 0,
  "es_mindelta": 0.5,
  "num_features_in": 10,
  "num_features_out": 1,
  "emb_hidden_dim": 256,
  "k": 20,
  "conv_dim": 256,
  "seed": 1,
  "model": "PEGNN",
  "fold": 4,
  "holdout": 1,
  "lowest_rank": 1,
  "hp_marker": "tuned",
  "nn_length": 3,
  "nn_hidden_dim": 32,
  "dropout_rate": 0.1,
  "d_model": 32,
  "nhead": 2,
  "dim_feedforward": 128,
  "transformer_dropout": 0.1,
  "num_encoder_layers": 2,
  "env_features_in": 11,
  "transformer_dec_output": 32,
  "emb_dim": 32,
  "epoch": 1,
  "es_endure": 5,
  "nn_lr": 1e-05,
  "coffer_slot": "./coffer_kcn/000011/",
  "tgt_op": "mcpm10"
}
Working on CPU
Length of df dict: 200
Length of call list: 6656
Length of df dict: 200
Length of call list: 5632
INTP_Model(
  (conv1): GCNConv(10, 256)
  (conv2): GCNConv(256, 256)
  (fc)

  vmin = self._density_vmin(array)
  vmax = self._density_vmax(array)


Current epoch: 1
total test: 352s: 0.3384432066231966 - real_iter_time: 0.327311754226684573
Test Done
		--------
		Iter: 104, inter_train_loss: 20.966842276975513
		--------

		--------
		test_loss: 1606.034423828125, last best test_loss: 2248.423095703125
		--------

		--------
		r_squared: 0.10984355398288645, MAE: 22.812988
		--------

Current epoch: 2


  vmin = self._density_vmin(array)
  vmax = self._density_vmax(array)


Finished Training
Start evaluation...
Working on CPU
Length of df dict: 200
Length of call list: 1536
		--------
		r_squared: 0.31000856358609985, MAE: 2.3672464
		--------

		--------
		Differ: 4.215063095092773, count: 1536
		--------



  vmin = self._density_vmin(array)
  vmax = self._density_vmax(array)


<Figure size 640x480 with 0 Axes>