In [3]:
import csv
import logging
import ast
# make deterministic
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
from torch.utils.data import Dataset
from collections import deque
import random
import torch
import pickle
import blosc
import argparse
import pandas as pd

from CasualGPT.utils import set_seed
from CasualGPT.GPT_model_returnscore import GPT, GPTConfig
from CasualGPT.GPT_trainer_returnscore import Trainer, TrainerConfig
from CasualGPT.utils import sample

In [2]:
# Run configuration, declared as (flag, add_argument keyword arguments) pairs
# and registered with the parser in the order listed.
_CLI_OPTIONS = [
    ('--seed', {'type': int, 'default': 123}),
    ('--context_length', {'type': int, 'default': 30}),
    ('--epochs', {'type': int, 'default': 5}),
    ('--model_type', {'type': str, 'default': 'reward_conditioned'}),
    ('--num_steps', {'type': int, 'default': 500000}),
    ('--num_buffers', {'type': int, 'default': 50}),
    ('--game', {'type': str, 'default': 'Breakout'}),
    ('--batch_size', {'type': int, 'default': 128}),
    ('--trajectories_per_buffer', {
        'type': int,
        'default': 10,
        'help': 'Number of trajectories to sample from each of the buffers.',
    }),
    ('--data_dir_prefix', {'type': str, 'default': './dqn_replay/'}),
    # NOTE(review): presumably declared only so that the `-f <connection file>`
    # argument found on sys.argv inside a Jupyter kernel does not make
    # parse_args() fail -- confirm before removing.
    ('-f', {}),
]

parser = argparse.ArgumentParser()
for _flag, _options in _CLI_OPTIONS:
    parser.add_argument(_flag, **_options)
args = parser.parse_args()

# Seed through the project's helper before any dataset or model is built.
set_seed(args.seed)

class StateActionReturnDataset(Dataset):
    """Serve fixed-length windows cut from several index-aligned sequences.

    Every item is a window of ``block_size // 3`` consecutive positions taken
    from ``data``, ``actions``, ``actions_neg``, ``actions_len``,
    ``return_step`` and ``rtgs``, plus the single ``timesteps`` entry at the
    window start.

    The window normally ends at ``idx + block_size // 3``. ``done_idxs`` is
    scanned in order for the first entry that is greater than both ``idx`` and
    the window length; if that entry is smaller than the default end, the
    window ends there instead and its start is moved back so the length stays
    the same.
    """

    def __init__(self, data, block_size, actions,actions_neg, actions_len, return_step, done_idxs, rtgs, timesteps):
        # Sequences indexed by position; __getitem__ slices all of them with
        # the same [start:stop] range.
        self.data = data
        self.actions = actions
        self.actions_neg = actions_neg
        self.actions_len = actions_len
        self.return_step = return_step
        self.rtgs = rtgs
        self.timesteps = timesteps
        # Boundary markers consulted when choosing where a window ends.
        self.done_idxs = done_idxs
        # Stored as given; __getitem__ uses a third of it as the window length.
        self.block_size = block_size
        # NOTE(review): hard-coded rather than derived from the inputs.
        self.vocab_size = 5010

    def __len__(self):
        """Return the number of valid start indices."""
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        """Return the window associated with start index ``idx``.

        Returns a 7-tuple ``(states, actions, actions_neg, actions_len,
        return_step, rtgs, timesteps)`` of tensors; ``rtgs`` and ``timesteps``
        get an extra dimension inserted at axis 1.
        """
        window = self.block_size // 3

        # First marker (in iteration order) beyond both idx and the window
        # length; requiring it to exceed the window length keeps the shifted
        # start from going negative.
        boundary = next(
            (int(marker) for marker in self.done_idxs if marker > idx and marker > window),
            None,
        )
        stop = idx + window
        if boundary is not None and boundary < stop:
            stop = boundary
        start = stop - window

        def window_of(sequence, dtype):
            # Same [start:stop] cut for every aligned sequence.
            return torch.tensor(sequence[start:stop], dtype=dtype)

        states = window_of(self.data, torch.long)
        actions = window_of(self.actions, torch.long)
        actions_neg = window_of(self.actions_neg, torch.long)
        actions_len = window_of(self.actions_len, torch.long)
        return_step = window_of(self.return_step, torch.float32)
        rtgs = window_of(self.rtgs, torch.float32).unsqueeze(1)
        # Only the timestep at the window start is returned.
        timesteps = torch.tensor(self.timesteps[start:start + 1], dtype=torch.int64).unsqueeze(1)
        return states, actions, actions_neg, actions_len, return_step, rtgs, timesteps

In [3]:
# 4Rec accuracy

# data_load_num: how many entries of done_idxs to use; must be less than 4893.
idx_num = 3000

# Split the dataset: the first 80% goes to training, the rest to testing.
idx_num_train = int(0.8 * idx_num)
idx_num_test = idx_num - idx_num_train

user_retain = pd.read_csv('./Data/DT_session_4_08_to_5_08_Pure_r2.csv')
done_idx_seq = pd.read_csv('./Data/done_idx_seq.csv')


def _column_to_matrix(frame, column):
    """Parse a column of Python-literal strings and stack the results row-wise."""
    parsed = np.array([ast.literal_eval(cell) for cell in frame[column].values])
    return np.vstack(parsed)


# Columns taken as stored in the CSV files.
actions_len = user_retain['actions_len'].values
return_step = user_retain['return'].values
timesteps = user_retain['session'].values
done_idxs = done_idx_seq['done_idx'].values

# Columns stored as string literals that have to be parsed into arrays.
obss = _column_to_matrix(user_retain, 'obss')
actions = _column_to_matrix(user_retain, 'actions')
# NOTE(review): actions_neg is read from the same 'actions' column as
# `actions`, so the two arrays hold identical values -- confirm whether a
# separate column was intended.
actions_neg = _column_to_matrix(user_retain, 'actions')
rtgs = _column_to_matrix(user_retain, 'rtg')


In [4]:
# Vocabulary size passed to GPTConfig below and reported as the "item number".
# NOTE(review): fixed by hand; the disabled line underneath would have derived
# it from a re_index helper that is not defined in this notebook.
vocab_size=8000
# actions, obss, vocab_size = re_index(actions, obss)

def timestep_paddle(timesteps_train):
    """Return the timesteps with one extra entry inserted before the second zero.

    The sequence is scanned from the start; at the position of the second
    element equal to 0, a new element (the preceding value plus one) is
    inserted, making the result one element longer. When fewer than two zeros
    are present the values are returned unchanged. Only one insertion is ever
    made.

    Args:
        timesteps_train: iterable of timestep values.

    Returns:
        A new ``np.ndarray`` built from the (possibly extended) sequence.
    """
    padded = list(timesteps_train)

    # Lazily walk the positions holding 0 so the scan stops at the second hit.
    zero_positions = (pos for pos, step in enumerate(padded) if step == 0)
    next(zero_positions, None)
    second_zero = next(zero_positions, None)

    if second_zero is not None:
        padded.insert(second_zero, padded[second_zero - 1] + 1)

    return np.array(padded)

# ---- train_dataset: rows before the boundary stored at done_idxs[idx_num_train] ----
sample_num_train = done_idxs[idx_num_train]
#sample_num_train=800
train_rows = slice(None, sample_num_train)

obss_train = obss[train_rows]
rtgs_train = rtgs[train_rows]
actions_train = actions[train_rows]
actions_neg_train = actions_neg[train_rows]
actions_len_train = actions_len[train_rows]
return_step_train = return_step[train_rows]
done_idxs_train = done_idxs[:idx_num_train + 1]
# timestep_paddle may make the timesteps one element longer than the other slices.
timesteps_train = timestep_paddle(timesteps[train_rows])

train_dataset = StateActionReturnDataset(
    data=obss_train,
    block_size=args.context_length * 3,
    actions=actions_train,
    actions_neg=actions_neg_train,
    actions_len=actions_len_train,
    return_step=return_step_train,
    done_idxs=done_idxs_train,
    rtgs=rtgs_train,
    timesteps=timesteps_train,
)

# ---- test_dataset: rows from the training boundary up to done_idxs[idx_num] ----
sample_num_test = done_idxs[idx_num]
#sample_num_test=1000
print('interaction number is:',sample_num_test)
test_rows = slice(sample_num_train, sample_num_test)

obss_test = obss[test_rows]
rtgs_test = rtgs[test_rows]
actions_test = actions[test_rows]
actions_neg_test = actions_neg[test_rows]
actions_len_test = actions_len[test_rows]
return_step_test = return_step[test_rows]
# Shift the boundaries so they are relative to the start of the test slice.
done_idxs_test = done_idxs[idx_num_train + 1:idx_num + 1] - sample_num_train
timesteps_test = timestep_paddle(timesteps[test_rows])

test_dataset = StateActionReturnDataset(
    data=obss_test,
    block_size=args.context_length * 3,
    actions=actions_test,
    actions_neg=actions_neg_test,
    actions_len=actions_len_test,
    return_step=return_step_test,
    done_idxs=done_idxs_test,
    rtgs=rtgs_test,
    timesteps=timesteps_test,
)

print('item number is:',vocab_size)

interaction number is: 43448
item number is: 8000


In [5]:
# Settings handed to both the model config and the trainer config.
# NOTE(review): max_timestep is a hand-set constant (29), not computed from
# the loaded timesteps -- confirm it matches the data.
shared_settings = dict(model_type=args.model_type, max_timestep=29)

# Model configuration and instantiation.
mconf = GPTConfig(
    vocab_size,
    train_dataset.block_size,
    n_layer=2,
    n_head=8,
    n_embd=128,
    **shared_settings,
)
model = GPT(mconf)

# Trainer configuration.
epochs = args.epochs
tokens_per_pass = len(train_dataset) * args.context_length * 3
trainer_settings = dict(
    max_epochs=epochs,
    batch_size=args.batch_size,
    learning_rate=0.005,
    lr_decay=False,
    warmup_tokens=512 * 20,
    final_tokens=2 * tokens_per_pass,
    num_workers=4,
    seed=args.seed,
    game=args.game,
)
tconf = TrainerConfig(**trainer_settings, **shared_settings)

# Build the trainer on the train/test datasets and start training.
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

epoch 1 iter 270: train loss 0.75376. lr 5.000000e-03: 100%|█| 271/271 [09:05<00


KeyboardInterrupt: 

In [None]:
16.37