In [1]:
import csv
import ast
import logging
# make deterministic
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
from torch.utils.data import Dataset
from collections import deque
import random
import pickle
import blosc
import argparse
#from create_dataset import create_dataset
import pandas as pd
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from CasualGPT.utils import set_seed
from CasualGPT.GPT_model_accuracy import GPT, GPTConfig
from CasualGPT.GPT_trainer_accuracy import Trainer, TrainerConfig
from CasualGPT.utils import sample

In [2]:
# Run configuration expressed as command-line options. Every option carries a
# default, so the values below act as the hyper-parameters of this run.
parser = argparse.ArgumentParser()
for _flag, _type, _default in (
    ('--seed', int, 123),
    ('--context_length', int, 30),
    ('--epochs', int, 5),
    ('--model_type', str, 'reward_conditioned'),
    ('--num_steps', int, 500000),
    ('--num_buffers', int, 50),
    ('--game', str, 'Breakout'),
    ('--batch_size', int, 128),
):
    parser.add_argument(_flag, type=_type, default=_default)
parser.add_argument(
    '--trajectories_per_buffer',
    type=int,
    default=10,
    help='Number of trajectories to sample from each of the buffers.',
)
parser.add_argument('--data_dir_prefix', type=str, default='./dqn_replay/')
# NOTE(review): '-f' is accepted but never read; presumably it absorbs the
# `-f <connection file>` argument a Jupyter kernel receives -- confirm.
parser.add_argument('-f')
args = parser.parse_args()

# Seed through the project helper before any data or model is created.
set_seed(args.seed)

class StateActionReturnDataset(Dataset):
    """Fixed-length windows over a flat log of states, actions and returns.

    Every per-step container (``data``, ``actions``, ``actions_neg``,
    ``actions_len``, ``return_step``, ``rtgs``, ``timesteps``) is sliced with
    the same index range, so they are expected to be aligned row by row.

    Args:
        data: Per-step states; converted to ``torch.long`` on access.
        block_size: Context size in tokens. ``__getitem__`` uses
            ``block_size // 3`` steps per sample.
        actions: Per-step actions (``torch.long``).
        actions_neg: Per-step negative actions (``torch.long``).
        actions_len: Per-step action lengths (``torch.long``).
        return_step: Per-step returns (``torch.float32``).
        done_idxs: Episode end indices, iterated in order to find the first
            boundary after the requested index.
        rtgs: Per-step returns-to-go (``torch.float32``).
        timesteps: Per-step timestep values (``torch.int64``).
        vocab_size: Value exposed as ``self.vocab_size``. Defaults to the
            previously hard-coded 5010 so existing callers are unaffected.
    """

    def __init__(self, data, block_size, actions, actions_neg, actions_len, return_step, done_idxs, rtgs, timesteps, vocab_size=5010):
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.data = data
        self.actions = actions
        self.actions_neg = actions_neg
        self.actions_len = actions_len
        self.return_step = return_step
        self.done_idxs = done_idxs
        self.rtgs = rtgs
        self.timesteps = timesteps

    def __len__(self):
        """Return the number of start positions: ``len(data) - block_size``."""
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        """Return one window of ``block_size // 3`` consecutive steps.

        The window nominally starts at ``idx``. If the first entry of
        ``done_idxs`` that is greater than both ``idx`` and the window length
        falls inside the window, the window is shifted back so that it ends
        at that boundary.

        Returns:
            Tuple ``(states, actions, actions_neg, actions_len, return_step,
            rtgs, timesteps)``. ``rtgs`` gets a trailing axis of size 1;
            ``timesteps`` holds only the first timestep of the window, also
            with a trailing axis of size 1.
        """
        block_size = self.block_size // 3
        done_idx = idx + block_size
        for i in self.done_idxs:
            # First boundary after idx; `i > block_size` keeps the shifted
            # start index (done_idx - block_size) from going negative.
            if i > idx and i > block_size:
                done_idx = min(int(i), done_idx)
                break
        idx = done_idx - block_size
        window = slice(idx, done_idx)

        states = torch.tensor(self.data[window], dtype=torch.long)
        actions = torch.tensor(self.actions[window], dtype=torch.long)
        actions_neg = torch.tensor(self.actions_neg[window], dtype=torch.long)
        actions_len = torch.tensor(self.actions_len[window], dtype=torch.long)
        return_step = torch.tensor(self.return_step[window], dtype=torch.float32)

        rtgs = torch.tensor(self.rtgs[window], dtype=torch.float32).unsqueeze(1)
        timesteps = torch.tensor(self.timesteps[idx:idx+1], dtype=torch.int64).unsqueeze(1)
        return states, actions, actions_neg, actions_len, return_step, rtgs, timesteps


In [3]:
# Load the per-user feature table.
df = pd.read_csv('./Data/df_feature_rtg.csv')
# 'rtg' is stored as the text form of a Python list. Parse it with
# ast.literal_eval (as the later cells do) instead of eval, so file contents
# are only ever read as literals and never executed as code.
df['rtg'] = df['rtg'].apply(ast.literal_eval)
# Keep only the first seven columns.
df = df.drop(df.columns[7:], axis=1)

# Separate features and targets
X = df.drop(columns=['rtg', 'user_id']).values
y = df['rtg'].tolist()

# Standardize the features
# NOTE(review): the scaler is fitted on all rows before the train/test split
# made further down, so held-out rows influence the scaling -- confirm this
# is acceptable.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

In [5]:
# Pick the compute device. The second GPU ("cuda:1") is still preferred, but
# it is only selected when it exists: on a single-GPU host the first GPU is
# used, and without CUDA the code runs on the CPU.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


# Define the Dataset
class UserDataset(Dataset):
    """Pairs each feature row with the label stored at the same position."""

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        # The feature container alone determines the dataset size.
        return len(self.features)

    def __getitem__(self, idx):
        sample_features = self.features[idx]
        sample_labels = self.labels[idx]
        return sample_features, sample_labels

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor,
    y_tensor,
    test_size=0.2,
    random_state=42,
)

# Serve the training portion in shuffled mini-batches of 32.
train_dataset = UserDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

class RewardNet(nn.Module):
    """Three-layer MLP that maps a feature row to two weights summing to 1."""

    def __init__(self, input_size):
        super().__init__()
        # input_size -> 64 -> 32 -> 2; the final pair is turned into weights
        # by the softmax in forward().
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        hidden = nn.functional.relu(self.fc1(x))
        hidden = nn.functional.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        # Normalise across the two outputs of each row (dim=1).
        return nn.functional.softmax(logits, dim=1)

# Size the input layer from the number of feature columns in the training
# split, then move the network onto the selected device.
model = RewardNet(input_size=X_train.shape[1]).to(device)


# Define the custom loss function
class BalancedRewardLoss(nn.Module):
    """Loss that rewards a high weighted sum and penalises imbalance.

    With predictions ``(ws, wl)`` and targets ``(Rs, Rl)`` per row, the
    per-row loss is ``-(ws*Rs + wl*Rl) + lambda_term * |ws*Rs - wl*Rl|``;
    ``forward`` returns its mean over the batch.

    ``regularization_factor`` is stored on the module but is not applied
    anywhere in ``forward``.
    """

    def __init__(self, lambda_term, regularization_factor):
        super().__init__()
        self.lambda_term, self.regularization_factor = lambda_term, regularization_factor

    def forward(self, predictions, targets):
        # Column 0 and column 1 of each tensor are multiplied pairwise.
        weighted_first = predictions[:, 0] * targets[:, 0]
        weighted_second = predictions[:, 1] * targets[:, 1]

        imbalance = (weighted_first - weighted_second).abs()
        per_sample = self.lambda_term * imbalance - (weighted_first + weighted_second)

        return per_sample.mean()

# Loss hyper-parameters. regularization_factor is passed to the criterion,
# whose forward pass does not use it.
lambda_term = 0.5
regularization_factor = 0.01
criterion = BalancedRewardLoss(lambda_term, regularization_factor).to(device)

# Adam over all RewardNet parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs and report the mean batch loss of each.
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    epoch_losses = []

    for inputs, labels in train_loader:
        # Batches must be on the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_losses.append(loss.item())

    # Unweighted mean over batches (a smaller last batch counts the same).
    epoch_loss = sum(epoch_losses) / len(epoch_losses)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')


# Persist only the learned parameters.
torch.save(model.state_dict(), 'reward_net.pth')

Using device: cuda:1
Epoch 1/100, Loss: -6.2723
Epoch 2/100, Loss: -6.2743
Epoch 3/100, Loss: -6.2784
Epoch 4/100, Loss: -6.2777
Epoch 5/100, Loss: -6.2785
Epoch 6/100, Loss: -6.2773
Epoch 7/100, Loss: -6.2775
Epoch 8/100, Loss: -6.2773
Epoch 9/100, Loss: -6.2786
Epoch 10/100, Loss: -6.2787
Epoch 11/100, Loss: -6.2787
Epoch 12/100, Loss: -6.2779
Epoch 13/100, Loss: -6.2781
Epoch 14/100, Loss: -6.2786
Epoch 15/100, Loss: -6.2786
Epoch 16/100, Loss: -6.2789
Epoch 17/100, Loss: -6.2769
Epoch 18/100, Loss: -6.2774
Epoch 19/100, Loss: -6.2799
Epoch 20/100, Loss: -6.2779
Epoch 21/100, Loss: -6.2773
Epoch 22/100, Loss: -6.2778
Epoch 23/100, Loss: -6.2792
Epoch 24/100, Loss: -6.2800
Epoch 25/100, Loss: -6.2774
Epoch 26/100, Loss: -6.2765
Epoch 27/100, Loss: -6.2805
Epoch 28/100, Loss: -6.2776
Epoch 29/100, Loss: -6.2766
Epoch 30/100, Loss: -6.2783
Epoch 31/100, Loss: -6.2786
Epoch 32/100, Loss: -6.2774
Epoch 33/100, Loss: -6.2766
Epoch 34/100, Loss: -6.2774
Epoch 35/100, Loss: -6.2790
Epoch 36

In [6]:
# Assuming your full dataset tensors are X_tensor and y_tensor
# and that your model is already trained and transferred to the appropriate device.

# Wrap every row (train and held-out alike) for the inference pass.
full_dataset = UserDataset(features=X_tensor, labels=y_tensor)

# shuffle=False keeps the batches in the row order of X_tensor.
full_loader = DataLoader(dataset=full_dataset, batch_size=32, shuffle=False)

# Function to predict weights using the trained model
def predict_weights(model, data_loader):
    """Run ``model`` over ``data_loader`` and collect its outputs.

    Args:
        model: A ``torch.nn.Module``; it is switched to evaluation mode.
        data_loader: Iterable yielding ``(inputs, labels)`` pairs. The labels
            are ignored.

    Returns:
        NumPy array with the model outputs of all batches concatenated along
        the first axis, in iteration order. An empty iterable yields an empty
        array.
    """
    model.eval()  # Set the model to evaluation mode

    # Use the device the model lives on instead of a notebook-global
    # `device`, so the function also works on its own in a fresh kernel.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    batch_outputs = []
    with torch.no_grad():  # No need to track the gradients
        for inputs, _ in data_loader:
            batch_outputs.append(model(inputs.to(target_device)).cpu())

    if not batch_outputs:
        return np.array([])
    return torch.cat(batch_outputs, dim=0).numpy()

# Generate weights for all users
# full_loader is built with shuffle=False, so the rows of all_user_weights
# follow the row order of X_tensor.
all_user_weights = predict_weights(model, full_loader)

# Now `all_user_weights` holds the weights for all the users


In [7]:
import pandas as pd

# Assuming all_user_weights contains the predicted weights for each user
# as a NumPy array or a list of lists.

# One tuple of weights per prediction row, in the same order as the rows.
weights_list = [tuple(row) for row in all_user_weights]

# Start from the user_id column alone ...
weights_df = pd.DataFrame(df['user_id'], columns=['user_id'])

# ... and attach the tuples positionally (row i of df <-> prediction i).
weights_df['weights'] = weights_list

# Quick visual check of the first rows.
print(weights_df.head())

# If you need to save this DataFrame to a CSV file:
#weights_df.to_csv('user_weights.csv', index=False)

   user_id                   weights
0        2  (0.47422937, 0.52577066)
1        3   (0.48761874, 0.5123812)
2        4  (0.47192687, 0.52807313)
3        5     (0.49095994, 0.50904)
4        6  (0.48299673, 0.51700324)


In [8]:
# 4Rec accuracy

# data_load_num
idx_num=3000


idx_num_train = int(0.8 * idx_num)
idx_num_test = idx_num-idx_num_train

user_retain = pd.read_csv('./Data/DT_session_4_08_to_5_08_Pure_r2.csv')
done_idx_seq = pd.read_csv('./Data/done_idx_seq.csv')
# 'rtg' is stored as the text form of a Python list.
user_retain['rtg'] = user_retain['rtg'].apply(ast.literal_eval)

# Merge 'user_retain' with 'weights_df' on 'user_id'
# NOTE(review): this is an inner merge, so rows whose user_id has no weights
# are dropped and duplicate user_ids in weights_df would multiply rows --
# confirm the row count still matches done_idx_seq.
user_retain = user_retain.merge(weights_df, on='user_id')

# Apply the first weight to 'click' and the second weight to 'return'
user_retain['weighted_click'] = user_retain.apply(lambda row: row['click'] * row['weights'][0], axis=1).round(2)
user_retain['weighted_return'] = user_retain.apply(lambda row: row['return'] * row['weights'][1], axis=1).round(2)


# Element-wise product of the rtg list and the weights tuple.
user_retain['weighted_rtg'] = user_retain.apply(lambda row: np.multiply(row['rtg'], row['weights']), axis=1)

# Round to two decimals and convert to built-in floats. round() on a NumPy
# scalar returns a NumPy scalar; under NumPy >= 2 those are written to the
# CSV as "np.float64(...)", which ast.literal_eval cannot read back when the
# file is reloaded.
user_retain['weighted_rtg'] = user_retain['weighted_rtg'].apply(lambda lst: [float(round(num, 2)) for num in lst])

user_retain.to_csv('DT_session_4_08_to_5_08_Pure_r2_weights.csv', index=False)

In [3]:
def _parse_stacked(column):
    """Parse each string cell as a Python literal and stack the results row-wise."""
    return np.vstack(np.array([ast.literal_eval(cell) for cell in column]))


# Number of done-index entries to load, split 80/20 into train and test.
idx_num = 3000
idx_num_train = int(0.8 * idx_num)
idx_num_test = idx_num - idx_num_train

# Reload the weighted table written by the previous step, plus the
# done-index sequence.
user_retain = pd.read_csv('DT_session_4_08_to_5_08_Pure_r2_weights.csv')
done_idx_seq = pd.read_csv('./Data/done_idx_seq.csv')

# Columns used as-is.
rtgs = user_retain['weighted_rtg'].values
actions_len = user_retain['actions_len'].values
return_step = user_retain['weighted_return'].values
timesteps = user_retain['session'].values
done_idxs = done_idx_seq['done_idx'].values
obss = user_retain['obss'].values
actions = user_retain['actions'].values
# NOTE(review): actions_neg is read from the 'actions' column, exactly like
# `actions` -- confirm whether a dedicated negative-sample column was intended.
actions_neg = user_retain['actions'].values

# Columns holding stringified lists are parsed and stacked into 2-D arrays.
obss = _parse_stacked(obss)
actions = _parse_stacked(actions)
actions_neg = _parse_stacked(actions_neg)
rtgs = _parse_stacked(rtgs)

In [4]:
# Item vocabulary size: passed to GPTConfig and printed as the item number
# further down. Hard-coded here.
vocab_size=8000
# actions, obss, vocab_size = re_index(actions, obss)

def timestep_paddle(timesteps_train):
    """Insert one extra timestep just before the second zero in the sequence.

    The inserted value is the element preceding the second zero, plus one.
    Sequences with fewer than two zeros are returned unchanged. The result is
    always a new NumPy array; the input is not modified.
    """
    padded = list(timesteps_train)
    zero_positions = (pos for pos, step in enumerate(padded) if step == 0)

    next(zero_positions, None)                  # skip the first zero
    second_zero = next(zero_positions, None)    # position of the second one
    if second_zero is not None:
        padded.insert(second_zero, padded[second_zero - 1] + 1)

    return np.array(padded)

# ---- train split: rows [0, sample_num_train) ----
# done_idxs[idx_num_train] is the row index at which the training part ends.
sample_num_train = done_idxs[idx_num_train]
train_rows = slice(None, sample_num_train)

obss_train, rtgs_train = obss[train_rows], rtgs[train_rows]
actions_train, actions_neg_train = actions[train_rows], actions_neg[train_rows]
actions_len_train, return_step_train = actions_len[train_rows], return_step[train_rows]
done_idxs_train = done_idxs[:idx_num_train + 1]
timesteps_train = timestep_paddle(timesteps[train_rows])

# Note: this rebinds `train_dataset`, which earlier held the UserDataset.
train_dataset = StateActionReturnDataset(
    data=obss_train,
    block_size=args.context_length * 3,
    actions=actions_train,
    actions_neg=actions_neg_train,
    actions_len=actions_len_train,
    return_step=return_step_train,
    done_idxs=done_idxs_train,
    rtgs=rtgs_train,
    timesteps=timesteps_train,
)

# ---- test split: rows [sample_num_train, sample_num_test) ----
sample_num_test = done_idxs[idx_num]
print('interaction number is:',sample_num_test)
test_rows = slice(sample_num_train, sample_num_test)

obss_test, rtgs_test = obss[test_rows], rtgs[test_rows]
actions_test, actions_neg_test = actions[test_rows], actions_neg[test_rows]
actions_len_test, return_step_test = actions_len[test_rows], return_step[test_rows]
# Shift the boundaries so they are relative to the start of the test slice.
done_idxs_test = done_idxs[idx_num_train + 1:idx_num + 1] - sample_num_train
timesteps_test = timestep_paddle(timesteps[test_rows])

test_dataset = StateActionReturnDataset(
    data=obss_test,
    block_size=args.context_length * 3,
    actions=actions_test,
    actions_neg=actions_neg_test,
    actions_len=actions_len_test,
    return_step=return_step_test,
    done_idxs=done_idxs_test,
    rtgs=rtgs_test,
    timesteps=timesteps_test,
)

print('item number is:',vocab_size)

interaction number is: 43448
item number is: 8000


In [5]:
# Model configuration: 2 layers, 8 heads, 128-dim embeddings.
# NOTE(review): max_timestep=29 is hard-coded here and in TrainerConfig --
# confirm it matches the largest value in `timesteps`.
mconf = GPTConfig(
    vocab_size,
    train_dataset.block_size,
    n_layer=2,
    n_head=8,
    n_embd=128,
    model_type=args.model_type,
    max_timestep=29,
)
# Note: this rebinds `model`, which earlier held the RewardNet.
model = GPT(mconf)

# initialize a trainer instance and kick off training
epochs = args.epochs

tconf = TrainerConfig(
    max_epochs=epochs,
    batch_size=args.batch_size,
    learning_rate=0.005,
    lr_decay=False,
    warmup_tokens=512 * 20,
    final_tokens=2 * len(train_dataset) * args.context_length * 3,
    num_workers=4,
    seed=args.seed,
    model_type=args.model_type,
    game=args.game,
    max_timestep=29,
)

trainer = Trainer(model, train_dataset, test_dataset, tconf)

trainer.train()

epoch 1 iter 270: train loss 0.61464. lr 5.000000e-03: 100%|██████████| 271/271 [42:47<00:00,  9.47s/it]


bleu score is: 0.5687173818881003
rouge score is: 0.4434975067613252
hr is: 0.35307607283215026
NDCG is: 0.3665048447288429
CTR Precision is: 0.37023524657707924
544.5846126079559


epoch 2 iter 270: train loss 0.38501. lr 5.000000e-03: 100%|██████████| 271/271 [45:10<00:00, 10.00s/it]


bleu score is: 0.8152739921399593
rouge score is: 0.7850970223757608
hr is: 0.7229109882099394
NDCG is: 0.7381636943929228
CTR Precision is: 0.7450633081896553
299.0181610584259


epoch 3 iter 270: train loss 0.29641. lr 5.000000e-03: 100%|██████████| 271/271 [45:13<00:00, 10.01s/it]


bleu score is: 0.8839330047751863
rouge score is: 0.8571992266734281
hr is: 0.8258995071627787
NDCG is: 0.8335408262620829
CTR Precision is: 0.8478384888438133
260.7799005508423


epoch 4 iter 270: train loss 0.18537. lr 5.000000e-03: 100%|██████████| 271/271 [45:29<00:00, 10.07s/it]


bleu score is: 0.8992780436528061
rouge score is: 0.8842331706389457
hr is: 0.8489170654792089
NDCG is: 0.8589763387540844
CTR Precision is: 0.8708433696754563
218.8285894393921


epoch 5 iter 270: train loss 0.20670. lr 5.000000e-03: 100%|██████████| 271/271 [33:04<00:00,  7.32s/it]


bleu score is: 0.8921016628634213
rouge score is: 0.8896681140128466
hr is: 0.8381524942951317
NDCG is: 0.8515670134602821
CTR Precision is: 0.8604008858392492
197.68633222579956
