In [99]:
import pandas as pd
from scipy.stats import rankdata
from sklearn.preprocessing import LabelEncoder
import os
import numpy as np

In [100]:
class OPDDataset:
    """Logged bandit feedback read from CSV files and turned into NumPy arrays.

    Parameters
    ----------
    data_path : str
        Directory that contains the log CSV and ``item_context.csv``.
    data_file : str, default 'women.csv'
        Name of the log CSV inside ``data_path``. The default keeps the
        original behaviour; pass e.g. ``'all.csv'`` to load another log
        without duplicating this code.

    Attributes
    ----------
    data : pandas.DataFrame
        The log, sorted by its ``timestamp`` column.
    item_context : pandas.DataFrame
        Raw contents of ``item_context.csv``.
    action, reward, pscore : numpy.ndarray
        The ``item_id``, ``click`` and ``propensity_score`` columns of the log.
    position : numpy.ndarray
        Dense rank of the ``position`` column, shifted to start at 0.
    context : numpy.ndarray
        One-hot encoding of all columns whose name contains ``user_feature``.
    action_context : numpy.ndarray
        Per-item feature matrix (integer-encoded columns followed by
        ``item_feature_0``).
    """

    def __init__(self, data_path, data_file='women.csv'):
        # The first CSV column is used as the row index in both files.
        log = pd.read_csv(os.path.join(data_path, data_file), index_col=0)
        self.item_context = pd.read_csv(
            os.path.join(data_path, "item_context.csv"), index_col=0
        )

        # Chronological order; every per-row array below follows this order.
        self.data = log.sort_values("timestamp")
        self.action = self.data["item_id"].values
        # Dense ranks start at 1; subtract 1 so the smallest position maps to 0.
        dense_rank = rankdata(self.data["position"].values, method="dense")
        self.position = (dense_rank - 1).astype(int)
        self.reward = self.data["click"].values
        self.pscore = self.data["propensity_score"].values

        # One-hot encode the user feature columns, dropping the first level
        # of each column.
        user_cols = self.data.columns.str.contains("user_feature")
        self.context = pd.get_dummies(
            self.data.loc[:, user_cols], drop_first=True
        ).values

        # item_feature_0 is kept as is; every other item column except
        # item_id is converted to integer codes, column by column.
        item_feature_0 = self.item_context["item_feature_0"].to_frame()
        item_feature_cat = self.item_context.drop(
            columns=["item_id", "item_feature_0"]
        ).apply(LabelEncoder().fit_transform)
        self.action_context = pd.concat(
            objs=[item_feature_cat, item_feature_0], axis=1
        ).values

In [33]:
# dataset = OPDDataset('data/open_bandit_dataset/bts/women/')

In [119]:
# Seed NumPy's global random state so the random draws in this notebook
# are repeatable.
SEED = 2024
np.random.seed(SEED)

In [120]:
# Directory holding the log; the log itself is the file 'all.csv' inside it.
data_path = 'data/open_bandit_dataset/bts/all/'
all_csv = os.path.join(data_path, 'all.csv')
# The first CSV column is used as the row index.
data = pd.read_csv(all_csv, index_col=0)

: 

In [None]:
# Per-item features live next to the log in the same directory.
item_context = pd.read_csv(os.path.join(data_path, "item_context.csv"), index_col=0)

# Put the log in chronological order before pulling out the per-row arrays.
data.sort_values(by="timestamp", inplace=True)

action = data["item_id"].values
reward = data["click"].values
pscore = data["propensity_score"].values
# Dense ranks start at 1; shift them so the smallest position becomes 0.
position = rankdata(data["position"].values, method="dense") - 1
position = position.astype(int)

# One-hot encode every column whose name contains "user_feature",
# dropping the first level of each column.
user_cols = data.columns.str.contains("user_feature")
context = pd.get_dummies(data.loc[:, user_cols], drop_first=True).values

# Item matrix: all item columns except item_id and item_feature_0 are turned
# into integer codes (one fit per column), then item_feature_0 is appended
# unchanged as the last column.
item_feature_0 = item_context[["item_feature_0"]]
encoder = LabelEncoder()
item_feature_cat = item_context.drop(columns=["item_id", "item_feature_0"]).apply(
    encoder.fit_transform
)
action_context = pd.concat([item_feature_cat, item_feature_0], axis=1).values

In [None]:
# Shapes of every array built so far, in a fixed order.
tuple(arr.shape for arr in (action, context, action_context, reward, pscore, position))

((12357200,), (12357200, 27), (80, 4), (12357200,), (12357200,), (12357200,))

In [None]:
# action = action[position == 1]
# context = context[position == 1].astype(float)
# reward = reward[position == 1]
# pscore = pscore[position == 1]

In [None]:
# Total of the reward array next to the number of rows.
np.sum(reward), len(reward)

(61208, 12357200)

In [None]:
# Row indices split by outcome: reward equal to 1 vs. reward equal to 0.
reward_one = np.flatnonzero(reward == 1)
reward_zero = np.flatnonzero(reward == 0)

In [None]:
# How many rows fall into each outcome group.
tuple(map(len, (reward_one, reward_zero)))

(61208, 12295992)

In [None]:
# Target fraction of reward == 1 rows in the down-sampled data.
p = 0.8
n_one = len(reward_one)
# Number of reward == 0 rows to keep so that reward == 1 rows make up a
# fraction p of the result: n_zero / n_one = (1 - p) / p.
# round() instead of int(): (1 - p) / p is not exact in floating point, so the
# product can land just below a whole number and truncation then drops a row.
# The count is capped at the rows available because sampling is done without
# replacement.
n_zero = min(round((1 - p) / p * n_one), len(reward_zero))
# NOTE: this overwrites reward_zero, so re-running the cell samples from the
# previous sample rather than from all reward == 0 rows.
reward_zero = np.random.choice(reward_zero, size=n_zero, replace=False)

In [None]:
# Group sizes after down-sampling the reward == 0 rows.
tuple(len(idx) for idx in (reward_one, reward_zero))

(61208, 91811)

In [None]:
# Pool the kept row indices of both outcome groups, then shuffle them so the
# two groups are interleaved at random.
kept_rows = np.concatenate((reward_one, reward_zero))
data_index = np.random.permutation(kept_rows)

In [None]:
# Keep only the sampled rows, in the shuffled order given by data_index.
# NOTE: the arrays are overwritten, so this cell is not idempotent; re-running
# it would index into the already reduced arrays.
action = action[data_index]
context = context[data_index]
reward = reward[data_index]
pscore = pscore[data_index]
# position is a per-row array too; subset it as well so that it stays aligned
# with action / context / reward / pscore instead of keeping its full length.
position = position[data_index]

In [None]:
# Total of the reward array and row count after down-sampling.
reward.sum(), reward.shape[0]

(61208, 153019)

In [None]:
# Split the rows into consecutive train / val / test segments.
N = len(action)
p = [0.8, 0.1, 0.1]
n_train = int(p[0] * N)
n_val = int(p[1] * N)

# Row window of each split; "test" takes everything after train and val.
split_windows = {
    "train": slice(None, n_train),
    "val": slice(n_train, n_train + n_val),
    "test": slice(n_train + n_val, None),
}
# Per-row arrays that are cut identically for every split.
row_arrays = {
    "action": action,
    "context": context,
    "reward": reward,
    "pscore": pscore,
}

# NOTE: from here on `data` is this dict, no longer the DataFrame read earlier.
data = {
    mode: {key: values[window] for key, values in row_arrays.items()}
    for mode, window in split_windows.items()
}
# Item features are shared by all splits.
data['items'] = action_context

In [None]:
# for mode in ['train', 'val', 'test']:
#     np.savez_compressed(f'data/opd/{mode}.npz', action=data[mode]['action'], context=data[mode]['context'], 
#                         reward=data[mode]['reward'], pscore=data[mode]['pscore'])
# np.savez_compressed('data/opd/item_features.npz', features=action_context)

In [None]:
# n_samples = {'train': 1000000, 'val': 100000, 'test': 100000}
# sample_idx = {k: np.random.choice(range(len(data[k]['action'])), size=v, replace=False) for k, v in n_samples.items()}

In [117]:
# for mode in ['train', 'val', 'test']:
#     np.savez_compressed(f'data/opd/sample_1M_{mode}.npz', action=data[mode]['action'][sample_idx[mode]], context=data[mode]['context'][sample_idx[mode]], 
#                         reward=data[mode]['reward'][sample_idx[mode]], pscore=data[mode]['pscore'][sample_idx[mode]])

In [None]:
# np.savez_compressed does not create missing directories, so make sure the
# output directory exists before writing.
os.makedirs('data/opd', exist_ok=True)
# Write one compressed archive per split, holding that split's four arrays.
for mode in ['train', 'val', 'test']:
    np.savez_compressed(
        f'data/opd/bandit_data_sampled_0.8a_{mode}.npz',
        action=data[mode]['action'],
        context=data[mode]['context'],
        reward=data[mode]['reward'],
        pscore=data[mode]['pscore'],
    )


In [98]:
# np.savez_compressed does not create missing directories, so make sure the
# output directory exists before writing.
os.makedirs('data/opd', exist_ok=True)
# Store the item feature matrix under the key 'features'.
np.savez_compressed('data/opd/item_features.npz', features=action_context)