In [None]:
%pip install numpy pandas torch xgboost scikit-learn
%pip install contextualbandits

In [None]:
import sys
import os

# Add the directory containing the Models folder to sys.path
sys.path.append('/Users/niekvandergaag/Documents/Thesis/Code')


# Now you can import from Models
from Models.helperfunctions import *
from Models.ETE import *
from Models.TSnocontext import *
from Datasets.Bibtex import *


In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb

from sklearn.ensemble import RandomForestRegressor
from Models.helperfunctions import *
from Models.ETE import *
from Models.TSnocontext import *
from Models.helperfunctions import *
from Models.PruningWithParent import *

In [None]:
# This line previously read `import numpy as re`, which bound NumPy to the
# name of the standard-library regex module. NumPy is already available as
# `np` from an earlier cell, so the regex module is imported under its own name.
import re
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.datasets import load_svmlight_file
from contextualbandits.online import PartitionedTS, ExploreFirst, _BasePolicy, _BasePolicyWithExploit, BootstrappedTS
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier


# Load the Bibtex data. `parse_data` comes from one of the star-imported
# project modules; presumably it returns (features, one-hot labels) -- TODO confirm.
X, y = parse_data("/Users/niekvandergaag/Documents/Thesis/Code/Datasets/Bibtex/Bibtex_data.txt")
print(X.shape)
print(y.shape)
# Optional label reduction (disabled); the shapes are printed a second time so
# the effect is visible when it is re-enabled.
#X, y = select_top_k(X, y, k=15)
print(X.shape)
print(y.shape)

# One arm per label column.
nchoices = y.shape[1]

# Base classifiers handed to the policies that wrap a supervised learner.
base_algorithm = LogisticRegression(solver='lbfgs', warm_start=True, max_iter=500)
base_algorithm2 = DecisionTreeClassifier(max_depth=8, min_samples_leaf=4, random_state=42)

# Beta priors in the ((a, b), n) form used by contextualbandits.
beta_prior = ((3./nchoices, 4), 2) # until there are at least 2 observations of each class, will use this prior
beta_prior_ucb = ((5./nchoices, 4), 2) # UCB gives higher numbers, thus the higher positive prior
beta_prior_ts = ((2./np.log2(nchoices), 4), 2)

# Bandit-terminology aliases for the same objects.
contexts = X # these are the features
rewards = y # these are the labels
n_arms = nchoices

In [None]:
# Share of zero-valued entries in the feature matrix X.
nonzero_entries = np.count_nonzero(X)
total_entries = float(X.size)
sparsity = 1.0 - (nonzero_entries / total_entries)
print(f"Feature sparsity of Bibtex dataset: {sparsity * 100:.2f}%")

# Append the figure to a running log so it can later be related to performance.
with open("feature_sparsity_log.txt", "a") as log_file:
    log_file.write(f"Dataset: Bibtex, Sparsity: {sparsity * 100:.2f}%\n")


In [None]:
context_size = X.shape[1]  # number of feature columns

# Settings shared by the two tree-partitioning Thompson Sampling agents:
# Beta prior for Thompson Sampling, all CPU cores, and the decision-tree limits.
_tree_agent_settings = dict(
    nchoices=nchoices,
    beta_prior=beta_prior_ts,
    njobs=-1,
    max_depth=6,
    min_samples_leaf=4,
)

# Plain partitioned Thompson Sampling; ccp_alpha=0.00 means no pruning.
partitioned_ts_agent = PartitionedTS(
    random_state=4444,
    ccp_alpha=0.00,
    **_tree_agent_settings,
)

# Project variant with pruning.
TreeHeuristicWithPruning = PartitionedTSWithPruning(
    random_state=6666,
    **_tree_agent_settings,
)

# The PartitionedTSWithPruningParent variant (random_state=6666, same tree
# settings) is currently disabled, as are the XGBoost and random-forest agents.

# Context-free Thompson Sampling baseline with a uniform Beta(1, 1) prior.
ts_nocontext_agent = ThompsonSamplingNoContext(nchoices=nchoices, beta_prior=(1,1))

# Explore for 1000 rounds, then exploit a decision-tree classifier.
ete_agent = ExploreFirst(
    deepcopy(base_algorithm2),
    nchoices=nchoices,
    explore_rounds=1000,
    beta_prior=None,
    random_state=8888,
)

# Bootstrapped Thompson Sampling over decision-tree classifiers.
bootstrapped_ts_agent = BootstrappedTS(
    deepcopy(base_algorithm2),
    nchoices=nchoices,
    beta_prior=beta_prior_ts,
    random_state=2222,
)

# Order matters: the reward/action tracking lists are matched to it by position.
models = [
    partitioned_ts_agent,
    TreeHeuristicWithPruning,
    ts_nocontext_agent,
    ete_agent,
    bootstrapped_ts_agent,
]

In [None]:
print(X.shape)

In [None]:
# Per-policy reward histories. `lst_rewards` holds the very same list objects
# as the individual names, so appending through either is visible in both.
lst_rewards = [list() for _ in range(len(models))]
(rewards_part_ts,
 rewards_TreePruned,
 rewards_tsnocontext,
 rewards_ete,
 rewards_bootstrapped) = lst_rewards

# batch size - algorithms will be refit after N rounds
batch_size = 50

# Initial seed: every policy starts from the same small random selection of
# actions and the rewards observed for them.
np.random.seed(1)
first_batch = X[:batch_size, :]
action_chosen = np.random.randint(nchoices, size=batch_size)
rewards_received = y[np.arange(batch_size), action_chosen]

# First fit. The context-free agent takes no features and uses different
# keyword names from the contextualbandits policies.
for model in models:
    if not isinstance(model, ThompsonSamplingNoContext):
        model.fit(X=first_batch, a=action_chosen, r=rewards_received)
    else:
        model.fit(None, actions=action_chosen, rewards=rewards_received)

# Per-policy action histories, each starting as an independent copy of the
# shared random seed actions.
lst_actions = [action_chosen.copy() for _ in range(len(models))]
(lst_a_pts,
 lst_a_THP,
 lst_a_tsnc,
 lst_a_ete,
 lst_a_bootstrapped) = lst_actions

# rounds are simulated from the full dataset
def simulate_rounds(model, rewards, actions_hist, X_global, y_global, batch_st, batch_end):
    """
    Play one batch of rounds with `model`, record the reward and refit the model.

    Parameters
    ----------
    model : policy object
        Either a `ThompsonSamplingNoContext` instance (argument-less `predict()`
        plus `update(action, reward)`) or a policy exposing `predict(X)` and
        `fit(X, a, r)`.
    rewards : list
        Per-batch reward totals for this policy; this batch's total is appended in place.
    actions_hist : array
        Actions chosen by this policy for rows `[0, batch_st)`.
    X_global, y_global : arrays
        Full feature matrix and reward matrix (one column per arm).
    batch_st, batch_end : int
        Half-open row range of the batch to simulate.

    Returns
    -------
    array
        `actions_hist` extended with the actions chosen in this batch.
    """
    np.random.seed(batch_st)

    ## choosing actions for this batch
    if isinstance(model, ThompsonSamplingNoContext):
        # The context-free agent picks one arm per call, so call it once per row.
        actions_this_batch = np.array([model.predict() for _ in range(batch_end - batch_st)])
    else:
        # Use a full-width integer type: casting to uint8 silently wraps arm
        # indices >= 256, which would credit rewards to the wrong arm.
        actions_this_batch = model.predict(X_global[batch_st:batch_end, :]).astype(np.intp)

    print(f"Actions chosen this batch: {actions_this_batch}")

    # Reward observed for each chosen arm in this batch.
    rewards_this_batch = y_global[np.arange(batch_st, batch_end), actions_this_batch]

    # keeping track of the sum of rewards received
    rewards.append(rewards_this_batch.sum())

    # adding this batch to the history of selected actions
    new_actions_hist = np.append(actions_hist, actions_this_batch)

    # now refitting the algorithms after observing these new rewards
    np.random.seed(batch_st)
    if isinstance(model, ThompsonSamplingNoContext):
        for action, reward in zip(actions_this_batch, rewards_this_batch):
            model.update(action, reward)
    else:
        # Refit from scratch on everything observed so far.
        model.fit(X_global[:batch_end, :], new_actions_hist, y_global[np.arange(batch_end), new_actions_hist])

    return new_actions_hist

# now running all the simulation
# The first `batch_size` rows were used as the random seed batch, so batch i
# covers rows [(i + 1) * batch_size, (i + 2) * batch_size), clipped to the data.
for i in range(int(np.floor(X.shape[0] / batch_size))):
    batch_st = (i + 1) * batch_size
    batch_end = (i + 2) * batch_size
    batch_end = np.min([batch_end, X.shape[0]])

    # When the number of rows is an exact multiple of batch_size the final
    # iteration would start at the end of the data; skip that empty batch
    # instead of asking the policies to predict on zero rows.
    if batch_st >= batch_end:
        break

    for model in range(len(models)):
        lst_actions[model] = simulate_rounds(models[model],
                                             lst_rewards[model],
                                             lst_actions[model],
                                             X, y,
                                             batch_st, batch_end)


In [None]:
import matplotlib.pyplot as plt
from pylab import rcParams
%matplotlib inline

def get_mean_reward(reward_lst, batch_size=batch_size):
    """
    Running mean reward per round after each batch.

    Entry k is the sum of the first k + 1 batch totals divided by
    (k + 1) * batch_size. The default `batch_size` is captured from the
    notebook-level variable at definition time.
    """
    running_total = 0
    mean_rew = list()
    for batches_seen, batch_total in enumerate(reward_lst, start=1):
        running_total = running_total + batch_total
        mean_rew.append(running_total * 1.0 / (batches_seen * batch_size))
    return mean_rew

rcParams['figure.figsize'] = 20, 10
lwd = 3
cmap = plt.get_cmap('tab20')
colors=plt.cm.tab20(np.linspace(0, 1, 20))

ax = plt.subplot(111)

# (reward history, legend label, index into `colors`), in drawing order.
# Curves for the disabled agents (pruning-with-parent, random forest, XGBoost)
# are intentionally left out.
_curves = [
    (rewards_part_ts, "Partitioned Thompson Sampling", 0),
    (rewards_ete, "Explore-Then-Exploit", 6),
    (rewards_TreePruned, "TreeHeuristicWithPruning", 2),
    (rewards_bootstrapped, "Bootstrapped Thompson Sampling", 8),
    (rewards_tsnocontext, "Thompson Sampling No Context", 4),
]
for _history, _label, _color_idx in _curves:
    plt.plot(get_mean_reward(_history), label=_label, linewidth=lwd, color=colors[_color_idx])

# legend: resize the axes, then anchor the legend near the top-left
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 1.25])
ax.legend(loc='upper center', bbox_to_anchor=(0.15, 1.0),
          fancybox=True, ncol=1, prop={'size':15})

# One tick every 20 batches, labelled in steps of 1000.
plt.tick_params(axis='both', which='major', labelsize=20)
_tick_positions = [20 * k for k in range(8)]
_tick_labels = [1000 * k for k in range(8)]
plt.xticks(_tick_positions, _tick_labels)

plt.xlabel('Rounds (models updated every batch)', size=25)
plt.ylabel('Cumulative Mean Reward', size=25)
plt.title('Comparison of Contextual Bandit Policies\nBibtex Dataset\n(159 categories, 1836 attributes)',size=25)
plt.grid()
plt.show()

In [None]:
import re
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.sparse import dok_matrix, csr_matrix

def parse_custom_svm(file_path, num_features=None):
    """
    Custom parser for .svm files with multilabel format.

    Each non-empty line is expected to look like
    ``label1,label2,... index:value index:value ...`` with 1-based feature
    indices. A line whose first token is already an ``index:value`` pair is
    treated as a sample with no labels, so its first feature is kept rather
    than being consumed as the label field.

    Parameters
    ----------
    file_path : str
        Path of the file to read.
    num_features : int or None
        Number of feature columns. When None it is inferred as the largest
        feature index seen (0 columns if no sample has any feature).

    Returns
    -------
    X : scipy.sparse CSR matrix, shape (n_samples, num_features), float32
    y : array, shape (n_samples, n_distinct_labels)
        Label indicator matrix produced by MultiLabelBinarizer.
    """
    print(f"Loading dataset from {file_path}...")
    features = []
    labels = []

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue

            # Split the label field from the features at the first run of
            # whitespace (spaces or tabs).
            parts = line.split(None, 1)
            if ":" in parts[0]:
                # No label field: the whole line consists of features.
                label_part, feature_part = "", line
            elif len(parts) < 2:
                print(f"Warning: Skipping malformed line: {line}")
                continue
            else:
                label_part, feature_part = parts

            # Process labels as multilabels; non-numeric tokens are ignored.
            label_indices = [int(label) for label in label_part.split(",") if label.isdigit()]
            labels.append(label_indices)

            # Process features into a sparse dictionary format
            feature_dict = {}
            for feature in feature_part.split():
                try:
                    index, value = feature.split(":")
                    feature_dict[int(index) - 1] = float(value)  # Indices in SVM format are 1-based
                except ValueError:
                    print(f"Warning: Skipping malformed feature '{feature}' in line: {line}")
                    continue

            features.append(feature_dict)

    # Determine the number of features, if not specified. `default=-1` keeps
    # this from raising when no sample has any feature.
    if num_features is None:
        num_features = max((max(f) for f in features if f), default=-1) + 1

    # Convert features to a sparse matrix
    X = dok_matrix((len(features), num_features), dtype=np.float32)
    for i, feature_dict in enumerate(features):
        for index, value in feature_dict.items():
            X[i, index] = value

    # Convert to CSR for efficient computation
    X = X.tocsr()

    # Binarize labels with MultiLabelBinarizer to handle multilabel outputs consistently
    mlb = MultiLabelBinarizer()
    y = mlb.fit_transform(labels)

    print(f"Dataset loaded with shape: X={X.shape}, y={y.shape}")
    return X, y

# Location of each extra dataset and, where known, its number of feature
# columns (None lets the parser infer it from the file).
dataset_paths = {
    'Mediamill': ('/Users/niekvandergaag/Documents/Thesis/Code/Datasets/mediamill/mediamill.svm', 120),
    'Delicious': ('/Users/niekvandergaag/Documents/Thesis/Code/Datasets/delicious/delicious.svm', None),
    'Bookmarks': ('/Users/niekvandergaag/Documents/Thesis/Code/Datasets/bookmarks/bookmarks.svm', None),
    'EUR-Lex': ('/Users/niekvandergaag/Documents/Thesis/Code/Datasets/eurlex-directory-codes/eurlexdc.svm', None)
}

# Parse every dataset; one that fails to load is reported and left out of
# `datasets`, so the remaining ones can still be used.
datasets = {}
for name in dataset_paths:
    path, num_features = dataset_paths[name]
    try:
        X, y = parse_custom_svm(path, num_features)
        datasets[name] = (X, y)
        print(f"{name} loaded with shape: X={X.shape}, y={y.shape}")
    except Exception as e:
        print(f"Error loading {name}: {e}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from contextualbandits.online import PartitionedTS, ExploreFirst, BootstrappedTS
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Define the models
def initialize_models(nchoices):
    """
    Build a fresh set of bandit policies for a problem with `nchoices` arms.

    Returns a list in this fixed order: PartitionedTS, PartitionedTSWithPruning,
    ThompsonSamplingNoContext, ExploreFirst, BootstrappedTS. All tree-based
    learners use max_depth=8 and min_samples_leaf=8. The
    PartitionedTSWithPruningParent variant is currently left out.
    """
    tree_limits = {"max_depth": 8, "min_samples_leaf": 8}
    beta_prior_ts = ((2./np.log2(nchoices), 4), 2)
    base_algorithm2 = DecisionTreeClassifier(**tree_limits)

    partitioned_ts = PartitionedTS(
        nchoices=nchoices,
        beta_prior=beta_prior_ts,
        random_state=4444,
        njobs=-1,
        **tree_limits,
        ccp_alpha=0.00,
    )
    pruned_ts = PartitionedTSWithPruning(
        nchoices=nchoices,
        beta_prior=beta_prior_ts,
        random_state=6666,
        njobs=-1,
        **tree_limits,
    )
    # Context-free baseline with a uniform Beta(1, 1) prior.
    no_context_ts = ThompsonSamplingNoContext(nchoices=nchoices, beta_prior=(1, 1))
    explore_first = ExploreFirst(
        deepcopy(base_algorithm2),
        nchoices=nchoices,
        explore_rounds=1000,
        beta_prior=None,
        random_state=8888,
    )
    bootstrapped_ts = BootstrappedTS(
        deepcopy(base_algorithm2),
        nchoices=nchoices,
        beta_prior=beta_prior_ts,
        random_state=2222,
    )

    models = [partitioned_ts, pruned_ts, no_context_ts, explore_first, bootstrapped_ts]
    return models

# Training simulation
def simulate_training(models, X, y, batch_size=50):
    """
    Run the batched bandit simulation for every policy in `models`.

    All policies are first fitted on the same `batch_size` rows with uniformly
    random actions (seeded with 1). Each following full batch is then played by
    every policy, its reward total is recorded, and the policy is updated. A
    trailing partial batch (fewer than `batch_size` rows) is not simulated.

    Parameters
    ----------
    models : list
        Policies to run. `ThompsonSamplingNoContext` instances are driven via
        `predict()` / `update(action, reward)`; all others via
        `predict(X)` / `fit(X, a, r)`.
    X : matrix, shape (n_samples, n_features)
        Contexts; must support row slicing.
    y : array, shape (n_samples, n_arms)
        Reward for each arm on each row.
    batch_size : int
        Number of rounds between model updates.

    Returns
    -------
    list of lists
        For each policy, the total reward collected in each simulated batch.
    """
    nchoices = y.shape[1]
    first_batch = X[:batch_size, :]
    np.random.seed(1)
    action_chosen = np.random.randint(nchoices, size=batch_size)
    rewards_received = y[np.arange(batch_size), action_chosen]

    print(f"Initializing models for the first batch of size {batch_size}...")

    # Fit models for the first time
    for model_idx, model in enumerate(models):
        if isinstance(model, ThompsonSamplingNoContext):
            model.fit(None, actions=action_chosen, rewards=rewards_received)
        else:
            model.fit(X=first_batch, a=action_chosen, r=rewards_received)
        print(f"Model {model_idx + 1}/{len(models)} initialized.")

    # Initialize lists to track rewards and actions
    lst_rewards = [[] for _ in models]
    lst_actions = [action_chosen.copy() for _ in models]

    print("Starting batch simulations...")

    # Simulate rounds; the first batch was consumed by the random seed fit.
    total_batches = int(np.floor(X.shape[0] / batch_size)) - 1
    for i in range(total_batches):
        batch_st = (i + 1) * batch_size
        batch_end = (i + 2) * batch_size
        batch_end = np.min([batch_end, X.shape[0]])
        batch_rows = np.arange(batch_st, batch_end)

        for model_idx, model in enumerate(models):
            np.random.seed(batch_st)

            if isinstance(model, ThompsonSamplingNoContext):
                actions_this_batch = np.array([model.predict() for _ in range(batch_end - batch_st)])
            else:
                # Use a full-width integer type: casting to uint8 wraps arm
                # indices >= 256, so on datasets with more than 256 labels the
                # rewards and the refit would be attributed to the wrong arms.
                actions_this_batch = model.predict(X[batch_st:batch_end, :]).astype(np.intp)

            # Collect rewards
            rewards_this_batch = y[batch_rows, actions_this_batch]
            lst_rewards[model_idx].append(rewards_this_batch.sum())

            # Update actions
            lst_actions[model_idx] = np.append(lst_actions[model_idx], actions_this_batch)

            # Update models
            if isinstance(model, ThompsonSamplingNoContext):
                for action, reward in zip(actions_this_batch, rewards_this_batch):
                    model.update(action, reward)
            else:
                # Refit from scratch on everything observed so far.
                model.fit(X[:batch_end, :], lst_actions[model_idx], y[np.arange(batch_end), lst_actions[model_idx]])

        if (i + 1) % 10 == 0 or (i + 1) == total_batches:
            print(f"Completed batch {i + 1}/{total_batches}.")

    print("Training completed.")
    return lst_rewards

# Fixed colour per algorithm so curves are comparable across dataset plots.
algorithm_colors = {
    "Partitioned Thompson Sampling": "#1f77b4",  # Blue
    "TreeHeuristicWithPruning": "#ff7f0e",       # Orange
    "Thompson Sampling No Context": "#2ca02c",   # Green
    "Explore-Then-Exploit": "#d62728",           # Red
    "Bootstrapped Thompson Sampling": "#9467bd" # Purple
}
# Plotting results
def plot_results(reward_lists, labels, title, batch_size=50):
    """
    Plot the running mean reward per round for each policy.

    Parameters
    ----------
    reward_lists : list of sequences
        Per-policy reward totals, one entry per simulated batch.
    labels : list of str
        Legend label for each entry of `reward_lists`. Labels found in
        `algorithm_colors` use that colour; others fall back to matplotlib's
        cycle colour "C<index>".
    title : str
        Figure title.
    batch_size : int
        Number of rounds per batch; must match the value used when the rewards
        were simulated. Defaults to 50.
    """
    def get_mean_reward(reward_lst, batch_size):
        # Cumulative reward divided by the number of rounds played so far.
        rounds_played = np.arange(1, len(reward_lst) + 1) * batch_size
        return np.cumsum(reward_lst) / rounds_played

    plt.figure(figsize=(20, 10))
    for idx, rewards in enumerate(reward_lists):
        label = labels[idx]
        color = algorithm_colors.get(label, f"C{idx}")
        plt.plot(get_mean_reward(rewards, batch_size), label=label, linewidth=3, color=color)

    plt.tick_params(axis='both', which='major', labelsize=20)

    plt.xlabel('Rounds (models updated every batch)', size=20)
    plt.ylabel('Cumulative Mean Reward', size=20)
    plt.title(title, size=20)

    plt.legend(loc='best', prop={'size': 14})
    plt.grid()
    plt.show()

# For every successfully loaded dataset: build fresh policies, simulate them,
# and plot the resulting reward curves.
for dataset_name in datasets:
    X, y = datasets[dataset_name]
    print(f"\nTraining on {dataset_name} dataset...")
    nchoices = y.shape[1]
    models = initialize_models(nchoices)
    rewards = simulate_training(models, X, y)

    # Legend labels, in the order in which initialize_models returns the policies.
    model_labels = [
        "Partitioned Thompson Sampling",
        "TreeHeuristicWithPruning",
        "Thompson Sampling No Context",
        "Explore-Then-Exploit",
        "Bootstrapped Thompson Sampling"
    ]

    plot_title = f"Comparison of Contextual Bandit Policies\n{dataset_name} Dataset\n({y.shape[1]} categories, {X.shape[1]} attributes)"
    plot_results(rewards, model_labels, plot_title)



In [None]:
%pip install pytest

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import openml
from contextualbandits.evaluation import _check_random_state
from contextualbandits.online import PartitionedTS, BootstrappedTS
from contextualbandits.online import ExploreFirst
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import resample, shuffle
from tqdm import trange
from sklearn.impute import SimpleImputer
import sys
import os

# Add the directory containing the Models folder to sys.path
sys.path.append('/Users/niekvandergaag/Documents/Thesis/Code')
import pytest
import pandas as pd
from Models.helperfunctions import *
from Models.TSnocontext import *
from Models.PruningWithParent import *

# Create the plots directory if it doesn't exist
os.makedirs('plots', exist_ok=True)

# Read the OpenML API key from the environment so it never has to be written
# into the notebook; falls back to the previous empty placeholder when unset.
openml.config.apikey = os.environ.get("OPENML_APIKEY", "")
openml.config.cache_directory = "omlcache"

DATASET_IDS = [
    2,5,7,
    9,
    10,
    #11,
    23,
    26,
    30,
    34,
    35,
    39,
    41,
    42,
    46,
    57,
    62,
    #70,71,75,76,116,117,119,125,129,133,134,136,138,144,148,149,150,155,156,157,158,159,160, these are all synthetic data
    163,171,180,181,182,183,184,185,
    186,
    187,
    188,
    244,
    245,
    248,
    249,
    253,
    255,
    259,
    263,
    265,
    266,
    272,
    275,
    276,
    277,
    278,
    279,
    285,
    313,
    327,
    328,
    329,
    338,
    340,
    342,
    343,
    372,
    373,
    374,
    375,
    378,
    379,
    380,
    381,
    382,
    383,
    384,
    385,
    386,
    387,
    388,
    389,
    390,
    391,
    392,
    393,
    394,
    395,
    396,
    397,
    398,
    399,
    400,
    401,
    443,
    449,
    452,
    453,
    454,
    455,
    457,
    458,
    460,
    462,
    469,
    473,
    474,
    477,
    480,
    488,
    679,
    694,
    952,
    1044,
    1047,
    1057,
    1077,
    1078,
    1079,
    1080,
    1081,
    1082,
    1083,
    1084,
    1086,
    1087,
    1088,
    1100,
    1102,
    1106,
    # 1109,
    # 1110,
    # 1113,
    # 1117,
    # 1177,
    # 1179,
    # 1183,
    # 1185,
    # 1186,
    # 1214,
    # 1222,
    # 1226,
    # 1351,
    # 1352,
    # 1353,
    # 1354,
    # 1355,
    # 1356,
    # 1357,
    # 1358,
    # 1359,
    # 1360,
    # 1361,
    # 1362,
    # 1363,
    # 1364,
    # 1365,
    # 1366,
    # 1367,
    # 1368,
    # 1387,
    # 1388,
    # 1389,
    # 1390,
    # 1391,
    # 1392,
    # 1393,
    # 1394,
    # 1395,
    # 1396,
    # 1397,
    # 1398,
    # 1399,
    # 1400,
    # 1401,
    # 1402,
    # 1403,
    # 1404,
    # 1405,
    # 1406,
    # 1407,
    # 1408,
    # 1409,
    # 1410,
    # 1459,
    # 1465,
    # 1466,
    # 1472,
    # 1475,
    # 1476,
    # 1477,
    # 1478,
    # 1481,
    # 1482,
    # 1483,
    # 1497,
    # 1508,
    # 1509,
    # 1512,
    # 1513,
    # 1515,
    # 1516,
    # 1517,
    # 1518,
    # 1519,
    # 1520,
    # 1523,
    # 1525,
    # 1526,
    # 1527,
    # 1528,
    # 1529,
    # 1530,
    # 1531,
    # 1532,
    # 1533,
    # 1534,
    # 1535,
    # 1536,
    # 1537,
    # 1538,
    # 1539,
    # 1540,
    # 1541,
    # 1542,
    # 1543,
    # 1544,
    # 1545,
    # 1546,
    # 1548,
    # 1549,
    # 1551,
    # 1552,
    # 1554,
    # 1555,
    # 1559,
    # 1560,
    # 1565,
    # 1567,
    # 1568,
    # 1569,
    # 1596,
    # 4340,
    # 4538,
    # 4541,
    # 4552,
]


def get_multiclass_dataset(dataset_id):
    """
    Retrieve a dataset and transform it so it can be used with the contextual bandit algorithms.
    Handles missing values and categorical data, including boolean data.

    Numeric columns are mean-imputed, non-numeric columns are imputed with the
    most frequent value and one-hot encoded, and the combined feature matrix is
    standardised. The target is converted to strings and binarised to one
    column per class.

    Parameters
    ----------
    dataset_id : int
        OpenML dataset id.

    Returns
    -------
    (X, y) : shuffled feature matrix and one-hot target, or (None, None) when
        the dataset cannot be downloaded or transformed.
    """
    # Imported locally because the visible import cells only bring in
    # LabelBinarizer from sklearn.preprocessing.
    from sklearn.preprocessing import StandardScaler

    try:
        dataset = openml.datasets.get_dataset(
            dataset_id=dataset_id,
            download_data=False,
            download_qualities=False,
            download_features_meta_data=False,
        )
        print(f"\nProcessing {dataset.name}")
        X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)
    except Exception as e:
        print(f"Failed to process dataset {dataset_id}: {e}")
        return None, None

    # Presumably get_data returns y=None when the dataset has no default target
    # (TODO confirm against the openml version in use); without a target there
    # is nothing to turn into arms.
    if y is None:
        print(f"Skipping dataset {dataset.name}: no target attribute available.")
        return None, None

    # Separate numeric and categorical features
    X_numeric = X.select_dtypes(include=[np.number])
    # Explicit copy so the boolean-column assignment below modifies an
    # independent frame rather than a possible view of X.
    X_categorical = X.select_dtypes(exclude=[np.number]).copy()

    # Handle boolean columns (convert them to object/string type)
    bool_columns = X_categorical.select_dtypes(include=[bool]).columns
    X_categorical[bool_columns] = X_categorical[bool_columns].astype(str)

    # Impute missing values for numeric features (if they exist)
    if not X_numeric.empty:
        imputer_numeric = SimpleImputer(strategy='mean')
        X_numeric_imputed = imputer_numeric.fit_transform(X_numeric)
    else:
        X_numeric_imputed = np.empty((X.shape[0], 0))  # No numeric data, create an empty array with correct row shape

    # Impute missing values for categorical features (if they exist)
    if not X_categorical.empty:
        imputer_categorical = SimpleImputer(strategy='most_frequent')
        X_categorical_imputed = imputer_categorical.fit_transform(X_categorical)

        # The imputer can return fewer columns than it was given; in that case
        # only the leading column names are reused.
        if X_categorical_imputed.shape[1] != len(X_categorical.columns):
            print(f"Warning: Shape mismatch after imputation. Adjusting column count.")
            X_categorical_imputed_df = pd.DataFrame(X_categorical_imputed, columns=X_categorical.columns[:X_categorical_imputed.shape[1]])
        else:
            X_categorical_imputed_df = pd.DataFrame(X_categorical_imputed, columns=X_categorical.columns)

        # OneHotEncode categorical features
        X_categorical_encoded = pd.get_dummies(X_categorical_imputed_df).to_numpy()
    else:
        X_categorical_encoded = np.empty((X.shape[0], 0))  # No categorical data, create an empty array with correct row shape

    # Ensure both arrays are 2D before concatenating
    if X_numeric_imputed.ndim == 1:
        X_numeric_imputed = X_numeric_imputed.reshape(-1, 1)
    if X_categorical_encoded.ndim == 1:
        X_categorical_encoded = X_categorical_encoded.reshape(-1, 1)

    # If we still encounter a dimensional mismatch, skip this dataset
    try:
        # Concatenate numeric and encoded categorical features
        X_imputed = np.hstack([X_numeric_imputed, X_categorical_encoded])
    except ValueError as e:
        print(f"Skipping dataset {dataset.name} due to dimension mismatch: {e}")
        return None, None

    # Ensure that all target values are strings to avoid issues during label encoding
    y = y.astype(str)

    # Standardise every feature column.
    scaler = StandardScaler()
    X_imputed = scaler.fit_transform(X_imputed)

    # Encode target variable
    lb = LabelBinarizer()

    try:
        y = lb.fit_transform(y)
    except Exception as e:
        print(f"Skipping dataset {dataset.name} due to label transformation error: {e}")
        return None, None

    # LabelBinarizer yields a single column when the target is not multi-column
    # (e.g. a two-class target); expand it to two complementary columns so
    # every class is its own arm.
    if y.shape[1] == 1:
        y = np.concatenate([1 - y, y], axis=1)

    # Unseeded shuffle: row order differs between calls.
    return shuffle(X_imputed, y)



def evaluateFullyLabeled(
    policy, X, y_onehot, online=False, shuffle=True, update_freq=50, random_state=1
):
    """
    Evaluates a policy on fully-labeled data

    Parameters
    ----------
    policy : obj
        Policy to be evaluated. Must have a 'predict' method, plus 'fit'
        (or 'partial_fit' when ``online=True``).
    X : array (n_samples, n_features)
        Covariates for each observation.
    y_onehot : array (n_samples, n_arms)
        Labels (zero or one) for each class for each observation.
    online : bool
        Whether the algorithm should be fit to batches of data with a 'partial_fit' method,
        or to all historical data each time.
    shuffle : bool
        Whether to shuffle the data (X and y_onehot) before passing through it.
        Shuffled copies are used; the caller's arrays are left untouched.
    update_freq : int
        Batch size - how many observations to predict before refitting the model.
    random_state : int, None, RandomState, or Generator
        Either an integer which will be used as seed for initializing a
        ``Generator`` object for random number generation, a ``RandomState``
        object (from NumPy) from which to draw an integer, or a ``Generator``
        object (from NumPy), which will be used directly.
        This is used when shuffling and when selecting actions at random for
        the first batch.

    Returns
    -------
    mean_rew : array (n_evaluated_batches,)
        Running mean reward per observation after each evaluated batch. The
        randomly-played first batch and any trailing partial batch are not
        evaluated.
    """
    # `DataFrame.as_matrix()` was removed in pandas 1.0; `to_numpy()` is its replacement.
    if type(X).__name__ == "DataFrame":
        X = X.to_numpy()
    if type(y_onehot).__name__ == "DataFrame":
        y_onehot = y_onehot.to_numpy()

    assert type(X).__name__ == "ndarray"
    assert type(y_onehot).__name__ == "ndarray"
    assert isinstance(online, bool)
    assert isinstance(shuffle, bool)
    assert isinstance(update_freq, int)
    assert X.shape[0] > update_freq
    assert X.shape[0] == y_onehot.shape[0]
    assert X.shape[0] > 0

    # Assumed to return a numpy Generator (the code below uses `.integers`) -- TODO confirm.
    rs = _check_random_state(random_state)

    if shuffle:
        new_order = np.arange(X.shape[0])
        rs.shuffle(new_order)
        X = X[new_order, :]
        y_onehot = y_onehot[new_order, :]

    rewards_per_turn = list()
    history_actions = np.array([], dtype=int)

    ## initial seed: the first batch is played with uniformly random actions
    batch_features = X[:update_freq, :]
    batch_actions = rs.integers(y_onehot.shape[1], size=update_freq)
    batch_rewards = y_onehot[np.arange(update_freq), batch_actions]

    if online:
        policy.partial_fit(batch_features, batch_actions, batch_rewards)
    else:
        policy.fit(batch_features, batch_actions, batch_rewards)

    history_actions = np.append(history_actions, batch_actions)

    ## running the loop
    for i in range(1, int(np.floor(X.shape[0] / update_freq))):
        st = (i) * update_freq
        end = np.min([(i + 1) * update_freq, X.shape[0]])  # Ensure we don't go beyond the dataset size

        batch_features = X[st:end, :]
        batch_actions = policy.predict(batch_features)
        # Check for correct shape before indexing
        assert len(batch_actions) == (end - st), f"Shape mismatch: {len(batch_actions)} vs {(end - st)}"

        batch_rewards = y_onehot[np.arange(st, end), batch_actions]

        rewards_per_turn.append(batch_rewards.sum())

        # break if we've reached the end of the data
        # TODO: commit this change upstream
        if end == X.shape[0]:
            # stop before fitting the models again
            break

        if online:
            policy.partial_fit(batch_features, batch_actions, batch_rewards)
        else:
            # Refit from scratch on everything observed so far.
            history_actions = np.append(history_actions, batch_actions)
            policy.fit(
                X[:end, :], history_actions, y_onehot[np.arange(end), history_actions]
            )

    ## outputting results: cumulative reward divided by observations evaluated so far
    observations_seen = np.arange(1, len(rewards_per_turn) + 1) * update_freq
    return np.cumsum(rewards_per_turn) / observations_seen


def plot_rewards(mean_rewards, ax=None, label=None, std=False):
    """
    Draw the run-averaged cumulative sum of `mean_rewards` on `ax`.

    :param mean_rewards: 2-D array-like with one row per run; each row is
        cumulatively summed along its length before averaging across runs
    :param ax: the axis to plot on (required)
    :param label: the label for the plot
    :param std: whether to shade one standard deviation (across runs) around the mean
    """
    assert ax is not None
    cumulative_rewards = np.cumsum(mean_rewards, axis=1)
    run_mean = np.mean(cumulative_rewards, axis=0)
    ax.plot(run_mean, label=label)
    if not std:
        return
    run_std = np.std(cumulative_rewards, axis=0)
    steps = np.arange(cumulative_rewards.shape[1])
    ax.fill_between(
        steps,
        np.mean(cumulative_rewards, axis=0) - run_std,
        np.mean(cumulative_rewards, axis=0) + run_std,
        alpha=0.1,
    )

def select_top_k(X, y, k=10):
    """
    Keep only the k most frequent label columns, ordered from most to least
    frequent, and drop every row that has none of those labels set.
    """
    by_frequency = np.argsort(y.sum(axis=0))[::-1]
    y_top = y[:, by_frequency[:k]]
    has_any_label = ~np.all(y_top == 0, axis=1)
    X_kept = X[has_any_label]
    y_kept = y_top[has_any_label]
    return X_kept, y_kept


def reduce_success_rate(X, y, rate=0.1, random_state=0):
    """
    Dilute the dataset with all-zero-label rows so that the original rows make
    up roughly a fraction ``rate`` of the result.

    ``n * (1 - rate) / rate`` extra rows are drawn from ``X`` via ``resample``
    and paired with all-zero label rows; the combined data is then shuffled.

    :param X: 2-D feature array (stacked with ``np.vstack``)
    :param y: 2-D label array; its column count is reused for the zero rows
    :param rate: target share of original rows in the output, in ``(0, 1]``
    :param random_state: seed forwarded to both ``resample`` and ``shuffle`` so
        the whole transformation is reproducible (previously only ``resample``
        was seeded); pass ``None`` for a non-deterministic result
    :return: the shuffled ``(X, y)`` pair as returned by ``shuffle``
    :raises ValueError: if ``rate`` is not in ``(0, 1]``
    """
    if not 0 < rate <= 1:
        raise ValueError(f"rate must be in (0, 1], got {rate!r}")

    # Round to the nearest integer: the float quotient can land just below a
    # whole number (e.g. 62.99999999999999 for 7 rows at rate=0.1) and plain
    # int() truncation would then drop one sample.
    n_negative_samples = int(round(X.shape[0] * (1 - rate) / rate))

    if n_negative_samples > 0:
        X_zero = resample(X, n_samples=n_negative_samples, random_state=random_state)
    else:
        # rate == 1 (or a tiny dataset): nothing to add, keep an empty slice so
        # the stacking below still works and y keeps the same dtype promotion.
        X_zero = X[:0]
    y_zero = np.zeros((n_negative_samples, y.shape[1]))

    return shuffle(
        np.vstack([X, X_zero]), np.vstack([y, y_zero]), random_state=random_state
    )


def test_reduce_success_rate():
    """Five positive rows diluted to a 1% rate must yield 500 rows in total."""
    features = np.arange(1, 11).reshape(5, 2)
    labels = np.eye(2, dtype=int)[[0, 1, 0, 1, 0]]

    features, labels = reduce_success_rate(features, labels, 0.01)

    # The five original positives survive; every added row has all-zero labels.
    assert labels.sum() == 5
    assert features.shape[0] == 500
    assert labels.shape[0] == 500


def evaluate_policies(policies, X, y, runs=5, update_freq=50):
    """
    Evaluate every policy over several seeded runs and plot the results.

    :param policies: iterable of ``(policy_class, policy_kwargs)`` pairs; each class
        is instantiated with ``nchoices=y.shape[1]``, the kwargs and a ``random_state``
    :param X: features forwarded to ``evaluateFullyLabeled``
    :param y: labels forwarded to ``evaluateFullyLabeled``
    :param runs: runs per policy; run ``i`` (1-based) uses ``random_state=i``
    :param update_freq: forwarded to ``evaluateFullyLabeled``
    :return: the figure holding one curve (with std band) per policy
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    for policy_class, policy_kwargs in policies:

        def run_once(seed):
            # The same seed drives both the policy and the evaluation shuffle.
            bandit = policy_class(
                nchoices=y.shape[1], **policy_kwargs, random_state=seed
            )
            return evaluateFullyLabeled(
                bandit,
                X,
                y,
                online=False,
                shuffle=True,
                update_freq=update_freq,
                random_state=seed,
            )

        per_run_rewards = [run_once(seed) for seed in trange(1, runs + 1)]
        plot_rewards(per_run_rewards, ax=ax, label=policy_class.__name__, std=True)

    ax.legend()
    return fig


if __name__ == "__main__":
    # savefig does not create missing directories, so make sure the output
    # folder exists before any expensive evaluation starts.
    os.makedirs("plots", exist_ok=True)

    for dataset_id in DATASET_IDS:
        # retrieve the dataset for this id
        X, y = get_multiclass_dataset(dataset_id)
        if X is None or y is None:
            print(f"Skipping dataset {dataset_id} due to retrieval error.")
            continue

        # select the top 5 arms
        X, y = select_top_k(X, y, 5)

        # reduce the success rate of the arms
        X, y = reduce_success_rate(X, y, 0.1)

        print(
            f"Conversion rates for the {y.shape[1]} actions: {np.round(np.mean(y, axis=0), 4)}"
        )

        # reduce the size of the dataset
        if len(y) > 10_000:
            X, y = shuffle(X, y, n_samples=10_000, random_state=0)

        fig = evaluate_policies(
            [
                # (PartitionedTSWithPruningParent, {"min_samples_leaf": 10}),
                (PartitionedTSWithPruning, {"min_samples_leaf": 10}),
                (PartitionedTS, {"min_samples_leaf": 10}),
                (ThompsonSamplingNoContext, {}),
            ],
            X,
            y,
            runs=5,
            update_freq=100
        )

        # save the figure
        fig.savefig(f"plots/{dataset_id}.png")
        # Release the figure: otherwise one 10x10 figure per dataset stays
        # registered with pyplot for the whole loop.
        plt.close(fig)

In [None]:
import os
import numpy as np
import pandas as pd

def evaluate_policies_and_save(
    policies, X, y, dataset_id, runs=5, update_freq=50, output_dir="plots"
):
    """
    Evaluate every policy over several seeded runs, save the per-run results
    to CSV and plot one curve per policy.

    For each policy a file ``<output_dir>/<dataset_id>_<PolicyClassName>_mean_rewards.csv``
    is written with ``np.savetxt`` (one row per run).

    :param policies: iterable of ``(policy_class, policy_kwargs)`` pairs; each class
        is instantiated with ``nchoices=y.shape[1]``, the kwargs and a ``random_state``
    :param X: features forwarded to ``evaluateFullyLabeled``
    :param y: labels forwarded to ``evaluateFullyLabeled``
    :param dataset_id: identifier used in the CSV file names
    :param runs: runs per policy; run ``i`` (1-based) uses ``random_state=i``
    :param update_freq: forwarded to ``evaluateFullyLabeled``
    :param output_dir: directory for the CSV files; created if it does not exist
    :return: ``(fig, all_mean_rewards)`` where ``all_mean_rewards`` holds, per policy
        and in the order of ``policies``, the list of per-run evaluation results
    """
    # Create the output directory up front: failing in np.savetxt only after the
    # first policy's runs have finished would throw that computation away.
    os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    all_mean_rewards = []

    for policy_class, policy_kwargs in policies:
        policy_name = policy_class.__name__
        mean_rewards = []
        for random_state in trange(1, runs + 1):
            policy = policy_class(
                nchoices=y.shape[1], **policy_kwargs, random_state=random_state
            )
            mean_reward = evaluateFullyLabeled(
                policy,
                X,
                y,
                online=False,
                shuffle=True,
                update_freq=update_freq,
                random_state=random_state,
            )
            mean_rewards.append(mean_reward)

        # Save mean rewards for the policy and dataset
        csv_path = os.path.join(
            output_dir, f"{dataset_id}_{policy_name}_mean_rewards.csv"
        )
        np.savetxt(csv_path, mean_rewards, delimiter=",")

        all_mean_rewards.append(mean_rewards)
        plot_rewards(mean_rewards, ax=ax, label=policy_name, std=True)

    ax.legend()
    return fig, all_mean_rewards

# Example usage in main loop
if __name__ == "__main__":
    # Neither np.savetxt nor savefig creates missing directories, so make sure
    # the output folder exists before any expensive evaluation starts.
    os.makedirs("plots", exist_ok=True)

    for dataset_id in DATASET_IDS:
        # retrieve the dataset for this id
        X, y = get_multiclass_dataset(dataset_id)
        if X is None or y is None:
            print(f"Skipping dataset {dataset_id} due to retrieval error.")
            continue

        # select the top 5 arms
        X, y = select_top_k(X, y, 5)

        # reduce the success rate of the arms
        X, y = reduce_success_rate(X, y, 0.1)

        print(
            f"Conversion rates for the {y.shape[1]} actions: {np.round(np.mean(y, axis=0), 4)}"
        )

        # reduce the size of the dataset
        if len(y) > 10_000:
            X, y = shuffle(X, y, n_samples=10_000, random_state=0)

        fig, _ = evaluate_policies_and_save(
            [
                # (PartitionedTSWithPruningParent, {"min_samples_leaf": 10}),
                (PartitionedTSWithPruning, {"min_samples_leaf": 10}),
                (PartitionedTS, {"min_samples_leaf": 10}),
                (ThompsonSamplingNoContext, {}),
            ],
            X,
            y,
            dataset_id=dataset_id,
            runs=5,
            update_freq=100
        )

        # save the figure
        fig.savefig(f"plots/{dataset_id}.png")
        # Release the figure: otherwise one 10x10 figure per dataset stays
        # registered with pyplot for the whole loop.
        plt.close(fig)


In [None]:

import os
import numpy as np
import matplotlib.pyplot as plt

def load_rewards(dataset_ids, policy_name):
    """
    Load the saved reward matrices of one policy for several datasets.

    Each file ``plots/<dataset_id>_<policy_name>_mean_rewards.csv`` is read with
    ``np.loadtxt``. Missing and empty files are reported and skipped. All
    loaded matrices are right-padded with NaN along axis 1 so they share the
    column count of the longest one.

    :param dataset_ids: iterable of dataset identifiers used in the file names
    :param policy_name: policy name used in the file names
    :return: list of 2-D float arrays with equal column counts, in the order of
        ``dataset_ids``; an empty list when nothing could be loaded
    """
    loaded = []

    # Single pass: every file is parsed once and every warning is printed once.
    for dataset_id in dataset_ids:
        filename = f"plots/{dataset_id}_{policy_name}_mean_rewards.csv"
        if not os.path.exists(filename):
            print(f"Warning: File {filename} not found.")
            continue

        rewards = np.loadtxt(filename, delimiter=",")
        if rewards.size == 0:
            print(f"Skipping {filename} because it's empty or invalid.")
            continue

        # loadtxt squeezes a one-row file to 1-D and a single value to 0-D;
        # normalise both to a (1, n) matrix.
        loaded.append(np.atleast_2d(rewards))

    if not loaded:
        print(f"No valid rewards found for policy {policy_name}.")
        return []

    max_length = max(rewards.shape[1] for rewards in loaded)
    all_rewards = [
        np.pad(
            rewards,
            ((0, 0), (0, max_length - rewards.shape[1])),
            constant_values=np.nan,
        )
        for rewards in loaded
    ]

    print(f"Loaded rewards for {policy_name}: {len(all_rewards)} datasets processed.")
    return all_rewards


def plot_aggregated_rewards(dataset_ids, policies, band_alpha=0):
    """
    Plot, per policy, the cumulative reward averaged over runs and datasets.

    For every dataset the rewards are accumulated along axis 1 and reduced to
    a mean and a standard deviation over axis 0; both are then averaged across
    datasets and drawn as a curve with a surrounding band.

    :param dataset_ids: dataset identifiers forwarded to ``load_rewards``
    :param policies: iterable of policy names forwarded to ``load_rewards``
    :param band_alpha: opacity of the +/- std band; the default of 0 keeps the
        band fully transparent (it still contributes to the axis limits)
    :return: the created figure, so callers can save it
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    plotted_any = False

    for policy_name in policies:
        all_rewards = load_rewards(dataset_ids, policy_name)

        if not all_rewards:  # Skip if no rewards are loaded
            continue

        mean_rewards_list = []
        std_rewards_list = []

        for rewards in all_rewards:
            # NOTE(review): nancumsum treats NaN as zero, so a NaN-padded tail
            # becomes a plateau at the dataset's final cumulative reward rather
            # than being ignored by the nan-aware reductions below -- confirm
            # this carry-forward is the intended aggregation.
            cumulative_rewards = np.nancumsum(rewards, axis=1)
            mean_rewards_list.append(np.nanmean(cumulative_rewards, axis=0))
            std_rewards_list.append(np.nanstd(cumulative_rewards, axis=0))

        # Average the per-dataset curves and per-dataset spreads across datasets.
        mean_rewards = np.nanmean(mean_rewards_list, axis=0)
        std_rewards = np.nanmean(std_rewards_list, axis=0)

        # Plot the aggregated results
        ax.plot(mean_rewards, label=policy_name)
        ax.fill_between(
            np.arange(len(mean_rewards)),
            mean_rewards - std_rewards,
            mean_rewards + std_rewards,
            alpha=band_alpha,
        )
        plotted_any = True

    # A legend without any labelled curve only triggers a matplotlib warning.
    if plotted_any:
        ax.legend()
    ax.set_title('Comparison of Contextual Bandit Policies\n97 OpenML Datasets')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cumulative Reward')
    # Configure the grid on this axis explicitly instead of pyplot's current axes.
    ax.grid()
    plt.show()
    return fig




if __name__ == "__main__":
    # Policy names as they appear in the saved "<dataset>_<policy>_mean_rewards.csv" files.
    policies = [
        "PartitionedTSWithPruning",
        "PartitionedTS",
        "ThompsonSamplingNoContext",
    ]
    plot_aggregated_rewards(DATASET_IDS, policies)
