In [2]:
# Run this at the start of the notebook
# Note that some of the packages and dependencies that require recogym can be out of sync, you may want to manually fix these dependencies.
# If you want to run this notebook, you will have to install the correct dependencies and packages.
# Please run the below commands if you are running into any issues after installing recogym.
!pip install recogym
!pip install scipy==1.11.4
!pip install setuptools==65.5.0 "wheel<0.40.0"
!pip install opencv-python>=3.
!pip install pip==24.0
!pip install gym==0.18.0

# If you run into further problems, or are unable to set up the dependencies, try running this cell and the rest of the notebook on Google Colab.

Collecting gym==0.18.0
  Downloading gym-0.18.0.tar.gz (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyglet<=1.5.0,>=1.4.0 (from gym==0.18.0)
  Downloading pyglet-1.5.0-py2.py3-none-any.whl.metadata (7.6 kB)
Collecting Pillow<=7.2.0 (from gym==0.18.0)
  Downloading Pillow-7.2.0.tar.gz (39.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.1/39.1 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting cloudpickle<1.7.0,>=1.2.0 (from gym==0.18.0)
  Downloading cloudpickle-1.6.0-py3-none-any.whl.metadata (4.3 kB)
Downloading cloudpickle-1.6.0-py3-none-any.whl (23 kB)
Downloading pyglet-1.5.0-py2.py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels 

In [1]:
from numpy.random.mtrand import RandomState
from recogym import Configuration
from recogym.agents import Agent
from sklearn.linear_model import LogisticRegression
from recogym import verify_agents
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args
from recogym.evaluate_agent import verify_agents, plot_verify_agents


import gym, recogym
from copy import deepcopy
from recogym import env_1_args
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = [6, 3]

# Set hyperparameters for our recogym environment
ABTestNumberOfUsers=5000
NumberOfProducts=10
NumberOfSamples = 20
env_1_args['phi_var']=0.0
env_1_args['number_of_flips']=0
env_1_args['sigma_mu_organic'] = 0.0
env_1_args['sigma_omega']=1
env_1_args['random_seed'] = 42
env_1_args['num_products'] = NumberOfProducts
env_1_args['K'] = 5
env_1_args['number_of_flips'] = 5

env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)

In [3]:
# Generate the offline logs from a deep copy of the environment, so that the
# `env` object itself is not the one that runs the simulation.
data = (
    deepcopy(env)
    .generate_logs(ABTestNumberOfUsers)
)

Organic Users: 0it [00:00, ?it/s]
Users: 100%|██████████| 5000/5000 [02:27<00:00, 33.91it/s]


# Logistic Regression Model

In [4]:
import math
import numpy as np
import pandas as pd

# Build the training set (click labels, view histories, actions) from the logs
def build_train_data(data):
    """Turn an event log into one training example per bandit event.

    Parameters
    ----------
    data : pandas.DataFrame
        Event log with columns ``u`` (user id), ``z`` (event type, either
        ``'organic'`` or ``'bandit'``), ``v`` (viewed product, set on organic
        rows only), ``a`` (recommended product) and ``c`` (click flag); ``a``
        and ``c`` are set on bandit rows only. These invariants are asserted.

    Returns
    -------
    tuple
        ``(outs, history, actions)`` where ``outs`` is a NumPy array of click
        labels, ``history`` is a list of per-product view-count vectors (the
        user's organic views seen before each bandit event) and ``actions`` is
        a list of one-hot ``int8`` vectors of the recommended product. The
        three sequences are aligned element by element.
    """
    num_products = int(data.v.max() + 1)

    history = []
    actions = []
    outs = []

    # A single groupby pass replaces re-filtering the whole frame once per
    # user. Groups come out in ascending user-id order and rows keep their
    # original order within each group.
    for _, user_events in data.groupby('u', sort=True):
        # Running per-product view counts for this user. This is equivalent to
        # stacking one-hot rows and summing them, without the repeated copies.
        view_counts = np.zeros(num_products)

        for event_type, viewed, action, click in zip(
            user_events['z'], user_events['v'], user_events['a'], user_events['c']
        ):
            if event_type == 'organic':
                assert pd.isna(action)
                assert pd.isna(click)
                assert (not pd.isna(viewed))

                view_counts[int(viewed)] += 1
            else:
                assert (event_type == 'bandit')
                assert (not pd.isna(action))
                assert (not pd.isna(click))
                assert (pd.isna(viewed))

                action_flags = np.zeros(num_products, dtype=np.int8)
                action_flags[int(action)] = 1

                # Snapshot the counts: later organic events for this user must
                # not change examples that were already recorded.
                history.append(view_counts.copy())
                actions.append(action_flags)
                outs.append(int(click))

    return np.array(outs), history, actions

In [5]:
# Extract the aligned click labels, view histories and one-hot actions from the logs.
clicks, history, actions = build_train_data(data=data)

In [6]:
from recogym.agents import FeatureProvider

class CrossFeatureProvider(FeatureProvider):
    """Feature provider that keeps a per-product count of observed session events."""

    def __init__(self, config):
        super().__init__(config)
        # Stays None until reset() allocates the count vector.
        self.feature_data = None

    def observe(self, observation):
        """Add one to the count at index ``event['v']`` for every session event."""
        for event in observation.sessions():
            viewed = event['v']
            self.feature_data[viewed] += 1

    def features(self, observation):
        """Return the current count vector (``observation`` is not used)."""
        return self.feature_data

    def reset(self):
        """Replace the counts with a zero vector of length ``config.num_products``."""
        self.feature_data = np.zeros((self.config.num_products))

# Define the agent that recommends products using a fitted click model (here, logistic regression)
class ModelBasedAgent(Agent):
    """Greedy agent that recommends the product with the highest predicted click probability.

    Parameters
    ----------
    env : object
        Either an object exposing ``.config.num_products`` or a configuration
        object exposing ``.num_products`` directly.
    feature_provider : FeatureProvider
        Supplies the user feature vector; it is reset when the agent is reset.
    model : object
        Fitted classifier with ``predict_proba``; column 1 of its output is
        used as the click probability.
    """

    def __init__(self, env, feature_provider, model):
        self.env = env
        self.feature_provider = feature_provider
        self.model = model
        self.reset()

    def _num_products(self):
        """Return the number of products from ``self.env`` (environment or configuration)."""
        # Use the object given to the constructor rather than a notebook-level
        # global, so the agent does not depend on hidden kernel state.
        return getattr(self.env, 'config', self.env).num_products

    # Recommend an action based on observation state and past history
    def act(self, observation, reward, done):
        """Update the features with ``observation`` and return the greedy action.

        Returns the base agent's action dict extended with ``'a'`` (chosen
        product), ``'ps'`` (always 1.0, the policy is deterministic) and
        ``'ps-a'`` (one-hot vector over products).
        """
        self.feature_provider.observe(observation)
        user_features = self.feature_provider.features(observation)

        # One row per candidate product: row i carries the user features in
        # the i-th block and zeros elsewhere.
        cross_features = np.kron(np.eye(self._num_products()), user_features)

        click_prob = self.model.predict_proba(cross_features)[:, 1]
        action = np.argmax(click_prob)

        one_hot = np.zeros_like(click_prob)
        one_hot[action] = 1.0

        return {
            **super().act(observation, reward, done),
            'a': action,
            'ps': 1.,
            'ps-a': one_hot,
        }

    def reset(self):
        """Reset the feature provider's accumulated state."""
        self.feature_provider.reset()

In [7]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from recogym.agents import Agent
from recogym.agents import FeatureProvider

# Define a simple Neural Network
class NeuralNetworkClassifier:
    """Small fully connected softmax classifier built with Keras.

    Parameters
    ----------
    input_dim : int
        Length of each input feature vector.
    num_classes : int, optional
        Width of the softmax output layer. When omitted, falls back to
        ``env.config.num_products`` from the notebook-level ``env`` (the
        original behaviour), which requires that global to exist.
    """

    def __init__(self, input_dim, num_classes=None):
        if num_classes is None:
            # Backward-compatible default; pass num_classes explicitly to
            # avoid depending on the notebook-global environment.
            num_classes = env.config.num_products

        self.model = Sequential([
            Dense(32, input_dim=input_dim, activation='relu'),
            Dense(64, activation='relu'),
            Dense(32, activation='relu'),
            Dense(num_classes, activation='softmax')
        ])
        self.model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    def predict_proba(self, X):
        """Return the model's softmax outputs for ``X``."""
        return self.model.predict(X)

    def fit(self, X, y, epochs=10, batch_size=32):
        """Train the network on ``X`` and ``y``.

        The model is compiled with ``categorical_crossentropy``, so ``y`` is
        expected to be one-hot encoded.
        """
        self.model.fit(X, y, epochs=epochs, batch_size=batch_size)

2024-12-17 00:00:44.872289: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-17 00:00:44.912042: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-17 00:00:44.923909: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-17 00:00:44.959635: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
def build_history_agent(env_args, data):
    """Fit a logistic-regression click model on the logs and wrap it in an agent.

    Each training row is the Kronecker product of the one-hot action with the
    user's view-count history, as produced by ``build_train_data``. Returns a
    ``ModelBasedAgent`` built from the fitted model and a fresh
    ``CrossFeatureProvider``.
    """
    outs, history, actions = build_train_data(data)

    # One row per logged bandit event.
    rows = [
        np.kron(action_flags, view_counts)
        for view_counts, action_flags in zip(history, actions)
    ]
    features = np.vstack(rows)

    config = Configuration(env_args)
    classifier = LogisticRegression(
        solver='lbfgs',
        max_iter=5000,
        random_state=config.random_seed,
    )
    # fit() trains the estimator in place, so the same object is passed to the agent.
    classifier.fit(features, outs)

    provider = CrossFeatureProvider(config)
    return ModelBasedAgent(config, provider, classifier)


In [9]:
# Define the agent class that utilizes a Neural Network
class NNModelBasedAgent(Agent):
    """Greedy agent that scores every candidate product with a neural-network click model.

    Parameters
    ----------
    env : object
        Either an object exposing ``.config.num_products`` or a configuration
        object exposing ``.num_products`` directly.
    feature_provider : FeatureProvider
        Supplies the user feature vector; it is reset when the agent is reset.
    model : object
        Fitted model whose ``predict(X, verbose=0)`` returns one row of
        outputs per input row.
    """

    def __init__(self, env, feature_provider, model):
        self.env = env
        self.feature_provider = feature_provider
        self.model = model
        self.reset()

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past history.

        Raises
        ------
        ValueError
            If the model output is not a 2-D array with one row per candidate
            product.
        """
        self.feature_provider.observe(observation)
        user_features = self.feature_provider.features(observation)

        # Use the object given to the constructor rather than a notebook-level global.
        num_products = getattr(self.env, 'config', self.env).num_products

        # One row per candidate product: row i carries the user features in
        # the i-th block and zeros elsewhere.
        cross_features = np.kron(np.eye(num_products), user_features)

        predictions = np.asarray(self.model.predict(cross_features, verbose=0))
        if predictions.ndim != 2 or predictions.shape[0] != num_products:
            # Fail loudly instead of returning None, which would only surface
            # later as an unrelated error in the caller.
            raise ValueError(
                'model.predict returned shape {}; expected one row per product ({})'.format(
                    predictions.shape, num_products
                )
            )

        # Score each candidate by its predicted click probability. With class
        # outputs, class 1 is "click" (labels are 0/1 clicks), the same
        # convention as predict_proba(...)[:, 1]. A single-column output is
        # taken to be the click probability itself.
        if predictions.shape[1] == 1:
            click_prob = predictions[:, 0]
        else:
            click_prob = predictions[:, 1]

        action = np.argmax(click_prob)

        one_hot = np.zeros_like(click_prob)
        one_hot[action] = 1.0

        return {
            **super().act(observation, reward, done),
            'a': action,
            'ps': 1.,
            'ps-a': one_hot,
        }

    def reset(self):
        """Reset the feature provider's accumulated state."""
        self.feature_provider.reset()

def build_history_nn_agent(env_args, data):
    """Fit a small neural-network click model on the logs and wrap it in an agent.

    Each training row is the Kronecker product of the one-hot action with the
    user's view-count history, as produced by ``build_train_data``; the label
    is the logged click. Returns an ``NNModelBasedAgent`` built from the
    fitted model and a fresh ``CrossFeatureProvider``.
    """
    outs, history, actions = build_train_data(data)
    features = np.vstack([np.kron(aa, hh) for hh, aa in zip(history, actions)])

    config = Configuration(env_args)

    # Layer sizes come from the data and the configuration rather than being
    # hard-coded, so the builder also works when the number of products is
    # not 10. With 10 products this gives the original 100 -> 32 -> 64 -> 10 network.
    model = Sequential([
        Dense(32, activation='relu', input_dim=features.shape[1]),
        Dense(64, activation='relu'),
        Dense(config.num_products, activation='softmax')
    ])
    # Sparse categorical cross-entropy takes integer class labels; here the
    # labels are the click values from the logs.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # fit() trains the model in place; the returned training history is not needed.
    model.fit(features, outs)

    return NNModelBasedAgent(
        config,
        CrossFeatureProvider(config),
        model
    )

In [10]:
# Agent backed by a logistic-regression click model fitted on the logged data.
likelihood_logreg = build_history_agent(env_args=env_1_args, data=data)

In [11]:
# Agent backed by a neural-network click model fitted on the logged data.
neural_net_agent = build_history_nn_agent(env_args=env_1_args, data=data)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m12008/12008[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2ms/step - accuracy: 0.9865 - loss: 0.1027


In [12]:
# Baseline agent from recogym. Later entries override earlier ones, so the
# environment settings take precedence over the agent defaults and
# 'select_randomly' is forced to True.
organic_counter_agent = OrganicUserEventCounterAgent(
    Configuration(
        {
            **organic_user_count_args,
            **env_1_args,
            'select_randomly': True,
        }
    )
)

In [None]:
# This is where we measure the performance of our agents by simulating a test
# environment and comparing them.
# Warning: this takes a very long time to run, upwards of 7 hours and possibly
# more depending on the machine.
result = verify_agents(
    env,
    5000,
    {
        'likelihood nn': neural_net_agent,
        'organic count': organic_counter_agent,
        'likelihood agent': likelihood_logreg,
    },
)
fig = plot_verify_agents(result)
plt.show()


Organic Users: 0it [00:00, ?it/s]
Users:   1%|          | 36/5000 [04:04<5:51:58,  4.25s/it]