In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy interaction log: each of users '1'-'3' is paired once with each of items
# 'A'-'C'. `response` is 1 for exactly one item per user (1 -> A, 2 -> B, 3 -> C)
# and 0 otherwise. Built column-wise; the row order is user-major.
df_train = pd.DataFrame({
    'user_id': ['1', '1', '1', '2', '2', '2', '3', '3', '3'],
    'item_id': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'response': [1, 0, 0, 0, 1, 0, 0, 0, 1],
})

df_train

Unnamed: 0,user_id,item_id,response
0,1,A,1
1,1,B,0
2,1,C,0
3,2,A,0
4,2,B,1
5,2,C,0
6,3,A,0
7,3,B,0
8,3,C,1


In [3]:
# One-hot user features: each of the six users has exactly one of u0/u1/u2 set.
# Users '4'-'6' repeat the feature vectors of users '1'-'3' respectively.
df_users_features = pd.DataFrame({
    'user_id': ['1', '2', '3', '4', '5', '6'],
    'u0': [1, 0, 0, 1, 0, 0],
    'u1': [0, 1, 0, 0, 1, 0],
    'u2': [0, 0, 1, 0, 0, 1],
})

df_users_features

Unnamed: 0,user_id,u0,u1,u2
0,1,1,0,0
1,2,0,1,0
2,3,0,0,1
3,4,1,0,0
4,5,0,1,0
5,6,0,0,1


In [4]:
class LinUCB:
    """Single-arm LinUCB model: ridge regression plus an upper-confidence bonus.

    The model keeps the sufficient statistics of a ridge regression so that it
    can be updated incrementally with successive calls to ``fit``:

    * ``A     = X^T X + l2_lambda * I``
    * ``A_inv = A^-1``
    * ``Xty   = X^T y``
    * ``beta  = A_inv @ Xty``

    ``predict`` returns ``x @ beta + alpha * sqrt(x A_inv x^T)`` per context row.

    Parameters
    ----------
    alpha : float, default 1.0
        Exploration parameter; scales the confidence bonus.
    l2_lambda : float, default 1.0
        Ridge regularization strength used to initialise ``A``.
    """

    def __init__(self, alpha: float = 1.0, l2_lambda: float = 1.0):

        self.alpha = alpha                  # exploration parameter
        self.l2_lambda = l2_lambda          # regularization parameter

        self.beta = None                    # (XtX + l2_lambda * I_d)^-1 * Xty = A^-1 * Xty
        self.A = None                       # (XtX + l2_lambda * I_d)
        self.A_inv = None                   # (XtX + l2_lambda * I_d)^-1
        self.Xty = None                     # running sum of X^T y

    def init(self, num_features: int):
        """Reset the model to its prior state for ``num_features`` features.

        Sets ``A = l2_lambda * I``, ``Xty = 0`` and derives ``A_inv`` and
        ``beta`` from them. Any previously fitted state is discarded.
        """
        self.Xty = np.zeros(num_features)
        self.A = self.l2_lambda * np.identity(num_features)

        if self.l2_lambda == 0:
            # A is the zero matrix and has no inverse; keep the all-zero
            # placeholder until fit() supplies data.
            self.A_inv = np.zeros_like(self.A)
        else:
            # The inverse of (lambda * I) is (I / lambda). Copying A is only
            # correct in the special case lambda == 1.
            self.A_inv = np.identity(num_features) / self.l2_lambda

        self.beta = np.dot(self.A_inv, self.Xty)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fold a batch of observations into the model (incremental update).

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Context rows. A 1-D input is treated as a single row.
        y : array-like, shape (n_samples,)
            Observed responses, one per context row.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        numpy.linalg.LinAlgError
            If the updated ``A`` is singular (only possible with l2_lambda == 0).
        """
        # Work on plain float arrays so DataFrame/Series inputs are paired by
        # position and never by pandas label alignment.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} values"
            )

        # Allow fit() on a fresh object: initialise the prior on first use.
        if self.A is None:
            self.init(X.shape[1])

        # X transpose
        Xt = X.T

        # Update A
        self.A = self.A + np.dot(Xt, X)
        self.A_inv = np.linalg.inv(self.A)

        # Add new Xty values to old
        self.Xty = self.Xty + np.dot(Xt, y)

        # Recalculate beta coefficients
        self.beta = np.dot(self.A_inv, self.Xty)

    def predict(self, x: np.ndarray):
        """Return the LinUCB score for every context row in ``x``.

        Parameters
        ----------
        x : array-like, shape (n_contexts, n_features)
            Contexts to score. A 1-D input is treated as a single context.

        Returns
        -------
        numpy.ndarray, shape (n_contexts,)
            ``x @ beta + alpha * sqrt(x A_inv x^T)`` for each row.

        Notes
        -----
        ``init`` or ``fit`` must have been called first; otherwise ``A_inv``
        and ``beta`` are still ``None`` and numpy raises.
        """
        x = np.atleast_2d(np.asarray(x, dtype=float))

        # Calculating x_A_inv
        x_A_inv = np.dot(x, self.A_inv)

        # Upper confidence bound = alpha * sqrt(x A^-1 x^T).
        # The row-wise sum of (x A^-1) * x is the diagonal of x A^-1 x^T, so the
        # ucb values are calculated for all the contexts in one single go.
        ucb = self.alpha * np.sqrt(np.sum(x_A_inv * x, axis=1))

        # Calculate linucb expectation y = x * b + ucb
        return np.dot(x, self.beta) + ucb

In [5]:
# Pair every item-A interaction with that user's features by joining on
# user_id, so features and responses stay aligned regardless of row order.
# validate='many_to_one' makes the join fail if a user_id appears more than
# once in the feature table.
arm_A_train = df_train[df_train['item_id'] == 'A'].merge(
    df_users_features, on='user_id', how='inner', validate='many_to_one'
)
arm_A_X = arm_A_train[['u0', 'u1', 'u2']].to_numpy()
arm_A_y = arm_A_train['response'].to_numpy()

arm_A = LinUCB()
arm_A.init(arm_A_X.shape[1])  # one coefficient per user feature
arm_A.fit(arm_A_X, arm_A_y)

In [6]:
# Pair every item-B interaction with that user's features by joining on
# user_id, so features and responses stay aligned regardless of row order.
# validate='many_to_one' makes the join fail if a user_id appears more than
# once in the feature table.
arm_B_train = df_train[df_train['item_id'] == 'B'].merge(
    df_users_features, on='user_id', how='inner', validate='many_to_one'
)
arm_B_X = arm_B_train[['u0', 'u1', 'u2']].to_numpy()
arm_B_y = arm_B_train['response'].to_numpy()

arm_B = LinUCB()
arm_B.init(arm_B_X.shape[1])  # one coefficient per user feature
arm_B.fit(arm_B_X, arm_B_y)

In [7]:
# Pair every item-C interaction with that user's features by joining on
# user_id, so features and responses stay aligned regardless of row order.
# validate='many_to_one' makes the join fail if a user_id appears more than
# once in the feature table.
arm_C_train = df_train[df_train['item_id'] == 'C'].merge(
    df_users_features, on='user_id', how='inner', validate='many_to_one'
)
arm_C_X = arm_C_train[['u0', 'u1', 'u2']].to_numpy()
arm_C_y = arm_C_train['response'].to_numpy()

arm_C = LinUCB()
arm_C.init(arm_C_X.shape[1])  # one coefficient per user feature
arm_C.fit(arm_C_X, arm_C_y)

In [8]:
# Single context with only u0 active (the feature vector of users '1' and '4').
context = np.array([[1, 0, 0]])

# Score the context with each arm in turn; every arm receives its own copy.
arm_A_score, arm_B_score, arm_C_score = (
    arm.predict(np.array(context)) for arm in (arm_A, arm_B, arm_C)
)

print(f"Arm A score: {arm_A_score}")
print(f"Arm B score: {arm_B_score}")
print(f"Arm C score: {arm_C_score}")

Arm A score: [1.20710678]
Arm B score: [0.70710678]
Arm C score: [0.70710678]


In [9]:
# Single context with only u1 active (the feature vector of users '2' and '5').
context = np.array([[0, 1, 0]])

# Score the context with each arm in turn; every arm receives its own copy.
arm_A_score, arm_B_score, arm_C_score = (
    arm.predict(np.array(context)) for arm in (arm_A, arm_B, arm_C)
)

print(f"Arm A score: {arm_A_score}")
print(f"Arm B score: {arm_B_score}")
print(f"Arm C score: {arm_C_score}")

Arm A score: [0.70710678]
Arm B score: [1.20710678]
Arm C score: [0.70710678]


In [10]:
# Single context with only u2 active (the feature vector of users '3' and '6').
context = np.array([[0, 0, 1]])

# Score the context with each arm in turn; every arm receives its own copy.
arm_A_score, arm_B_score, arm_C_score = (
    arm.predict(np.array(context)) for arm in (arm_A, arm_B, arm_C)
)

print(f"Arm A score: {arm_A_score}")
print(f"Arm B score: {arm_B_score}")
print(f"Arm C score: {arm_C_score}")

Arm A score: [0.70710678]
Arm B score: [0.70710678]
Arm C score: [1.20710678]


In [14]:
# Example of how to train a single mab2rec recommender to generate top-3 recommendations
# on the same toy data used for the hand-written LinUCB arms.

# Import 
# NOTE(review): `score` is imported but not used in the cells shown here.
from mab2rec import BanditRecommender, LearningPolicy
from mab2rec.pipeline import train, score

# LinUCB recommender that returns the top-3 items (top_k=3).
# The policy is created with no arguments, so it runs on the library's defaults;
# presumably these correspond to alpha=1.0 and l2_lambda=1.0, since the scores
# returned by the recommend() cells match the hand-written LinUCB - confirm
# against the mab2rec documentation.
rec = BanditRecommender(LearningPolicy.LinUCB(), top_k=3)

# Train on (user, item, response) interactions in train data using user features 
train(rec, data=df_train, 
      user_features=df_users_features)

In [17]:
# Recommend for one context with only u0 active, returning the items together
# with their scores; apply_sigmoid=False asks for the scores without a sigmoid.
rec.recommend(
    contexts=[[1, 0, 0]],
    return_scores=True,
    apply_sigmoid=False,
)

(['A', 'C', 'B'], [1.2071067811865475, 0.7071067811865476, 0.7071067811865476])

In [18]:
# Recommend for one context with only u1 active, returning the items together
# with their scores; apply_sigmoid=False asks for the scores without a sigmoid.
rec.recommend(
    contexts=[[0, 1, 0]],
    return_scores=True,
    apply_sigmoid=False,
)

(['B', 'C', 'A'], [1.2071067811865475, 0.7071067811865476, 0.7071067811865476])

In [19]:
# Recommend for one context with only u2 active, returning the items together
# with their scores; apply_sigmoid=False asks for the scores without a sigmoid.
rec.recommend(
    contexts=[[0, 0, 1]],
    return_scores=True,
    apply_sigmoid=False,
)

(['C', 'B', 'A'], [1.2071067811865475, 0.7071067811865476, 0.7071067811865476])