In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder

In [3]:
# For MAB it's
action_list = [] # Filled with the estimated reward

In [4]:
# For Contextual Bandit we're gonna have
# NxM matrix
# N = Number of context or the feature
# M = Number of actions can be taken (bandit)
action = np.array([[], []], np.int32)

In [5]:
sample_data = {
    'user_id': range(1, 9),  # Unique users
    'gender': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'location': ['South', 'North', 'West', 'East', 'South', 'North', 'West', 'East'],
    'age_group': ['Young', 'Adult', 'Senior', 'Young', 'Adult', 'Senior', 'Young', 'Adult'],
    'previous_purchases': np.random.randint(0, 20, size=8),  # Number of past purchases
    'device_type': ['Mobile', 'Desktop', 'Tablet', 'Mobile', 'Desktop', 'Tablet', 'Mobile', 'Desktop'],
    'chosen_product': ['product A', 'product B', 'product C', 'product D', 
                       'product E', 'product F', 'product G', 'product H'],  # Action taken
    'reward': np.random.choice([0, 1], size=8)  # Simulated reward (1=success, 0=failure)
}

raw_data = pd.DataFrame(data=sample_data, index=[0, 1, 2, 3, 4, 5, 6, 7])

# One hot encode the data

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

encoded_data = encoder.fit_transform(raw_data[['gender', 'location', 'age_group', 'device_type']])
encoded_columns = encoder.get_feature_names_out(['gender', 'location', 'age_group', 'device_type'])

one_hot_data = pd.DataFrame(encoded_data, columns=encoded_columns)

raw_data = raw_data[["user_id", "previous_purchases", "chosen_product", "reward"]]

data = pd.concat([raw_data, one_hot_data], axis=1)
data


Unnamed: 0,user_id,previous_purchases,chosen_product,reward,gender_Female,gender_Male,location_East,location_North,location_South,location_West,age_group_Adult,age_group_Senior,age_group_Young,device_type_Desktop,device_type_Mobile,device_type_Tablet
0,1,16,product A,1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,2,5,product B,0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,3,12,product C,0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
3,4,15,product D,0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,5,15,product E,1,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
5,6,16,product F,0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
6,7,5,product G,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
7,8,14,product H,1,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


In [12]:
NUM_OF_ACTION = len(data["chosen_product"].unique())
NUM_OF_CONTEXT = 12

ALPHA = 2

NUM_OF_TRIALS = 500

In [13]:
# Logging for regret calculation

# Best expected reward for each round
best_expected_reward = []

# Actual reward
result_history = []

# Random Exploration
random_reward = []

In [None]:
class LinUCB_Bandit:
    
    def __init__ (self):
        # True Weight (hidden from the user)
        self.true_weight = np.random.randn(NUM_OF_CONTEXT)   # Theta_a


        self.feature_covariance_matrix = np.identity(NUM_OF_CONTEXT) # Identity matrix with a dimension of the number of action
        self.weighted_reward_matrix = np.zeros((NUM_OF_CONTEXT,1))

        # Estimation of the true weight, should get closer to the true weight by the end of the trials
        self.estimated_weight = np.zeros(NUM_OF_CONTEXT)

        self.number_pulled = 0

    def return_reward(self, context = np.random.rand(NUM_OF_CONTEXT)):

        

        return context, reward

    def update_ucb_matrix(self, context, reward):
        
        # Resize context array
        context_resized = np.array([context])

        # Count feature covarianve matrix (A_b)
        self.feature_covariance_matrix += np.matmul(context_resized.transpose(), context_resized)

        # Count weight sum reward (b_b)
        self.weighted_reward_matrix += reward * context_resized.transpose()

        inverted_feature_cov = np.linalg.inv(self.feature_covariance_matrix)
        self.estimated_weight = np.matmul(inverted_feature_cov, self.weighted_reward_matrix)

        # return self.feature_covariance_matrix, self.weighted_reward_matrix
    
    def calculate_ucb_value(self, context):

        # Resize context array
        context_resized = np.array([context])

        # Inverted feature covariance
        # inverted_feature_cov = c

        # Count estimated weight
        # self.estimated_weight = np.matmul(inverted_feature_cov, self.weighted_reward_matrix)

        # Expected reward of the bandit with respect to the current context
        expected_reward = np.dot(context_resized, self.estimated_weight)

        # Confidence bound
        confidence_bound = np.sqrt(np.matmul(np.matmul(context_resized, np.linalg.inv(self.feature_covariance_matrix)), context_resized.transpose()))
        
        # Getting Upper confidence bound by adding the confidence bound times by alpha (multiplier)
        upper_confidence_bound = expected_reward + (ALPHA * confidence_bound)

        return context, context_resized, np.linalg.inv(self.feature_covariance_matrix), self.estimated_weight, expected_reward, confidence_bound, upper_confidence_bound 
        # return upper_confidence_bound
        # np.linalg.inv(linUCB.feature_covariance_matrix)





# Test

In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from scipy.stats import norm

# 1. Generate synthetic data for users and products
np.random.seed(42)

# Number of users and products
n_users = 1000
n_products = 10

# User features: age, gender (categorical), income (continuous)
user_data = pd.DataFrame({
    'age': np.random.randint(18, 65, size=n_users),
    'gender': np.random.choice(['male', 'female'], size=n_users),
    'income': np.random.randint(20000, 100000, size=n_users)
})

# Product features: category (categorical), price (continuous)
product_data = pd.DataFrame({
    'category': np.random.choice(['electronics', 'clothing', 'books'], size=n_products),
    'price': np.random.uniform(10, 1000, size=n_products)
})

# 2. One-hot encode categorical features for users and products

# One-hot encode user features
encoder_user = OneHotEncoder(sparse_output=False)
user_encoded = encoder_user.fit_transform(user_data[['gender']])

# One-hot encode product features
encoder_product = OneHotEncoder(sparse_output=False)
product_encoded = encoder_product.fit_transform(product_data[['category']])

# 3. Define LinUCB Algorithm
class LinUCB:
    def __init__(self, n_users, n_products, alpha=1.0):
        self.alpha = alpha
        self.n_users = n_users
        self.n_products = n_products
        
        # Initialize the A and b matrices for each product (action)
        self.A = np.array([np.identity(5) for _ in range(n_products)])  # n_features=5 after encoding
        self.b = np.array([np.zeros(5) for _ in range(n_products)])

    def recommend(self, user_idx, product_idx, user_context, product_context):
        """
        Recommend a product for a user based on the LinUCB algorithm.
        """
        x = np.hstack([user_context[user_idx], product_context[product_idx]])
        A_inv = np.linalg.inv(self.A[product_idx])
        theta = A_inv @ self.b[product_idx]
        p = theta.T @ x
        confidence = self.alpha * np.sqrt(x.T @ A_inv @ x)
        return p + confidence

    def update(self, user_idx, product_idx, user_context, product_context, reward):
        """
        Update the A and b matrices based on the observed reward.
        """
        x = np.hstack([user_context[user_idx], product_context[product_idx]])
        self.A[product_idx] += np.outer(x, x)
        self.b[product_idx] += reward * x

# 4. Simulate Interaction (User-Product Clicks)
def simulate_interaction(n_users, n_products, user_data, product_data, linucb):
    rewards = []
    user_context = np.hstack([user_data[['age', 'income']].values, user_encoded])  # User context includes age, income, and gender
    product_context = np.hstack([product_data[['price']].values, product_encoded])  # Product context includes price and category

    for user_idx in range(n_users):
        # Choose the product with the highest expected reward
        expected_rewards = np.array([linucb.recommend(user_idx, p_idx, user_context, product_context) for p_idx in range(n_products)])
        recommended_product = np.argmax(expected_rewards)

        # Simulate reward (click or not click based on product recommendation)
        # Let's simulate the reward as a binary outcome based on user and product characteristics
        reward = np.random.rand() < (0.1 * user_data.loc[user_idx, 'income'] / 100000 + 0.5 * (1 if user_data.loc[user_idx, 'gender'] == 'male' else 0))

        # Update LinUCB model
        linucb.update(user_idx, recommended_product, user_context, product_context, reward)
        
        rewards.append((user_idx, recommended_product, reward))

    return rewards

# 5. Initialize and run LinUCB
linucb = LinUCB(n_users, n_products, alpha=1.0)
rewards = simulate_interaction(n_users, n_products, user_data, product_data, linucb)

# 6. Analyze Results
rewards_df = pd.DataFrame(rewards, columns=['user_idx', 'product_idx', 'reward'])
print(rewards_df.head())


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 8 is different from 5)

In [18]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from scipy.stats import norm

# 1. Generate synthetic data for users and products
np.random.seed(42)

# Number of users and products
n_users = 1000
n_products = 10

# User features: age, gender (categorical), income (continuous)
user_data = pd.DataFrame({
    'age': np.random.randint(18, 65, size=n_users),
    'gender': np.random.choice(['male', 'female'], size=n_users),
    'income': np.random.randint(20000, 100000, size=n_users)
})

# Product features: category (categorical), price (continuous)
product_data = pd.DataFrame({
    'category': np.random.choice(['electronics', 'clothing', 'books'], size=n_products),
    'price': np.random.uniform(10, 1000, size=n_products)
})

# 2. One-hot encode categorical features for users and products

# One-hot encode user features
encoder_user = OneHotEncoder(sparse_output=False)
user_encoded = encoder_user.fit_transform(user_data[['gender']])

# One-hot encode product features
encoder_product = OneHotEncoder(sparse_output=False)
product_encoded = encoder_product.fit_transform(product_data[['category']])

# 3. Define LinUCB Algorithm
class LinUCB:
    def __init__(self, n_users, n_products, n_user_features, n_product_features, alpha=1.0):
        self.alpha = alpha
        self.n_users = n_users
        self.n_products = n_products
        
        # Initialize the A and b matrices for each product (action)
        # Total number of features = n_user_features + n_product_features
        self.A = np.array([np.identity(n_user_features + n_product_features) for _ in range(n_products)])
        self.b = np.array([np.zeros(n_user_features + n_product_features) for _ in range(n_products)])

    def recommend(self, user_idx, product_idx, user_context, product_context):
        """
        Recommend a product for a user based on the LinUCB algorithm.
        """
        # Concatenate the user and product context
        x = np.hstack([user_context[user_idx], product_context[product_idx]])
        A_inv = np.linalg.inv(self.A[product_idx])
        theta = A_inv @ self.b[product_idx]
        p = theta.T @ x
        confidence = self.alpha * np.sqrt(x.T @ A_inv @ x)
        return p + confidence

    def update(self, user_idx, product_idx, user_context, product_context, reward):
        """
        Update the A and b matrices based on the observed reward.
        """
        x = np.hstack([user_context[user_idx], product_context[product_idx]])
        self.A[product_idx] += np.outer(x, x)
        self.b[product_idx] += reward * x

# 4. Simulate Interaction (User-Product Clicks)
def simulate_interaction(n_users, n_products, user_data, product_data, linucb):
    rewards = []
    user_context = np.hstack([user_data[['age', 'income']].values, user_encoded])  # User context includes age, income, and gender
    product_context = np.hstack([product_data[['price']].values, product_encoded])  # Product context includes price and category

    for user_idx in range(n_users):
        # Choose the product with the highest expected reward
        expected_rewards = np.array([linucb.recommend(user_idx, p_idx, user_context, product_context) for p_idx in range(n_products)])
        recommended_product = np.argmax(expected_rewards)

        # Simulate reward (click or not click based on product recommendation)
        # Let's simulate the reward as a binary outcome based on user and product characteristics
        reward = np.random.rand() < (0.1 * user_data.loc[user_idx, 'income'] / 100000 + 0.5 * (1 if user_data.loc[user_idx, 'gender'] == 'male' else 0))

        # Update LinUCB model
        linucb.update(user_idx, recommended_product, user_context, product_context, reward)
        
        rewards.append((user_idx, recommended_product, reward))

    return rewards

# 5. Initialize and run LinUCB
n_user_features = user_encoded.shape[1] + 2  # 2 for age and income
n_product_features = product_encoded.shape[1] + 1  # 1 for price
linucb = LinUCB(n_users, n_products, n_user_features, n_product_features, alpha=1.0)
rewards = simulate_interaction(n_users, n_products, user_data, product_data, linucb)

# 6. Analyze Results
rewards_df = pd.DataFrame(rewards, columns=['user_idx', 'product_idx', 'reward'])
print(rewards_df.head())


   user_idx  product_idx  reward
0         0            7    True
1         1            2   False
2         2            5   False
3         3            6   False
4         4            3    True
