In [1]:
import numpy as np
from copy import copy
import matplotlib.pyplot as plt

In [2]:
class SocialInfluence:
    """Influence-cascade simulator on a random directed graph of users.

    Each node gets a random category in ``1..self.cat``.  A directed edge
    ``i -> j`` carries a non-zero activation probability only when both nodes
    share a category *and* the random adjacency matrix ``self.edges`` has a 1
    at ``[i, j]``; that probability is drawn uniformly from ``[0.2, 0.7)``.

    Parameters
    ----------
    n_nodes : int
        Number of nodes in the graph.
    n_steps : int
        Maximum number of propagation steps per simulated episode.
    verbose : bool, optional
        When True (the default, matching the historical behaviour) the
        activation-probability matrix is printed after construction.
    """

    def __init__(self, n_nodes, n_steps, verbose=True):
        self.cat = 4
        self.user_cat = np.random.randint(1, self.cat + 1, n_nodes)
        self.edges = np.random.binomial(n=1, p=0.6, size=[n_nodes, n_nodes])

        # Off-diagonal entries are "live" only for same-category pairs that
        # are also connected in the random adjacency matrix.
        same_category = self.user_cat[:, None] == self.user_cat[None, :]
        off_diagonal = ~np.eye(n_nodes, dtype=bool)
        linked = same_category & (self.edges == 1) & off_diagonal

        # Start from zeros (not an uninitialised buffer) so every entry that
        # is not explicitly set is a well-defined 0 probability.
        self.act = np.zeros((n_nodes, n_nodes), dtype=float)
        # Diagonal represents the click probability of the user (fixed at 1).
        np.fill_diagonal(self.act, 1)
        # Boolean-mask assignment fills in row-major order, i.e. the same
        # (i, j) order in which a nested loop would draw the probabilities.
        self.act[linked] = 0.2 + np.random.rand(int(linked.sum())) * 0.5

        # Same array object as ``self.act``; simulate_episode works on a copy.
        self.init_prob_matrix = self.act
        if verbose:
            print(self.act)
        self.n_steps_max = n_steps
        self.n_nodes = n_nodes

    def simulate_episode(self, activated_nodes):
        """Run one cascade starting from ``activated_nodes`` (the seeds).

        Parameters
        ----------
        activated_nodes : int or sequence of int
            Index (or indices) of the initially active node(s).

        Returns
        -------
        numpy.ndarray
            Array of shape ``(steps + 1, n_nodes)``.  Row 0 marks the seeds;
            each following row marks the nodes that became active for the
            first time at that step.  The loop stops after ``n_steps_max``
            steps or after the first step that activates nobody (that
            all-zero row is still appended).
        """
        self.prob_matrix = self.init_prob_matrix.copy()
        self.initial_active_nodes = np.zeros(self.n_nodes)
        self.initial_active_nodes[activated_nodes] = 1
        self.history = np.array([self.initial_active_nodes])
        # Independent copies: the running state must not alias the seed vector.
        self.active_nodes = self.initial_active_nodes.copy()
        self.newly_active_nodes = self.active_nodes.copy()
        self.t = 0
        while self.t < self.n_steps_max and np.sum(self.newly_active_nodes) > 0:
            # Keep only the outgoing rows of currently active nodes.
            self.p = (self.prob_matrix.T * self.active_nodes).T
            self.activated_edges = self.p > np.random.rand(self.p.shape[0], self.p.shape[1])
            # Zero out edges that were attempted (p != 0) but did not fire,
            # so a failed edge is never attempted again.
            self.prob_matrix = self.prob_matrix * ((self.p != 0) == self.activated_edges)
            # A node is newly active if some edge into it fired and it was
            # not active before this step.
            self.newly_active_nodes = (np.sum(self.activated_edges, axis=0) > 0) * (1 - self.active_nodes)
            # Accumulate the active set.  Without this update only the seeds
            # would ever propagate, and the same neighbour could be recorded
            # as "newly active" at every step.
            self.active_nodes = self.active_nodes + self.newly_active_nodes
            self.history = np.concatenate((self.history, [self.newly_active_nodes]), axis=0)
            self.t += 1
        return self.history

In [3]:
class LinearMabEnviroment():
    """Bandit environment whose reward is the size of an influence cascade.

    Each arm has a random binary feature vector.  ``theta`` (a Dirichlet
    sample of shape ``(1, dim)``) and ``p`` (the dot product of ``theta``
    with each arm's features) are computed and stored, but ``round`` does
    not use them: the reward comes from the cascade simulator.

    Parameters
    ----------
    n_arms : int
        Number of arms.
    dim : int
        Length of each arm's feature vector.
    episode : object, optional
        Simulator exposing ``simulate_episode(pulled_arm)``.  When omitted,
        ``round`` falls back to a module-level variable named ``episode``,
        which is how the environment was wired originally.
    """

    def __init__(self, n_arms, dim, episode=None):
        self.theta = np.random.dirichlet(np.ones(dim), size=1)
        self.arms_features = np.random.binomial(1, 0.5, size=(n_arms, dim))
        # One matrix-vector product instead of assigning a shape-(1,) array
        # into each scalar slot (array-to-scalar conversion is deprecated).
        self.p = np.dot(self.arms_features, self.theta[0])
        self.episode = episode

    def round(self, pulled_arm):
        """Simulate one cascade seeded at ``pulled_arm`` and return its reward.

        The reward is the sum of every entry of the history array returned
        by the simulator; the history is also kept on ``self.history``.
        """
        # Prefer the injected simulator; otherwise look up the global
        # ``episode`` at call time (raises NameError if neither exists).
        simulator = self.episode if self.episode is not None else episode
        self.history = simulator.simulate_episode(pulled_arm)
        return np.sum(self.history)

In [4]:
class LinUcbLearner():
    """Linear UCB learner over a fixed set of arm feature vectors.

    Maintains the design matrix ``M`` (initialised to the identity) and the
    reward-weighted feature sum ``b``; the parameter estimate is
    ``theta = M^-1 b``.

    Parameters
    ----------
    arms_features : numpy.ndarray
        Array of shape ``(n_arms, dim)``; row ``k`` is the feature vector of
        arm ``k``.
    c : float, optional
        Multiplier of the confidence width in the UCB index (default 2.0).
    """

    def __init__(self, arms_features, c=2.0):
        self.arms = arms_features
        self.dim = arms_features.shape[1]
        self.collected_rewards = []
        self.pulled_arms = []
        self.c = c
        self.M = np.identity(self.dim)
        self.b = np.atleast_2d(np.zeros(self.dim)).T
        self.theta = np.dot(np.linalg.inv(self.M), self.b)

    def compute_ucbs(self):
        """Return the list of UCB indices, one per arm.

        For an arm with feature column ``x`` the index is
        ``theta^T x + c * sqrt(x^T M^-1 x)``.  Also refreshes ``self.theta``.
        """
        # Invert once per call; the inverse does not depend on the arm.
        M_inv = np.linalg.inv(self.M)
        self.theta = np.dot(M_inv, self.b)
        arms = np.asarray(self.arms)
        estimates = np.dot(arms, self.theta).ravel()
        # Row-wise quadratic form x^T M^-1 x for every arm at once.
        widths = np.sqrt(np.sum(np.dot(arms, M_inv) * arms, axis=1))
        return list(estimates + self.c * widths)

    def pull_arm(self):
        """Return the index of the arm with the largest UCB index."""
        ucbs = self.compute_ucbs()
        return np.argmax(ucbs)

    def update_estimation(self, arm_idx, reward):
        """Fold one (arm, reward) observation into ``M`` and ``b``."""
        arm = np.atleast_2d(self.arms[arm_idx]).T
        self.M += np.dot(arm, arm.T)
        self.b += reward * arm

    def update(self, arm_idx, reward):
        """Record the pull and its reward, then update the estimate."""
        self.pulled_arms.append(arm_idx)
        self.collected_rewards.append(reward)
        self.update_estimation(arm_idx, reward)

In [5]:
# Experiment configuration.
# Seed NumPy's legacy global RNG, which every np.random.* call in this
# notebook draws from, so a fresh Restart & Run All reproduces the results.
SEED = 42
np.random.seed(SEED)

n_arms = 15          # number of arms (also used as the number of graph nodes)
T = 100              # rounds per experiment
n_experiments = 10   # independent repetitions
lin_ucb_rewards_per_experiment = []  # one list of per-round rewards per experiment

In [6]:
# Bandit environment: n_arms arms, each described by 10 binary features.
env = LinearMabEnviroment(n_arms, dim=10)
# Cascade simulator over n_arms nodes, at most 10 propagation steps per
# episode. It must be bound to the global name `episode`, because
# LinearMabEnviroment.round looks that name up when it is called.
episode = SocialInfluence(n_nodes=n_arms, n_steps=10)

[[1.         0.56412465 0.         0.         0.         0.
  0.         0.         0.         0.34641984 0.         0.
  0.         0.         0.        ]
 [0.67653922 1.         0.40203244 0.         0.         0.
  0.         0.         0.         0.50759108 0.         0.
  0.         0.         0.        ]
 [0.         0.24362912 1.         0.63273925 0.         0.
  0.         0.         0.         0.46303612 0.         0.
  0.         0.         0.        ]
 [0.47445594 0.2043475  0.56364841 1.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         1.         0.
  0.22171058 0.         0.         0.         0.55250882 0.
  0.44256056 0.         0.        ]
 [0.         0.         0.         0.         0.         1.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.61942115]
 [0.         0.         0.         0.         0.34145729 0.
  1.

In [7]:
# Start from an empty result list so that re-running this cell replaces the
# previous results instead of appending a second batch to them.
lin_ucb_rewards_per_experiment = []
for e in range(n_experiments):
    # A fresh learner per experiment; the environment (features and graph)
    # is shared across experiments.
    lin_ucb_learner = LinUcbLearner(arms_features=env.arms_features)
    for t in range(T):
        pulled_arm = lin_ucb_learner.pull_arm()
        reward = env.round(pulled_arm)
        lin_ucb_learner.update(pulled_arm, reward)
    # One list of T per-round rewards for this experiment.
    lin_ucb_rewards_per_experiment.append(lin_ucb_learner.collected_rewards)