In [1]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

class GeneralLearner:

    def __init__(self, n_arms):
        self.n_arms = n_arms
        self.t = 0
        self.reward_per_arm = x = [[] for i in range(n_arms)]
        self.collected_rewards = np.array([])

    def update_observations(self, pulled_arm, reward):
        print(pulled_arm)
        for arm in pulled_arm:
            self.reward_per_arm[arm].append(reward)
        self.collected_rewards = np.append(self.collected_rewards, reward)

    def pull_arm(self):
        pass    

    def update(self,pulled_arm, reward):
        self.update_observations(pulled_arm, reward)
        

class GPTS_Learner(GeneralLearner):
    def __init__(self, n_arms, arms, adv_id):
        super().__init__(n_arms)
        self.arms = arms
        self.means = np.zeros((5,self.n_arms))
        self.sigmas = np.ones((5,self.n_arms)) * 10
        self.pulled_arms = []
        ## parameters of the GPTS
        alpha = 10.0
        kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3))
        self.gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha ** 2, normalize_y=True, n_restarts_optimizer=9, )

    ## extend the funtion update_observations of the superclass because we want to
    ## update also the list of the pulled arms (together with the rewards and the reward per arm)
    def update_observations(self, arm_idx, reward):
        super().update_observations(arm_idx, reward)
        ## per ogni categoria append l'arm corrispondente alla bid, 
        ## pulled_arms[[arm_1, arm_3, ..., ..., ...][arm2, ..., ..., ..., ...]...]
        self.pulled_arms.append(self.arms[arm_idx])

    ## funtion that updates the model(means, sigmas) looking at the new rewards obtained from the enviroment
    def update_model(self):
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_rewards
        if len(self.pulled_arms) > 1:
            self.gp.fit(x, y)
        self.means, self.sigmas = self.gp.predict(np.atleast_2d(self.arms).T, return_std=True)
        self.sigmas = np.maximum(self.sigmas, 1e-2)

    ## update the value of the current round and update observation and model
    def update(self, pulled_arm, reward):
        self.t += 1
        self.update_observations(pulled_arm, reward)
        self.update_model()

    ## funtion that pulls the arm, it returns the argmax of the distribution given the means and the sigmas
    def pull_arm(self):
        sampled_values = np.random.normal(self.means, self.sigmas)
        return np.argmax(sampled_values,-1)


ModuleNotFoundError: No module named 'numpy'

In [140]:
n_arms=5
min_bid=0.0
max_bid=4.0
bids=np.linspace(min_bid, max_bid, n_arms)
sigma=10
learner=GPTS_Learner(n_arms=n_arms, arms=bids, adv_id=1)

In [141]:
def clicks(x):
    return 1-np.exp(-4*x)

class Advertising_envirorment:
    def __init__(self, bids, sigma):
        self.bids=bids
        self.means=clicks(bids)
        self.sigmas=np.ones((5,len(bids)))*sigma

        #per ora ha una struttura semplice
        #andrà modificato affinchè ritorni un reward in funzione delle features
    def round(self, pulled_arm):
        return np.sum(np.random.normal(self.means[pulled_arm], self.sigmas[pulled_arm]))

    def round_all(self, pulled_arms):
        table=[]
        for pulled_arm in pulled_arms:
            table.append(self.round(pulled_arm))
        return table

In [142]:
T=12
env= Advertising_envirorment(bids=bids, sigma=sigma)
gpts_reward_per_experiment=[]



In [143]:
for t in range(0,T-1):
        pulled_arm=learner.pull_arm()
        print("pulled arm:",pulled_arm)
        reward=env.round(pulled_arm=pulled_arm)
        print("reward",reward)
        learner.update(pulled_arm=pulled_arm, reward=reward)
    
gpts_reward_per_experiment.append(learner.collected_rewards)

pulled arm: [2 3 4 0 0]
reward 53.35390998654669
[2 3 4 0 0]
pulled arm: 4
reward -12.781267859281687
4


TypeError: 'numpy.int64' object is not iterable