# Learning from experts

In [165]:
import numpy as np
import numpy.random as random

In [166]:
# Number of experts that give advice in every round.
n = 100
# Number of rounds (time steps) the prediction game is played for.
T = 10_000

## Problem setup

We assume the existence of a predetermined sequence of correct answers, one per round, each of which is either *Yes* or *No*.

In [167]:
# Hidden ground truth: one boolean per round (True = Yes, False = No),
# each drawn independently and uniformly at random.
list_of_correct_answers = random.choice([False, True], size=T)

Every expert gets a probability with which they give a wrong answer.

In [168]:
# Per-expert probability of giving the wrong answer. Taking the square root
# of a uniform draw skews the values towards 1 (P(p <= x) = x**2), so most
# experts are wrong more often than they are right.
probs = [random.random() ** 0.5 for _ in range(n)]

def get_expert_answers(true_answer):
    """Simulate one round of expert advice.

    Each expert independently gives the wrong answer with its own probability
    taken from the module-level list `probs`; otherwise it reports
    `true_answer`. The number of random draws is the module-level `n`.

    Parameters
    ----------
    true_answer : bool
        The correct answer for this round.

    Returns
    -------
    numpy.ndarray of bool
        One answer per expert, in the same order as `probs`.
    """
    # One uniform draw per expert; an expert lies when its draw does not
    # exceed its lying probability.
    draws = random.random(n)
    lies = draws <= probs

    # XOR flips the true answer exactly for the lying experts and leaves it
    # untouched for the honest ones.
    return np.logical_xor(lies, true_answer)

## The algorithm

If an expert $i$ is wrong, its weight for the next round is decreased according to the rule:
$$w_{t+1}(i) = w_t(i) (1-\alpha).$$

The next function implements both the *randomized* and the *deterministic* variant of the algorithm; the `randomized` flag selects which one is used.

In [169]:
def WMA(n, T, alpha=0.1, randomized=False):
    """Run the weighted majority algorithm with `n` experts for `T` rounds.

    In every round the correct answer is read from the module-level sequence
    `list_of_correct_answers` (which must hold at least `T` entries), the
    experts' advice is obtained from `get_expert_answers`, and the
    algorithm's own answer is delegated to `form_your_answer`. Afterwards the
    weight of every expert that was wrong is multiplied by `1 - alpha`.

    Parameters
    ----------
    n : int
        Number of experts.
    T : int
        Number of rounds to play.
    alpha : float, optional
        Penalty factor applied to the weight of a wrong expert.
    randomized : bool, optional
        Passed through to `form_your_answer` to select the randomized or the
        deterministic decision rule.

    Returns
    -------
    numof_your_mistakes : int
        How many rounds the algorithm itself answered incorrectly.
    numof_expert_mistakes : numpy.ndarray, shape (n, T + 1)
        Column `t` holds each expert's cumulative number of mistakes after
        `t` rounds; column 0 is all zeros.
    """
    expert_weights = np.ones(n)
    own_mistakes = 0
    cumulative_mistakes = np.zeros((n, T + 1))
    keep_fraction = 1 - alpha

    for step in range(T):
        truth = list_of_correct_answers[step]
        advice = get_expert_answers(truth)
        guess = form_your_answer(advice, expert_weights, randomized)

        # Score our own answer for this round.
        if guess != truth:
            own_mistakes += 1

        # Carry the running totals forward, adding one for every expert that
        # was wrong, then penalise those experts' weights.
        missed = advice != truth
        cumulative_mistakes[:, step + 1] = cumulative_mistakes[:, step] + missed
        expert_weights[missed] *= keep_fraction

    return own_mistakes, cumulative_mistakes

The next function is just part of WMA.

In [175]:
def form_your_answer(expert_answers, weights, randomized):
    """Return your answer for one round of the (randomized) WMA algorithm.

    Parameters
    ----------
    expert_answers : array_like of bool
        The answer of every expert for this round (True = Yes, False = No).
    weights : array_like of float
        Current non-negative weight of every expert, aligned with
        `expert_answers`.
    randomized : bool
        If True, pick one expert at random with probability proportional to
        its weight and repeat its answer. If False, answer with the side
        that holds the larger total weight.

    Returns
    -------
    bool
        Your answer for this round.

    Raises
    ------
    ValueError
        If no expert answers are given.
    """
    expert_answers = np.asarray(expert_answers, dtype=bool)
    weights = np.asarray(weights, dtype=float)
    if expert_answers.size == 0:
        raise ValueError("need at least one expert answer")

    if randomized:
        total_weight = weights.sum()
        # After very many penalties every weight can underflow to zero; the
        # weights then carry no information, so fall back to a uniform draw
        # instead of dividing by zero.
        probabilities = weights / total_weight if total_weight > 0 else None
        your_answer = random.choice(expert_answers, p=probabilities)
    else:
        weight_yes = weights[expert_answers].sum()
        weight_no = weights[~expert_answers].sum()
        # Ties are broken in favour of Yes; the choice is arbitrary.
        your_answer = weight_yes >= weight_no

    # Always hand back a plain Python bool rather than a numpy scalar.
    return bool(your_answer)

## Experiments

In [171]:
# Deterministic run: the weight of a wrong expert is multiplied by 1 - alpha.
alpha = 0.1
my_mistakes_det, expert_mistakes_det = WMA(n, T, alpha=alpha, randomized=False)

In [172]:
# Same experiment, this time with the randomized variant of the algorithm.
my_mistakes_rand, expert_mistakes_rand = WMA(n, T, alpha=alpha, randomized=True)

In [173]:
# The last column holds every expert's total mistakes after all rounds;
# its minimum belongs to the best expert in hindsight.
print(f'I made {my_mistakes_det} mistakes.')
print(f'Best expert had {expert_mistakes_det[:, -1].min()} mistakes')

I made 663 mistakes.
Best expert had 635.0 mistakes


In [174]:
# The last column holds every expert's total mistakes after all rounds;
# its minimum belongs to the best expert in hindsight.
print(f'I made {my_mistakes_rand} mistakes.')
print(f'Best expert had {expert_mistakes_rand[:, -1].min()} mistakes')

I made 708 mistakes.
Best expert had 662.0 mistakes


# Tasks

TODO:
- finish the `form_your_answer` function
- plot the regret and experiment with number of rounds / experts / $\alpha$

Bonus points: If you do any of the following

- try different strategies for the experts (not just random lying)
    + you could try and manually "hardcode" an adversary that bases their strategy on your answer (always tells the truth until they have the highest weight, then they start lying)
- implement the multiplicative weights algorithm (i.e. allow for general losses, not just 0/1) and run similar experiments