# Assignment RL Flappy Bird, Alexandre SELVESTREL

Import the required libraries

In [1]:
import os, sys
import gymnasium as gym
import time
from tqdm import tqdm
import text_flappy_bird_gym
import pickle
import wandb
import datetime
import numpy as np
import matplotlib.pyplot as plt
import flappy_bird_gym



## Easy version of flappy bird (the one specific to the assignment):

Define how to process the data for the full-screen observation

In [2]:
def _gap_bounds(line, k):
    """Return ``(first, last)`` indices of the first run of free cells in ``line``.

    The run starts at the first cell equal to 0 and ends just before the next
    cell equal to 2. A run that is still open at the end of the line is taken
    to extend to index ``k - 1``.

    Raises:
        ValueError: if the line contains no free cell at all.
    """
    start = None
    for j, elem in enumerate(line):
        if start is None:
            if elem == 0:
                start = j
        elif elem == 2:
            return start, j - 1
    if start is None:
        raise ValueError("gap not found")
    return start, k - 1


def process(obs, n, k, g):
    """Reduce a full-screen observation to a compact, hashable state.

    The grid is read as ``obs[i, j]`` with ``i < n`` and ``j < k``; the agents
    in this file pass ``n = width`` and ``k = height`` (20 x 15 by default).
    Cells equal to 1 are treated as the bird, 2 as pipe and 0 as free space.

    Args:
        obs: 2-D array-like supporting ``obs[i, j]`` and ``obs[i, :]``.
        n: size of the first axis.
        k: size of the second axis.
        g: expected gap size, in cells.

    Returns:
        A tuple with one ``(bird_pos, distance, offset)`` triple per detected
        pipe line (at most two), where ``distance`` is the number of lines
        between the bird and the pipe and ``offset`` is the gap centre minus
        the bird position.

    Raises:
        ValueError: if the bird, a pipe or a gap cannot be found, or if a gap
            does not span exactly ``g`` cells.
    """
    bird_line = round(n / 3) - 1

    # Keep the last matching cell on the bird line.
    bird_pos = None
    for j in range(k):
        if obs[bird_line, j] == 1:
            bird_pos = j
    if bird_pos is None:
        raise ValueError("bird not found")

    # Collect the next (at most two) lines past the bird that contain a pipe.
    # A gap is g cells long, so the first g + 1 cells of a pipe line always
    # include at least one pipe cell.
    pipe_lines = []
    i = bird_line + 1
    while len(pipe_lines) < 2 and i < n:
        if any(obs[i, j] == 2 for j in range(g + 1)):
            pipe_lines.append(i)
        i += 1
    if not pipe_lines:
        raise ValueError("no obstacle found")

    state = []
    for pipe_line in pipe_lines:
        # Exactly one gap per pipe line keeps gaps aligned with pipe_lines,
        # including when the gap touches the last cell of the line.
        first, last = _gap_bounds(obs[pipe_line, :], k)
        if last - first != g - 1:
            print(obs)
            print('taille erreur', first, last)
            raise ValueError("gap mauvaise taille")
        center = first + round(g / 2)
        state.append((bird_pos, pipe_line - bird_line, center - bird_pos))
    return tuple(state)

Define the policies

In [3]:
class MC:
    """Tabular Monte Carlo control agent with an epsilon-greedy policy.

    Q-values live in a dict keyed by ``(state, action)`` with actions 0 and 1.
    Unseen pairs are initialised uniformly in [-10, 10). At the end of an
    episode every recorded step is updated towards its undiscounted return
    with a constant step size ``alpha``.

    Args:
        eps: probability of picking a random action while training.
        alpha: step size of the Q-value update.
        n_max_step: number of steps after which an episode is cut off.
        height, width, gap: screen geometry forwarded to ``process``.
        trained: when True, load the Q-table from disk instead of starting
            empty.
        processed: True when observations are already usable as states;
            False when they are full-screen grids to be reduced by
            ``process``.
    """

    def __init__(self, eps, alpha, n_max_step, height, width, gap,
                 trained=False, processed=True) -> None:
        self.eps = eps
        self.alpha = alpha
        self.n_max_step = n_max_step
        self.nsteps = 0
        self.processed = processed
        self.height = height
        self.width = width
        self.gap = gap
        # Created here so that chose_action works even before reset().
        self.episode = []
        if trained is False:
            self.q = {}
        else:
            # NOTE: unpickling can execute arbitrary code; only load files
            # produced by save().
            with open(self._q_path(), 'rb') as f:
                self.q = pickle.load(f)

    def _q_path(self):
        """Return the pickle path matching the observation mode."""
        if self.processed is True:
            return 'data/MC.pkl'
        return 'data/MC_non_processed.pkl'

    def reset(self):
        """Forget the recorded episode and restart the step counter."""
        self.episode = []
        self.nsteps = 0

    def chose_action(self, observation, training=True):
        """Pick an action (0 or 1) for ``observation`` and record the step.

        With ``training`` set, a random action is taken with probability
        ``eps``; otherwise the action with the larger Q-value is taken, with
        ties resolved in favour of action 1.
        """
        if self.processed is True:
            state = observation
        else:
            state = process(observation, n=self.width, k=self.height, g=self.gap)
        # Direct dict membership: O(1) instead of rebuilding the key list.
        for candidate in (0, 1):
            if (state, candidate) not in self.q:
                self.q[(state, candidate)] = np.random.uniform(-10, 10)
        if training and np.random.uniform(0, 1) < self.eps:
            action = np.random.choice([0, 1])
        else:
            action = 0 if self.q[(state, 0)] > self.q[(state, 1)] else 1
        self.episode.append({'state': state, 'action': action})
        self.nsteps += 1
        return action

    def update(self, reward, next_observation, done):
        """Store the reward of the last step and learn once the episode ends.

        The environment reward is replaced by 100 when the step budget is
        reached and by -10 when the episode terminated (the latter wins if
        both hold). ``next_observation`` is unused.
        """
        if self.nsteps == self.n_max_step:
            reward = 100
        if done:
            reward = -10
        self.episode[-1]['reward'] = reward
        if not done and self.nsteps < self.n_max_step:
            return
        # Walk the episode backwards, accumulating the undiscounted return.
        G = 0
        for step in reversed(self.episode):
            G += step['reward']
            key = (step['state'], step['action'])
            self.q[key] += self.alpha * (G - self.q[key])

    def save(self):
        """Pickle the Q-table under ``data/``, creating the folder if needed."""
        os.makedirs('data', exist_ok=True)
        with open(self._q_path(), 'wb') as f:
            pickle.dump(self.q, f)




In [4]:
class Sarsa:
    """Tabular TD(lambda) control agent with accumulating eligibility traces.

    Q-values and traces live in dicts keyed by ``(state, action)`` with
    actions 0 and 1. Unseen pairs get a Q-value drawn uniformly in [-10, 10)
    and a zero trace. Note that, despite the class name, the TD target in
    ``update`` bootstraps from the maximum Q-value of the next state.

    Args:
        eps: probability of picking a random action while training.
        alpha: step size of the update.
        n_max_step: number of steps after which an episode is cut off.
        height, width, gap: screen geometry forwarded to ``process``.
        trained: when True, load Q-values and traces from disk.
        processed: True when observations are already usable as states;
            False when they are full-screen grids to be reduced by
            ``process``.
        lambd: per-step decay factor of the eligibility traces.
    """

    def __init__(self, eps, alpha, n_max_step, height, width, gap,
                 trained=False, processed=True, lambd=0.9) -> None:
        self.eps = eps
        self.alpha = alpha
        self.n_max_step = n_max_step
        self.nsteps = 0
        self.processed = processed
        self.height = height
        self.width = width
        self.gap = gap
        self.lambd = lambd
        if trained is False:
            self.q = {}
            self.e = {}
        else:
            # NOTE: unpickling can execute arbitrary code; only load files
            # produced by save().
            q_path, e_path = self._paths()
            with open(q_path, 'rb') as f:
                self.q = pickle.load(f)
            with open(e_path, 'rb') as f:
                self.e = pickle.load(f)

    def _paths(self):
        """Return the (Q-table, traces) pickle paths for the observation mode."""
        if self.processed is True:
            return 'data/Sarsa_q.pkl', 'data/Sarsa_e.pkl'
        return 'data/Sarsa_non_processed_q.pkl', 'data/Sarsa_non_processed_e.pkl'

    def _ensure_state(self, state):
        """Create Q-values and zero traces for both actions of a new state."""
        for candidate in (0, 1):
            key = (state, candidate)
            # Direct dict membership: O(1) instead of rebuilding the key list.
            if key not in self.q:
                self.q[key] = np.random.uniform(-10, 10)
                self.e[key] = 0

    def reset(self):
        """Start a new episode: zero the step counter and all traces."""
        self.nsteps = 0
        # Traces describe the current trajectory only; keeping them would let
        # the first rewards of an episode update states of the previous one.
        self.e = dict.fromkeys(self.e, 0)

    def chose_action(self, observation, training=True):
        """Pick an action (0 or 1) and remember the (state, action) pair.

        With ``training`` set, a random action is taken with probability
        ``eps``; otherwise the action with the larger Q-value is taken, with
        ties resolved in favour of action 1.
        """
        if self.processed is True:
            state = observation
        else:
            state = process(observation, n=self.width, k=self.height, g=self.gap)
        self.current_state = state
        self._ensure_state(state)
        if training and np.random.uniform(0, 1) < self.eps:
            action = np.random.choice([0, 1])
        else:
            action = 0 if self.q[(state, 0)] > self.q[(state, 1)] else 1
        self.nsteps += 1
        # Recorded for exploratory actions too, so update() credits the
        # action that was actually played.
        self.current_action = action
        return action

    def update(self, reward, next_observation, done):
        """Apply one TD(lambda) update for the last chosen action.

        The bootstrap value is 0 when the episode terminated or the step
        budget is reached; otherwise it is the larger Q-value of the next
        state. No discount factor is applied.
        """
        if done or self.nsteps == self.n_max_step:
            max_q = 0
        else:
            if self.processed is True:
                next_state = next_observation
            else:
                next_state = process(next_observation, n=self.width, k=self.height, g=self.gap)
            self._ensure_state(next_state)
            max_q = max(self.q[(next_state, 0)], self.q[(next_state, 1)])
        current = (self.current_state, self.current_action)
        delta = reward + max_q - self.q[current]
        self.e[current] += 1
        for key in self.q:
            self.q[key] += self.alpha * delta * self.e[key]
            self.e[key] *= self.lambd

    def save(self):
        """Pickle Q-values and traces under ``data/``, creating it if needed."""
        os.makedirs('data', exist_ok=True)
        q_path, e_path = self._paths()
        with open(q_path, 'wb') as f:
            pickle.dump(self.q, f)
        with open(e_path, 'wb') as f:
            pickle.dump(self.e, f)


Run the code:

In [10]:
def training(env, policy, n_episodes, n_max_step, n_testing_moments=100, use_wandb=None):
    """Train ``policy`` on ``env`` and evaluate it at regular intervals.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``.
        policy: agent exposing ``reset``, ``chose_action`` and ``update``.
        n_episodes: number of training episodes.
        n_max_step: maximum number of steps per episode.
        n_testing_moments: approximate number of evaluations spread over the
            run.
        use_wandb: whether to log each evaluation score to wandb. When None,
            the module-level ``do_wandb`` flag is used if it exists, else
            False.

    Returns:
        The list of evaluation scores (one single-episode score per
        evaluation).
    """
    if use_wandb is None:
        # do_wandb is only defined when the file runs as a script.
        use_wandb = globals().get('do_wandb', False)
    # At least 1 so the modulo below is defined when there are fewer
    # episodes than requested evaluations.
    sep_test = max(1, n_episodes // n_testing_moments)
    li_scores = []
    for num_ep in tqdm(range(n_episodes)):
        done = False
        obs, info = env.reset()
        policy.reset()
        n_step = 0
        while not done and n_step < n_max_step:
            n_step += 1
            # Select the next action.
            action = policy.chose_action(obs, training=True)
            # Apply it and let the policy learn from the outcome.
            obs, reward, done, _, info = env.step(action)
            policy.update(reward, obs, done)
        if num_ep % sep_test == 0:
            score_moyen, _ = testing(env, policy, n_episodes=1, n_max_step=n_max_step)
            li_scores.append(score_moyen)
            print('score', score_moyen)
            if use_wandb:
                wandb.log({"score": score_moyen}, step=num_ep)
    return li_scores
                

def testing(env, policy,n_episodes, n_max_step):
    score_moyen = 0
    policy.reset()
    for _ in range(n_episodes):
        done = False
        obs,info = env.reset()
        n_step = 0
        # iterate
        n_terminated = 0 

        while (done is False) and (n_step < n_max_step):
            n_step += 1
            # Select next action
            action = policy.chose_action(obs,training = False)
            # Appy action and return new observation of the environment
            obs, reward, done, _, info = env.step(action)
            if n_step == n_max_step:
                n_terminated += 1
        score_moyen += n_step
    score_moyen = score_moyen/n_episodes
    return score_moyen, n_terminated

def show(env, policy, n_episodes):
    """Play ``n_episodes`` greedy episodes and draw each frame in the terminal.

    After every step the terminal is cleared, the rendered frame is written
    to stdout, and the observation and step count are printed. An episode
    only stops when the environment reports it as done.
    """
    policy.reset()
    for _ in range(n_episodes):
        obs, info = env.reset()
        frame = 0
        while True:
            frame += 1
            # Greedy action: exploration is disabled.
            chosen = policy.chose_action(obs, training=False)
            obs, reward, done, _, info = env.step(chosen)
            os.system("clear")
            sys.stdout.write(env.render())
            time.sleep(0.1)  # frame delay
            print(obs)
            print(frame)
            if done is not False:
                break


if __name__ == '__main__':
    # Experiment configuration.
    trained = False
    continue_training = False
    n_max_step = 1000
    n_training = 5000
    n_testing = 100
    eps = 0.01
    alpha = 0.2
    #name_env = 'TextFlappyBird-v0' # does not depend on the quality of the environment
    name_env = 'TextFlappyBird-screen-v0' # depends on the quality of the environment
    name_policy = 'MC'
    #name_policy = 'Sarsa'
    height = 15
    width = 20
    gap = 4
    do_wandb = True

    # The screen environment returns raw grids that the agents must reduce
    # themselves; the other one returns ready-to-use observations.
    if name_env == 'TextFlappyBird-v0':
        processed = True
    elif name_env == 'TextFlappyBird-screen-v0':
        processed = False
    else:
        raise ValueError("This environment doesn't exist!")

    # Five independent runs with the same configuration.
    for run in range(5):
        date = datetime.datetime.now().strftime('%dth%mmo_%Hh%Mmin%Ss')
        name = f"{name_policy} processed= {processed} height: {height} width: {width} gap: {gap}date: {date}"
        if do_wandb:
            wandb.init(
                # wandb project where this run will be logged
                project="FlappyBird-RL",
                name=name,
                # hyperparameters and run metadata
                config={
                    "processed": processed,
                    "name_policy": name_policy,
                    "height": height,
                    "width": width,
                    "gap": gap,
                    "True_flappy": False,
                    "eps": eps,
                    "alpha": alpha,
                })
        env = gym.make(name_env, height=height, width=width, pipe_gap=gap)
        if name_policy == 'MC':
            policy = MC(eps=eps, alpha=alpha, n_max_step=n_max_step, height=height, width=width, gap=gap, trained=trained, processed=processed)
        elif name_policy == 'Sarsa':
            policy = Sarsa(eps=eps, alpha=alpha, n_max_step=n_max_step, height=height, width=width, gap=gap, trained=trained, processed=processed, lambd=0.9)
        else:
            # Fail loudly instead of silently falling back to Sarsa.
            raise ValueError("This policy doesn't exist!")
        if continue_training or not trained:
            li_scores = training(env, policy, n_episodes=n_training, n_max_step=n_max_step)
            # Dumping the scores to disk is currently disabled.
            file_name = 'data/scores' + name + '.pkl'
            #with open(file_name, 'wb') as f:
            #    pickle.dump(li_scores, f)
        score_moyen, n_terminated = testing(env, policy, n_episodes=n_testing, n_max_step=n_max_step)
        print("score moyen", score_moyen, "nombre reussite totale", n_terminated)
        # Render the game
        #show(env,policy,1)
        policy.save()
        env.close()
        if do_wandb:
            wandb.finish()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▂▆▃▁▃▃▁▁▁▅▃█▅

0,1
score,33.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  1%|          | 36/5000 [00:00<00:29, 167.67it/s]

score 5.0


  1%|▏         | 69/5000 [00:00<00:36, 134.22it/s]

score 6.0


  3%|▎         | 131/5000 [00:00<00:33, 146.25it/s]

score 23.0


  4%|▎         | 176/5000 [00:01<00:34, 138.10it/s]

score 13.0


  4%|▍         | 218/5000 [00:01<00:39, 121.52it/s]

score 36.0


  5%|▌         | 269/5000 [00:02<00:43, 109.71it/s]

score 23.0


  6%|▋         | 315/5000 [00:02<00:46, 101.60it/s]

score 13.0


  7%|▋         | 360/5000 [00:03<00:48, 95.71it/s] 

score 33.0


  8%|▊         | 409/5000 [00:03<00:55, 83.01it/s]

score 33.0


  9%|▉         | 470/5000 [00:04<00:47, 94.59it/s]

score 18.0


 10%|█         | 522/5000 [00:04<00:46, 96.77it/s]

score 13.0


 11%|█         | 559/5000 [00:05<00:57, 77.69it/s]

score 19.0


 12%|█▏        | 612/5000 [00:06<01:04, 68.37it/s]

score 23.0


 13%|█▎        | 662/5000 [00:06<00:58, 74.58it/s]

score 22.0


 14%|█▍        | 711/5000 [00:07<00:53, 79.66it/s]

score 13.0


 15%|█▌        | 761/5000 [00:08<01:08, 62.02it/s]

score 19.0


 16%|█▌        | 809/5000 [00:08<01:04, 65.47it/s]

score 13.0


 17%|█▋        | 859/5000 [00:09<01:06, 62.60it/s]

score 23.0


 18%|█▊        | 908/5000 [00:10<01:09, 59.16it/s]

score 23.0


 19%|█▉        | 957/5000 [00:11<01:09, 57.83it/s]

score 21.0


 20%|██        | 1012/5000 [00:12<01:07, 58.71it/s]

score 33.0


 21%|██        | 1056/5000 [00:13<01:20, 48.71it/s]

score 53.0


 22%|██▏       | 1113/5000 [00:14<01:16, 50.98it/s]

score 33.0


 23%|██▎       | 1162/5000 [00:15<01:11, 53.43it/s]

score 23.0


 24%|██▍       | 1211/5000 [00:16<01:08, 55.29it/s]

score 33.0


 25%|██▌       | 1260/5000 [00:17<01:13, 50.66it/s]

score 23.0


 26%|██▌       | 1309/5000 [00:18<01:15, 48.75it/s]

score 43.0


 27%|██▋       | 1357/5000 [00:19<01:13, 49.28it/s]

score 23.0


 28%|██▊       | 1408/5000 [00:20<01:38, 36.33it/s]

score 23.0


 29%|██▉       | 1457/5000 [00:21<01:22, 43.02it/s]

score 33.0


 30%|███       | 1505/5000 [00:23<01:46, 32.67it/s]

score 73.0


 31%|███       | 1553/5000 [00:24<02:02, 28.12it/s]

score 53.0


 32%|███▏      | 1609/5000 [00:26<01:42, 32.99it/s]

score 43.0


 33%|███▎      | 1657/5000 [00:28<02:10, 25.55it/s]

score 73.0


 34%|███▍      | 1704/5000 [00:29<01:59, 27.50it/s]

score 73.0


 35%|███▌      | 1753/5000 [00:31<02:12, 24.48it/s]

score 85.0


 36%|███▌      | 1805/5000 [00:34<02:47, 19.08it/s]

score 203.0


 37%|███▋      | 1855/5000 [00:36<02:27, 21.29it/s]

score 103.0


 38%|███▊      | 1904/5000 [00:38<02:25, 21.27it/s]

score 53.0


 39%|███▉      | 1954/5000 [00:41<03:07, 16.24it/s]

score 263.0


 40%|████      | 2003/5000 [00:44<03:13, 15.49it/s]

score 203.0


 41%|████      | 2053/5000 [00:47<02:26, 20.07it/s]

score 113.0


 42%|████▏     | 2106/5000 [00:50<03:04, 15.67it/s]

score 213.0


 43%|████▎     | 2153/5000 [00:53<02:28, 19.20it/s]

score 53.0


 44%|████▍     | 2204/5000 [00:56<01:58, 23.68it/s]

score 123.0


 45%|████▌     | 2253/5000 [00:59<03:11, 14.38it/s]

score 73.0


 46%|████▌     | 2303/5000 [01:02<02:50, 15.78it/s]

score 293.0


 47%|████▋     | 2351/5000 [01:05<01:54, 23.18it/s]

score 23.0


 48%|████▊     | 2404/5000 [01:09<02:57, 14.65it/s]

score 323.0


 49%|████▉     | 2453/5000 [01:13<02:43, 15.59it/s]

score 213.0


 50%|█████     | 2502/5000 [01:18<04:28,  9.32it/s]

score 113.0


 51%|█████     | 2550/5000 [01:21<03:10, 12.89it/s]

score 73.0


 52%|█████▏    | 2603/5000 [01:25<02:19, 17.24it/s]

score 83.0


 53%|█████▎    | 2652/5000 [01:29<03:50, 10.17it/s]

score 203.0


 54%|█████▍    | 2703/5000 [01:33<03:07, 12.24it/s]

score 33.0


 55%|█████▌    | 2754/5000 [01:36<02:46, 13.47it/s]

score 33.0


 56%|█████▌    | 2802/5000 [01:40<03:12, 11.40it/s]

score 433.0


 57%|█████▋    | 2852/5000 [01:43<02:08, 16.73it/s]

score 63.0


 58%|█████▊    | 2903/5000 [01:48<02:58, 11.73it/s]

score 23.0


 59%|█████▉    | 2955/5000 [01:53<02:44, 12.47it/s]

score 43.0


 60%|██████    | 3003/5000 [01:56<02:29, 13.36it/s]

score 63.0


 61%|██████    | 3051/5000 [01:59<02:32, 12.80it/s]

score 213.0


 62%|██████▏   | 3101/5000 [02:03<01:56, 16.30it/s]

score 83.0


 63%|██████▎   | 3152/5000 [02:08<02:40, 11.53it/s]

score 173.0


 64%|██████▍   | 3203/5000 [02:13<03:08,  9.51it/s]

score 303.0


 65%|██████▌   | 3251/5000 [02:18<04:27,  6.53it/s]

score 853.0


 66%|██████▌   | 3303/5000 [02:24<02:43, 10.37it/s]

score 453.0


 67%|██████▋   | 3351/5000 [02:30<04:37,  5.94it/s]

score 233.0


 68%|██████▊   | 3401/5000 [02:35<04:51,  5.49it/s]

score 133.0


 69%|██████▉   | 3451/5000 [02:44<04:25,  5.83it/s]

score 93.0


 70%|███████   | 3501/5000 [02:50<04:43,  5.28it/s]

score 623.0


 71%|███████   | 3552/5000 [02:58<05:01,  4.80it/s]

score 1000.0


 72%|███████▏  | 3602/5000 [03:04<03:42,  6.28it/s]

score 43.0


 73%|███████▎  | 3652/5000 [03:11<03:26,  6.54it/s]

score 523.0


 74%|███████▍  | 3702/5000 [03:18<03:44,  5.78it/s]

score 1000.0


 75%|███████▌  | 3753/5000 [03:26<02:50,  7.31it/s]

score 333.0


 76%|███████▌  | 3801/5000 [03:31<02:30,  7.96it/s]

score 283.0


 77%|███████▋  | 3853/5000 [03:37<01:35, 12.04it/s]

score 73.0


 78%|███████▊  | 3900/5000 [03:42<01:06, 16.48it/s]

score 533.0


 79%|███████▉  | 3954/5000 [03:49<02:08,  8.13it/s]

score 83.0


 80%|████████  | 4001/5000 [03:53<01:54,  8.75it/s]

score 243.0


 81%|████████  | 4052/5000 [04:00<02:36,  6.05it/s]

score 213.0


 82%|████████▏ | 4103/5000 [04:08<01:20, 11.16it/s]

score 33.0


 83%|████████▎ | 4154/5000 [04:13<01:13, 11.55it/s]

score 193.0


 84%|████████▍ | 4201/5000 [04:19<01:55,  6.91it/s]

score 553.0


 85%|████████▌ | 4251/5000 [04:27<03:25,  3.65it/s]

score 1000.0


 86%|████████▌ | 4301/5000 [04:36<03:34,  3.25it/s]

score 663.0


 87%|████████▋ | 4353/5000 [04:43<00:55, 11.75it/s]

score 123.0


 88%|████████▊ | 4405/5000 [04:47<00:33, 17.87it/s]

score 23.0


 89%|████████▉ | 4452/5000 [04:51<00:47, 11.44it/s]

score 23.0


 90%|█████████ | 4501/5000 [04:56<00:42, 11.67it/s]

score 53.0


 91%|█████████ | 4552/5000 [05:01<00:47,  9.51it/s]

score 173.0


 92%|█████████▏| 4602/5000 [05:06<00:54,  7.27it/s]

score 13.0


 93%|█████████▎| 4652/5000 [05:13<01:03,  5.50it/s]

score 723.0


 94%|█████████▍| 4701/5000 [05:19<00:44,  6.72it/s]

score 113.0


 95%|█████████▌| 4752/5000 [05:26<00:24,  9.95it/s]

score 96.0


 96%|█████████▌| 4802/5000 [05:31<00:24,  8.20it/s]

score 383.0


 97%|█████████▋| 4852/5000 [05:37<00:19,  7.61it/s]

score 53.0


 98%|█████████▊| 4901/5000 [05:45<00:16,  6.18it/s]

score 213.0


 99%|█████████▉| 4951/5000 [05:52<00:06,  7.88it/s]

score 383.0


100%|██████████| 5000/5000 [06:00<00:00, 13.89it/s]


score moyen 394.06 nombre reussite totale 0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▃▂▂▁▁▁▂▄▂█▅▃▅▂▂▆▁▂▆▄▄

0,1
score,383.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889738, max=1.0…

  1%|          | 36/5000 [00:00<00:31, 158.05it/s]

score 13.0


  1%|▏         | 67/5000 [00:00<00:35, 139.70it/s]

score 43.0


  3%|▎         | 126/5000 [00:00<00:37, 128.86it/s]

score 4.0


  3%|▎         | 165/5000 [00:01<00:39, 120.96it/s]

score 13.0


  4%|▍         | 214/5000 [00:01<00:41, 114.07it/s]

score 13.0


  5%|▌         | 263/5000 [00:02<00:43, 108.74it/s]

score 13.0


  6%|▋         | 313/5000 [00:02<00:42, 110.48it/s]

score 23.0


  7%|▋         | 362/5000 [00:03<00:44, 103.60it/s]

score 33.0


  8%|▊         | 421/5000 [00:03<00:43, 105.29it/s]

score 13.0


  9%|▉         | 455/5000 [00:03<00:44, 103.16it/s]

score 13.0


 10%|█         | 517/5000 [00:04<00:50, 89.11it/s] 

score 23.0


 11%|█         | 561/5000 [00:05<00:49, 90.15it/s]

score 23.0


 12%|█▏        | 620/5000 [00:05<00:50, 86.66it/s]

score 43.0


 13%|█▎        | 660/5000 [00:06<00:52, 82.90it/s]

score 13.0


 14%|█▍        | 713/5000 [00:06<00:54, 78.81it/s]

score 43.0


 15%|█▌        | 765/5000 [00:07<00:51, 82.24it/s]

score 13.0


 16%|█▋        | 817/5000 [00:08<00:54, 77.09it/s]

score 15.0


 17%|█▋        | 860/5000 [00:09<01:05, 63.39it/s]

score 23.0


 18%|█▊        | 912/5000 [00:09<01:08, 59.98it/s]

score 43.0


 19%|█▉        | 965/5000 [00:10<01:01, 65.57it/s]

score 13.0


 20%|██        | 1007/5000 [00:11<01:18, 50.59it/s]

score 73.0


 21%|██        | 1056/5000 [00:12<01:16, 51.72it/s]

score 23.0


 22%|██▏       | 1107/5000 [00:13<01:38, 39.42it/s]

score 33.0


 23%|██▎       | 1162/5000 [00:15<01:20, 47.59it/s]

score 23.0


 24%|██▍       | 1201/5000 [00:15<01:22, 46.10it/s]

score 53.0


 25%|██▌       | 1256/5000 [00:17<01:45, 35.42it/s]

score 73.0


 26%|██▌       | 1307/5000 [00:18<01:53, 32.63it/s]

score 63.0


 27%|██▋       | 1357/5000 [00:20<01:40, 36.29it/s]

score 13.0


 28%|██▊       | 1408/5000 [00:21<01:15, 47.49it/s]

score 45.0


 29%|██▉       | 1451/5000 [00:22<01:44, 33.95it/s]

score 143.0


 30%|███       | 1504/5000 [00:24<02:17, 25.49it/s]

score 93.0


 31%|███       | 1557/5000 [00:26<01:25, 40.26it/s]

score 93.0


 32%|███▏      | 1605/5000 [00:27<01:50, 30.70it/s]

score 13.0


 33%|███▎      | 1654/5000 [00:29<02:12, 25.26it/s]

score 23.0


 34%|███▍      | 1702/5000 [00:31<02:15, 24.36it/s]

score 63.0


 35%|███▌      | 1756/5000 [00:33<02:16, 23.85it/s]

score 23.0


 36%|███▌      | 1807/5000 [00:35<01:38, 32.50it/s]

score 33.0


 37%|███▋      | 1853/5000 [00:37<02:30, 20.85it/s]

score 23.0


 38%|███▊      | 1906/5000 [00:39<02:14, 22.96it/s]

score 33.0


 39%|███▉      | 1959/5000 [00:41<01:44, 29.15it/s]

score 36.0


 40%|████      | 2003/5000 [00:44<03:41, 13.53it/s]

score 84.0


 41%|████      | 2054/5000 [00:47<02:37, 18.75it/s]

score 73.0


 42%|████▏     | 2104/5000 [00:49<02:41, 17.93it/s]

score 33.0


 43%|████▎     | 2156/5000 [00:52<02:40, 17.71it/s]

score 23.0


 44%|████▍     | 2205/5000 [00:55<02:18, 20.12it/s]

score 14.0


 45%|████▌     | 2254/5000 [00:58<02:59, 15.28it/s]

score 83.0


 46%|████▌     | 2305/5000 [01:01<02:45, 16.24it/s]

score 193.0


 47%|████▋     | 2353/5000 [01:04<03:22, 13.06it/s]

score 193.0


 48%|████▊     | 2406/5000 [01:08<02:30, 17.24it/s]

score 43.0


 49%|████▉     | 2453/5000 [01:12<03:10, 13.34it/s]

score 93.0


 50%|█████     | 2502/5000 [01:16<05:38,  7.38it/s]

score 543.0


 51%|█████     | 2555/5000 [01:21<03:16, 12.46it/s]

score 393.0


 52%|█████▏    | 2604/5000 [01:25<03:59, 10.01it/s]

score 83.0


 53%|█████▎    | 2651/5000 [01:30<04:42,  8.32it/s]

score 193.0


 54%|█████▍    | 2702/5000 [01:35<04:47,  7.99it/s]

score 14.0


 55%|█████▌    | 2756/5000 [01:42<03:17, 11.36it/s]

score 133.0


 56%|█████▌    | 2806/5000 [01:48<03:52,  9.43it/s]

score 243.0


 57%|█████▋    | 2852/5000 [01:54<04:12,  8.51it/s]

score 93.0


 58%|█████▊    | 2904/5000 [01:59<03:29, 10.01it/s]

score 63.0


 59%|█████▉    | 2953/5000 [02:04<03:00, 11.32it/s]

score 153.0


 60%|██████    | 3000/5000 [02:07<02:13, 15.02it/s]

score 63.0


 61%|██████    | 3054/5000 [02:13<02:46, 11.66it/s]

score 83.0


 62%|██████▏   | 3104/5000 [02:16<02:34, 12.29it/s]

score 174.0


 63%|██████▎   | 3152/5000 [02:21<03:36,  8.54it/s]

score 263.0


 64%|██████▍   | 3202/5000 [02:27<04:19,  6.93it/s]

score 543.0


 65%|██████▌   | 3251/5000 [02:33<03:29,  8.34it/s]

score 275.0


 66%|██████▌   | 3304/5000 [02:40<04:12,  6.71it/s]

score 733.0


 67%|██████▋   | 3351/5000 [02:48<03:32,  7.77it/s]

score 123.0


 68%|██████▊   | 3401/5000 [02:56<06:31,  4.09it/s]

score 563.0


 69%|██████▉   | 3452/5000 [03:04<04:39,  5.54it/s]

score 543.0


 70%|███████   | 3503/5000 [03:10<04:00,  6.23it/s]

score 900.0


 71%|███████   | 3553/5000 [03:16<03:27,  6.99it/s]

score 633.0


 72%|███████▏  | 3603/5000 [03:24<04:13,  5.51it/s]

score 93.0


 73%|███████▎  | 3651/5000 [03:34<04:14,  5.31it/s]

score 914.0


 74%|███████▍  | 3701/5000 [03:41<03:08,  6.90it/s]

score 353.0


 75%|███████▌  | 3751/5000 [03:48<05:11,  4.01it/s]

score 883.0


 76%|███████▌  | 3800/5000 [03:54<02:28,  8.09it/s]

score 53.0


 77%|███████▋  | 3851/5000 [04:01<04:08,  4.63it/s]

score 203.0


 78%|███████▊  | 3901/5000 [04:09<03:01,  6.06it/s]

score 223.0


 79%|███████▉  | 3951/5000 [04:18<03:38,  4.80it/s]

score 153.0


 80%|████████  | 4001/5000 [04:27<03:46,  4.42it/s]

score 213.0


 81%|████████  | 4051/5000 [04:37<03:43,  4.24it/s]

score 185.0


 82%|████████▏ | 4101/5000 [04:45<02:43,  5.50it/s]

score 163.0


 83%|████████▎ | 4151/5000 [04:54<01:39,  8.51it/s]

score 373.0


 84%|████████▍ | 4201/5000 [05:01<01:24,  9.43it/s]

score 264.0


 85%|████████▌ | 4252/5000 [05:09<03:38,  3.42it/s]

score 1000.0


 86%|████████▌ | 4303/5000 [05:18<02:11,  5.29it/s]

score 863.0


 87%|████████▋ | 4351/5000 [05:23<01:09,  9.37it/s]

score 83.0


 88%|████████▊ | 4404/5000 [05:32<02:11,  4.53it/s]

score 1000.0


 89%|████████▉ | 4451/5000 [05:39<01:33,  5.90it/s]

score 353.0


 90%|█████████ | 4501/5000 [05:47<01:12,  6.90it/s]

score 283.0


 91%|█████████ | 4551/5000 [05:54<01:16,  5.86it/s]

score 273.0


 92%|█████████▏| 4602/5000 [06:01<00:57,  6.89it/s]

score 595.0


 93%|█████████▎| 4651/5000 [06:09<00:54,  6.43it/s]

score 453.0


 94%|█████████▍| 4700/5000 [06:17<00:32,  9.12it/s]

score 143.0


 95%|█████████▌| 4753/5000 [06:23<00:20, 12.09it/s]

score 94.0


 96%|█████████▌| 4802/5000 [06:27<00:18, 10.43it/s]

score 114.0


 97%|█████████▋| 4851/5000 [06:33<00:20,  7.10it/s]

score 325.0


 98%|█████████▊| 4903/5000 [06:40<00:14,  6.59it/s]

score 483.0


 99%|█████████▉| 4952/5000 [06:47<00:06,  7.84it/s]

score 32.0


100%|██████████| 5000/5000 [06:55<00:00, 12.02it/s]


score moyen 414.88 nombre reussite totale 0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▁▂▁▅▂▂▁▁▃▆▅▅▇▁▃▂▄▇█▃▄▂▁

0,1
score,32.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  1%|          | 37/5000 [00:00<00:27, 182.60it/s]

score 4.0


  1%|▏         | 74/5000 [00:00<00:32, 151.46it/s]

score 13.0


  2%|▏         | 118/5000 [00:00<00:37, 129.20it/s]

score 13.0


  4%|▎         | 176/5000 [00:01<00:37, 128.29it/s]

score 13.0


  4%|▍         | 216/5000 [00:01<00:38, 125.71it/s]

score 13.0


  5%|▌         | 268/5000 [00:02<00:42, 112.39it/s]

score 13.0


  6%|▋         | 314/5000 [00:02<00:44, 105.36it/s]

score 13.0


  7%|▋         | 367/5000 [00:03<00:54, 85.03it/s] 

score 13.0


  8%|▊         | 411/5000 [00:03<00:47, 96.98it/s]

score 13.0


  9%|▉         | 464/5000 [00:04<00:52, 85.78it/s] 

score 13.0


 10%|█         | 513/5000 [00:04<01:11, 63.10it/s]

score 23.0


 11%|█         | 560/5000 [00:05<01:06, 66.65it/s]

score 23.0


 12%|█▏        | 614/5000 [00:06<01:04, 68.33it/s]

score 23.0


 13%|█▎        | 662/5000 [00:07<01:16, 56.49it/s]

score 33.0


 14%|█▍        | 706/5000 [00:08<01:14, 57.83it/s]

score 23.0


 15%|█▌        | 756/5000 [00:09<01:25, 49.48it/s]

score 23.0


 16%|█▌        | 806/5000 [00:10<01:52, 37.28it/s]

score 43.0


 17%|█▋        | 853/5000 [00:11<01:34, 44.11it/s]

score 13.0


 18%|█▊        | 908/5000 [00:12<01:45, 38.89it/s]

score 33.0


 19%|█▉        | 961/5000 [00:13<01:31, 44.14it/s]

score 33.0


 20%|██        | 1007/5000 [00:15<01:46, 37.32it/s]

score 33.0


 21%|██        | 1056/5000 [00:16<01:39, 39.68it/s]

score 43.0


 22%|██▏       | 1106/5000 [00:17<01:49, 35.64it/s]

score 43.0


 23%|██▎       | 1155/5000 [00:19<02:02, 31.26it/s]

score 33.0


 24%|██▍       | 1207/5000 [00:20<01:51, 33.89it/s]

score 33.0


 25%|██▌       | 1252/5000 [00:22<02:20, 26.77it/s]

score 33.0


 26%|██▌       | 1304/5000 [00:24<02:33, 24.06it/s]

score 133.0


 27%|██▋       | 1357/5000 [00:26<01:55, 31.44it/s]

score 33.0


 28%|██▊       | 1405/5000 [00:28<02:15, 26.61it/s]

score 203.0


 29%|██▉       | 1457/5000 [00:31<02:26, 24.15it/s]

score 83.0


 30%|███       | 1503/5000 [00:33<05:08, 11.35it/s]

score 425.0


 31%|███       | 1552/5000 [00:36<03:37, 15.83it/s]

score 213.0


 32%|███▏      | 1606/5000 [00:38<01:57, 28.86it/s]

score 43.0


 33%|███▎      | 1651/5000 [00:41<03:05, 18.03it/s]

score 245.0


 34%|███▍      | 1706/5000 [00:43<02:32, 21.67it/s]

score 383.0


 35%|███▌      | 1754/5000 [00:46<03:13, 16.79it/s]

score 64.0


 36%|███▌      | 1802/5000 [00:48<02:08, 24.98it/s]

score 25.0


 37%|███▋      | 1856/5000 [00:50<02:41, 19.42it/s]

score 143.0


 38%|███▊      | 1904/5000 [00:54<03:03, 16.85it/s]

score 33.0


 39%|███▉      | 1955/5000 [00:57<03:12, 15.78it/s]

score 293.0


 40%|████      | 2002/5000 [01:01<05:16,  9.47it/s]

score 513.0


 41%|████      | 2052/5000 [01:03<03:15, 15.09it/s]

score 133.0


 42%|████▏     | 2105/5000 [01:07<02:57, 16.30it/s]

score 113.0


 43%|████▎     | 2155/5000 [01:11<04:56,  9.60it/s]

score 233.0


 44%|████▍     | 2204/5000 [01:15<04:34, 10.19it/s]

score 313.0


 45%|████▌     | 2253/5000 [01:20<05:14,  8.74it/s]

score 533.0


 46%|████▌     | 2305/5000 [01:24<03:10, 14.15it/s]

score 233.0


 47%|████▋     | 2354/5000 [01:28<04:12, 10.47it/s]

score 543.0


 48%|████▊     | 2401/5000 [01:34<05:04,  8.53it/s]

score 123.0


 49%|████▉     | 2451/5000 [01:40<03:34, 11.89it/s]

score 63.0


 50%|█████     | 2502/5000 [01:46<07:50,  5.31it/s]

score 1000.0


 51%|█████     | 2554/5000 [01:52<04:10,  9.78it/s]

score 110.0


 52%|█████▏    | 2603/5000 [01:58<06:18,  6.33it/s]

score 1000.0


 53%|█████▎    | 2652/5000 [02:03<04:07,  9.47it/s]

score 253.0


 54%|█████▍    | 2703/5000 [02:10<05:48,  6.59it/s]

score 933.0


 55%|█████▌    | 2751/5000 [02:16<09:30,  3.95it/s]

score 1000.0


 56%|█████▌    | 2805/5000 [02:22<03:27, 10.58it/s]

score 513.0


 57%|█████▋    | 2855/5000 [02:29<04:05,  8.74it/s]

score 953.0


 58%|█████▊    | 2903/5000 [02:33<01:55, 18.17it/s]

score 32.0


 59%|█████▉    | 2953/5000 [02:38<02:57, 11.55it/s]

score 83.0


 60%|██████    | 3003/5000 [02:44<02:07, 15.70it/s]

score 164.0


 61%|██████    | 3054/5000 [02:50<03:57,  8.20it/s]

score 633.0


 62%|██████▏   | 3099/5000 [02:55<03:28,  9.10it/s]

score 94.0


 63%|██████▎   | 3151/5000 [03:02<06:54,  4.47it/s]

score 843.0


 64%|██████▍   | 3204/5000 [03:08<02:31, 11.84it/s]

score 183.0


 65%|██████▌   | 3253/5000 [03:13<04:49,  6.04it/s]

score 1000.0


 66%|██████▌   | 3301/5000 [03:20<04:39,  6.07it/s]

score 273.0


 67%|██████▋   | 3351/5000 [03:27<06:07,  4.49it/s]

score 63.0


 68%|██████▊   | 3402/5000 [03:37<05:56,  4.48it/s]

score 813.0


 69%|██████▉   | 3452/5000 [03:43<02:55,  8.81it/s]

score 23.0


 70%|███████   | 3501/5000 [03:49<05:08,  4.85it/s]

score 1000.0


 71%|███████   | 3553/5000 [03:57<03:47,  6.37it/s]

score 150.0


 72%|███████▏  | 3604/5000 [04:05<01:58, 11.76it/s]

score 83.0


 73%|███████▎  | 3655/5000 [04:08<01:04, 20.82it/s]

score 183.0


 74%|███████▍  | 3702/5000 [04:11<01:39, 13.02it/s]

score 73.0


 75%|███████▌  | 3752/5000 [04:15<01:39, 12.53it/s]

score 253.0


 76%|███████▌  | 3801/5000 [04:20<03:21,  5.94it/s]

score 163.0


 77%|███████▋  | 3852/5000 [04:26<02:03,  9.27it/s]

score 24.0


 78%|███████▊  | 3901/5000 [04:32<01:36, 11.34it/s]

score 143.0


 79%|███████▉  | 3951/5000 [04:40<04:33,  3.83it/s]

score 713.0


 80%|████████  | 4003/5000 [04:50<02:17,  7.28it/s]

score 443.0


 81%|████████  | 4051/5000 [05:01<04:58,  3.18it/s]

score 1000.0


 82%|████████▏ | 4101/5000 [05:13<06:19,  2.37it/s]

score 1000.0


 83%|████████▎ | 4151/5000 [05:24<04:37,  3.06it/s]

score 153.0


 84%|████████▍ | 4204/5000 [05:31<01:48,  7.34it/s]

score 374.0


 85%|████████▌ | 4251/5000 [05:38<03:02,  4.11it/s]

score 1000.0


 86%|████████▌ | 4301/5000 [05:48<02:42,  4.31it/s]

score 153.0


 87%|████████▋ | 4351/5000 [05:56<02:49,  3.84it/s]

score 1000.0


 88%|████████▊ | 4401/5000 [06:02<02:14,  4.44it/s]

score 1000.0


 89%|████████▉ | 4451/5000 [06:11<01:19,  6.91it/s]

score 163.0


 90%|█████████ | 4501/5000 [06:18<00:48, 10.21it/s]

score 283.0


 91%|█████████ | 4551/5000 [06:25<01:24,  5.30it/s]

score 533.0


 92%|█████████▏| 4601/5000 [06:36<02:05,  3.17it/s]

score 403.0


 93%|█████████▎| 4652/5000 [06:44<01:00,  5.78it/s]

score 533.0


 94%|█████████▍| 4702/5000 [06:55<01:02,  4.75it/s]

score 923.0


 95%|█████████▌| 4751/5000 [07:05<00:58,  4.27it/s]

score 263.0


 96%|█████████▌| 4801/5000 [07:15<00:33,  5.99it/s]

score 163.0


 97%|█████████▋| 4854/5000 [07:19<00:08, 16.56it/s]

score 133.0


 98%|█████████▊| 4901/5000 [07:24<00:12,  7.86it/s]

score 123.0


 99%|█████████▉| 4951/5000 [07:31<00:14,  3.43it/s]

score 883.0


100%|██████████| 5000/5000 [07:37<00:00, 10.92it/s]


score moyen 227.07 nombre reussite totale 0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▁▁▁▁▁▁▁▁▁▁▁▁▄▃▁▁▅▃▅▂█▃█▁▂▇▃▇▂▂▂▂█▂▂█▅▅▂▇

0,1
score,883.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777776363, max=1.0…

  1%|          | 36/5000 [00:00<00:29, 167.42it/s]

score 6.0


  1%|▏         | 70/5000 [00:00<00:34, 144.53it/s]

score 17.0


  3%|▎         | 128/5000 [00:00<00:37, 130.83it/s]

score 6.0


  3%|▎         | 172/5000 [00:01<00:35, 137.18it/s]

score 23.0


  4%|▍         | 214/5000 [00:01<00:36, 130.68it/s]

score 13.0


  5%|▌         | 270/5000 [00:02<00:40, 116.51it/s]

score 32.0


  6%|▋         | 320/5000 [00:02<00:40, 116.45it/s]

score 13.0


  7%|▋         | 365/5000 [00:02<00:48, 95.14it/s] 

score 13.0


  8%|▊         | 415/5000 [00:03<00:52, 86.53it/s]

score 23.0


  9%|▉         | 465/5000 [00:04<00:49, 92.09it/s]

score 16.0


 10%|█         | 511/5000 [00:04<00:45, 97.75it/s] 

score 23.0


 11%|█▏        | 563/5000 [00:05<00:53, 83.18it/s]

score 23.0


 12%|█▏        | 617/5000 [00:05<01:00, 72.54it/s]

score 23.0


 13%|█▎        | 664/5000 [00:06<01:00, 71.25it/s]

score 47.0


 14%|█▍        | 711/5000 [00:07<00:58, 73.68it/s]

score 23.0


 15%|█▌        | 759/5000 [00:07<01:00, 69.60it/s]

score 13.0


 16%|█▌        | 812/5000 [00:08<01:09, 60.50it/s]

score 23.0


 17%|█▋        | 861/5000 [00:09<01:05, 63.01it/s]

score 23.0


 18%|█▊        | 909/5000 [00:10<01:13, 55.71it/s]

score 23.0


 19%|█▉        | 962/5000 [00:11<01:03, 63.28it/s]

score 43.0


 20%|██        | 1009/5000 [00:12<01:14, 53.63it/s]

score 73.0


 21%|██        | 1059/5000 [00:13<01:11, 54.97it/s]

score 23.0


 22%|██▏       | 1107/5000 [00:14<01:28, 44.09it/s]

score 27.0


 23%|██▎       | 1157/5000 [00:15<01:22, 46.61it/s]

score 33.0


 24%|██▍       | 1210/5000 [00:16<01:13, 51.69it/s]

score 23.0


 25%|██▌       | 1262/5000 [00:17<01:14, 50.40it/s]

score 33.0


 26%|██▌       | 1308/5000 [00:18<01:23, 44.04it/s]

score 83.0


 27%|██▋       | 1356/5000 [00:19<01:43, 35.23it/s]

score 23.0


 28%|██▊       | 1405/5000 [00:20<01:28, 40.78it/s]

score 43.0


 29%|██▉       | 1456/5000 [00:21<01:22, 42.98it/s]

score 23.0


 30%|███       | 1506/5000 [00:22<01:24, 41.54it/s]

score 33.0


 31%|███       | 1561/5000 [00:24<01:20, 42.94it/s]

score 63.0


 32%|███▏      | 1606/5000 [00:25<01:18, 43.09it/s]

score 33.0


 33%|███▎      | 1658/5000 [00:26<01:30, 37.10it/s]

score 23.0


 34%|███▍      | 1705/5000 [00:28<01:46, 30.86it/s]

score 23.0


 35%|███▌      | 1759/5000 [00:29<01:37, 33.24it/s]

score 73.0


 36%|███▌      | 1810/5000 [00:30<01:11, 44.49it/s]

score 83.0


 37%|███▋      | 1851/5000 [00:31<01:17, 40.58it/s]

score 83.0


 38%|███▊      | 1904/5000 [00:33<01:25, 36.17it/s]

score 33.0


 39%|███▉      | 1959/5000 [00:35<01:27, 34.69it/s]

score 83.0


 40%|████      | 2006/5000 [00:36<01:57, 25.50it/s]

score 173.0


 41%|████      | 2058/5000 [00:38<01:17, 38.20it/s]

score 56.0


 42%|████▏     | 2104/5000 [00:39<02:07, 22.75it/s]

score 23.0


 43%|████▎     | 2155/5000 [00:41<01:57, 24.15it/s]

score 33.0


 44%|████▍     | 2210/5000 [00:43<01:53, 24.54it/s]

score 33.0


 45%|████▌     | 2256/5000 [00:45<01:51, 24.71it/s]

score 123.0


 46%|████▌     | 2305/5000 [00:47<01:47, 25.02it/s]

score 183.0


 47%|████▋     | 2353/5000 [00:50<02:53, 15.22it/s]

score 83.0


 48%|████▊     | 2404/5000 [00:52<02:44, 15.79it/s]

score 153.0


 49%|████▉     | 2453/5000 [00:55<02:35, 16.34it/s]

score 13.0


 50%|█████     | 2502/5000 [00:58<02:03, 20.24it/s]

score 173.0


 51%|█████     | 2556/5000 [01:01<01:59, 20.44it/s]

score 103.0


 52%|█████▏    | 2603/5000 [01:04<03:06, 12.88it/s]

score 93.0


 53%|█████▎    | 2656/5000 [01:06<01:30, 25.84it/s]

score 53.0


 54%|█████▍    | 2702/5000 [01:09<02:24, 15.88it/s]

score 193.0


 55%|█████▌    | 2752/5000 [01:11<01:55, 19.40it/s]

score 43.0


 56%|█████▌    | 2803/5000 [01:15<03:34, 10.23it/s]

score 23.0


 57%|█████▋    | 2855/5000 [01:18<02:02, 17.53it/s]

score 233.0


 58%|█████▊    | 2903/5000 [01:21<02:14, 15.61it/s]

score 133.0


 59%|█████▉    | 2953/5000 [01:24<02:02, 16.73it/s]

score 113.0


 60%|██████    | 3003/5000 [01:27<01:57, 16.97it/s]

score 334.0


 61%|██████    | 3051/5000 [01:29<01:53, 17.16it/s]

score 83.0


 62%|██████▏   | 3105/5000 [01:35<02:28, 12.78it/s]

score 373.0


 63%|██████▎   | 3154/5000 [01:39<02:31, 12.20it/s]

score 23.0


 64%|██████▍   | 3201/5000 [01:42<01:31, 19.57it/s]

score 202.0


 65%|██████▌   | 3252/5000 [01:46<02:41, 10.81it/s]

score 303.0


 66%|██████▌   | 3303/5000 [01:50<02:26, 11.56it/s]

score 44.0


 67%|██████▋   | 3352/5000 [01:54<02:03, 13.35it/s]

score 106.0


 68%|██████▊   | 3402/5000 [01:58<02:06, 12.65it/s]

score 213.0


 69%|██████▉   | 3454/5000 [02:03<01:53, 13.67it/s]

score 197.0


 70%|███████   | 3504/5000 [02:07<01:55, 12.98it/s]

score 123.0


 71%|███████   | 3551/5000 [02:11<01:55, 12.49it/s]

score 213.0


 72%|███████▏  | 3603/5000 [02:17<01:53, 12.32it/s]

score 63.0


 73%|███████▎  | 3653/5000 [02:22<02:27,  9.14it/s]

score 583.0


 74%|███████▍  | 3703/5000 [02:26<01:45, 12.31it/s]

score 33.0


 75%|███████▌  | 3752/5000 [02:34<02:48,  7.39it/s]

score 33.0


 76%|███████▌  | 3800/5000 [02:39<02:17,  8.72it/s]

score 403.0


 77%|███████▋  | 3852/5000 [02:44<02:11,  8.75it/s]

score 163.0


 78%|███████▊  | 3902/5000 [02:49<01:33, 11.73it/s]

score 63.0


 79%|███████▉  | 3951/5000 [02:55<02:00,  8.69it/s]

score 393.0


 80%|████████  | 4003/5000 [03:02<02:00,  8.28it/s]

score 253.0


 81%|████████  | 4053/5000 [03:06<01:47,  8.84it/s]

score 23.0


 82%|████████▏ | 4101/5000 [03:12<01:13, 12.31it/s]

score 93.0


 83%|████████▎ | 4151/5000 [03:18<01:53,  7.47it/s]

score 283.0


 84%|████████▍ | 4203/5000 [03:26<02:44,  4.86it/s]

score 1000.0


 85%|████████▌ | 4251/5000 [03:32<02:08,  5.82it/s]

score 1000.0


 86%|████████▌ | 4303/5000 [03:39<01:39,  6.99it/s]

score 553.0


 87%|████████▋ | 4352/5000 [03:46<02:01,  5.32it/s]

score 1000.0


 88%|████████▊ | 4401/5000 [03:49<00:28, 20.86it/s]

score 82.0


 89%|████████▉ | 4456/5000 [03:52<00:30, 18.03it/s]

score 53.0


 90%|█████████ | 4504/5000 [03:55<00:39, 12.65it/s]

score 163.0


 91%|█████████ | 4551/5000 [04:00<00:47,  9.37it/s]

score 492.0


 92%|█████████▏| 4600/5000 [04:06<00:41,  9.63it/s]

score 113.0


 93%|█████████▎| 4652/5000 [04:12<00:32, 10.74it/s]

score 23.0


 94%|█████████▍| 4704/5000 [04:16<00:36,  8.14it/s]

score 23.0


 95%|█████████▌| 4751/5000 [04:21<00:28,  8.67it/s]

score 361.0


 96%|█████████▌| 4804/5000 [04:24<00:10, 19.42it/s]

score 43.0


 97%|█████████▋| 4854/5000 [04:27<00:08, 16.51it/s]

score 213.0


 98%|█████████▊| 4903/5000 [04:31<00:09,  9.73it/s]

score 66.0


 99%|█████████▉| 4951/5000 [04:36<00:04,  9.92it/s]

score 413.0


100%|██████████| 5000/5000 [04:41<00:00, 17.76it/s]


score moyen 196.2 nombre reussite totale 0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▃▁▂▃▃▂▁▃▅▁▁▄▄█▆▂▁▄█▂▇▁▁▆

0,1
score,413.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  1%|          | 38/5000 [00:00<00:34, 145.48it/s]

score 10.0


  1%|▏         | 66/5000 [00:00<00:42, 117.22it/s]

score 23.0


  2%|▏         | 118/5000 [00:00<00:40, 121.28it/s]

score 13.0


  3%|▎         | 169/5000 [00:01<00:41, 115.76it/s]

score 13.0


  4%|▍         | 218/5000 [00:01<00:43, 111.19it/s]

score 13.0


  5%|▌         | 265/5000 [00:02<00:45, 103.55it/s]

score 13.0


  6%|▋         | 322/5000 [00:02<00:48, 96.25it/s] 

score 22.0


  7%|▋         | 360/5000 [00:03<00:59, 78.40it/s]

score 68.0


  8%|▊         | 417/5000 [00:04<00:59, 76.87it/s]

score 43.0


  9%|▉         | 463/5000 [00:04<00:57, 79.16it/s]

score 22.0


 10%|█         | 517/5000 [00:05<01:03, 70.32it/s]

score 26.0


 11%|█         | 561/5000 [00:05<00:55, 79.51it/s]

score 13.0


 12%|█▏        | 612/5000 [00:06<01:06, 66.21it/s]

score 13.0


 13%|█▎        | 664/5000 [00:07<01:06, 64.81it/s]

score 17.0


 14%|█▍        | 707/5000 [00:08<01:14, 57.35it/s]

score 22.0


 15%|█▌        | 757/5000 [00:09<01:20, 52.41it/s]

score 25.0


 16%|█▌        | 810/5000 [00:10<01:16, 54.46it/s]

score 39.0


 17%|█▋        | 859/5000 [00:11<01:20, 51.56it/s]

score 42.0


 18%|█▊        | 909/5000 [00:12<01:17, 52.51it/s]

score 13.0


 19%|█▉        | 954/5000 [00:13<01:59, 33.84it/s]

score 53.0


 20%|██        | 1006/5000 [00:14<01:49, 36.58it/s]

score 100.0


 21%|██        | 1062/5000 [00:15<01:22, 47.87it/s]

score 53.0


 22%|██▏       | 1109/5000 [00:16<01:21, 47.55it/s]

score 93.0


 23%|██▎       | 1155/5000 [00:18<02:00, 31.90it/s]

score 23.0


 24%|██▍       | 1209/5000 [00:19<01:37, 38.73it/s]

score 26.0


 25%|██▌       | 1253/5000 [00:21<01:55, 32.57it/s]

score 23.0


 26%|██▌       | 1306/5000 [00:22<01:58, 31.09it/s]

score 33.0


 27%|██▋       | 1354/5000 [00:24<02:19, 26.16it/s]

score 23.0


 28%|██▊       | 1406/5000 [00:26<02:18, 25.86it/s]

score 53.0


 29%|██▉       | 1454/5000 [00:28<02:57, 19.93it/s]

score 62.0


 30%|███       | 1504/5000 [00:30<02:38, 22.08it/s]

score 23.0


 31%|███       | 1555/5000 [00:32<02:10, 26.31it/s]

score 33.0


 32%|███▏      | 1606/5000 [00:34<02:22, 23.80it/s]

score 93.0


 33%|███▎      | 1656/5000 [00:36<02:50, 19.60it/s]

score 163.0


 34%|███▍      | 1706/5000 [00:39<02:09, 25.49it/s]

score 23.0


 35%|███▌      | 1752/5000 [00:42<04:10, 12.95it/s]

score 173.0


 36%|███▌      | 1803/5000 [00:45<03:40, 14.52it/s]

score 32.0


 37%|███▋      | 1850/5000 [00:47<02:39, 19.75it/s]

score 173.0


 38%|███▊      | 1904/5000 [00:51<03:19, 15.54it/s]

score 343.0


 39%|███▉      | 1954/5000 [00:54<03:59, 12.73it/s]

score 103.0


 40%|████      | 2004/5000 [00:58<03:01, 16.51it/s]

score 173.0


 41%|████      | 2057/5000 [01:01<02:52, 17.05it/s]

score 444.0


 42%|████▏     | 2100/5000 [01:05<06:49,  7.07it/s]

score 53.0


 43%|████▎     | 2152/5000 [01:09<03:37, 13.09it/s]

score 33.0


 44%|████▍     | 2203/5000 [01:14<03:18, 14.06it/s]

score 23.0


 45%|████▌     | 2256/5000 [01:19<02:43, 16.79it/s]

score 33.0


 46%|████▌     | 2302/5000 [01:23<04:19, 10.39it/s]

score 183.0


 47%|████▋     | 2351/5000 [01:27<04:04, 10.82it/s]

score 153.0


 48%|████▊     | 2401/5000 [01:32<05:07,  8.44it/s]

score 123.0


 49%|████▉     | 2453/5000 [01:37<03:54, 10.85it/s]

score 233.0


 50%|█████     | 2504/5000 [01:41<02:29, 16.75it/s]

score 23.0


 51%|█████     | 2553/5000 [01:48<07:44,  5.27it/s]

score 483.0


 52%|█████▏    | 2601/5000 [01:53<06:11,  6.46it/s]

score 63.0


 53%|█████▎    | 2652/5000 [01:59<08:01,  4.88it/s]

score 1000.0


 54%|█████▍    | 2703/5000 [02:06<07:08,  5.37it/s]

score 343.0


 55%|█████▌    | 2751/5000 [02:11<03:54,  9.60it/s]

score 143.0


 56%|█████▌    | 2803/5000 [02:16<05:21,  6.84it/s]

score 193.0


 57%|█████▋    | 2851/5000 [02:21<02:14, 15.95it/s]

score 123.0


 58%|█████▊    | 2902/5000 [02:28<04:15,  8.20it/s]

score 484.0


 59%|█████▉    | 2954/5000 [02:34<02:59, 11.41it/s]

score 93.0


 60%|██████    | 3004/5000 [02:40<03:02, 10.93it/s]

score 253.0


 61%|██████    | 3050/5000 [02:47<02:53, 11.24it/s]

score 83.0


 62%|██████▏   | 3101/5000 [02:55<07:40,  4.12it/s]

score 743.0


 63%|██████▎   | 3151/5000 [03:01<05:07,  6.01it/s]

score 1000.0


 64%|██████▍   | 3201/5000 [03:07<03:48,  7.87it/s]

score 283.0


 65%|██████▌   | 3257/5000 [03:13<01:44, 16.61it/s]

score 223.0


 66%|██████▌   | 3305/5000 [03:17<02:11, 12.89it/s]

score 473.0


 67%|██████▋   | 3356/5000 [03:21<02:35, 10.59it/s]

score 23.0


 68%|██████▊   | 3402/5000 [03:26<04:31,  5.88it/s]

score 253.0


 69%|██████▉   | 3452/5000 [03:30<04:00,  6.44it/s]

score 703.0


 70%|███████   | 3501/5000 [03:34<03:04,  8.10it/s]

score 313.0


 71%|███████   | 3553/5000 [03:40<02:26,  9.86it/s]

score 233.0


 72%|███████▏  | 3603/5000 [03:47<03:08,  7.42it/s]

score 23.0


 73%|███████▎  | 3652/5000 [03:52<03:39,  6.14it/s]

score 313.0


 74%|███████▍  | 3701/5000 [03:57<03:05,  7.00it/s]

score 463.0


 75%|███████▌  | 3753/5000 [04:04<02:05,  9.90it/s]

score 23.0


 76%|███████▌  | 3801/5000 [04:11<04:54,  4.08it/s]

score 1000.0


 77%|███████▋  | 3852/5000 [04:20<02:51,  6.70it/s]

score 23.0


 78%|███████▊  | 3901/5000 [04:25<02:11,  8.38it/s]

score 443.0


 79%|███████▉  | 3951/5000 [04:33<05:04,  3.45it/s]

score 1000.0


 80%|████████  | 4002/5000 [04:39<02:38,  6.31it/s]

score 283.0


 81%|████████  | 4052/5000 [04:48<02:53,  5.47it/s]

score 39.0


 82%|████████▏ | 4101/5000 [04:56<03:10,  4.73it/s]

score 1000.0


 83%|████████▎ | 4152/5000 [05:02<02:29,  5.66it/s]

score 663.0


 84%|████████▍ | 4202/5000 [05:10<01:10, 11.24it/s]

score 53.0


 85%|████████▌ | 4252/5000 [05:19<01:25,  8.76it/s]

score 42.0


 86%|████████▌ | 4303/5000 [05:23<00:51, 13.58it/s]

score 73.0


 87%|████████▋ | 4352/5000 [05:30<01:12,  8.99it/s]

score 363.0


 88%|████████▊ | 4402/5000 [05:38<01:54,  5.23it/s]

score 43.0


 89%|████████▉ | 4451/5000 [05:46<01:45,  5.23it/s]

score 43.0


 90%|█████████ | 4501/5000 [05:53<01:30,  5.49it/s]

score 313.0


 91%|█████████ | 4551/5000 [06:02<01:50,  4.06it/s]

score 693.0


 92%|█████████▏| 4602/5000 [06:10<01:13,  5.43it/s]

score 213.0


 93%|█████████▎| 4651/5000 [06:18<01:40,  3.48it/s]

score 193.0


 94%|█████████▍| 4704/5000 [06:29<00:46,  6.43it/s]

score 113.0


 95%|█████████▌| 4753/5000 [06:40<00:50,  4.86it/s]

score 1000.0


 96%|█████████▌| 4801/5000 [06:49<01:11,  2.79it/s]

score 853.0


 97%|█████████▋| 4853/5000 [06:58<00:19,  7.67it/s]

score 94.0


 98%|█████████▊| 4904/5000 [07:05<00:08, 11.61it/s]

score 53.0


 99%|█████████▉| 4952/5000 [07:09<00:05,  9.28it/s]

score 15.0


100%|██████████| 5000/5000 [07:14<00:00, 11.49it/s]


score moyen 221.39 nombre reussite totale 0


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
score,▁▁▁▁▁▁▁▁▂▂▁▁▁▂▂▃▂▁▁▂▁█▂▄▃█▄▃▃▃█▄▁▆▁▁▆▂▇▁

0,1
score,15.0


## True Flappy Bird

Define a new way to process the observations: each two-component observation is scaled and rounded into a pair of integer bins.

In [5]:
def process_true(array, bin_width, bin_height):
    """Discretize a two-component observation into a pair of rounded bins.

    Each component is multiplied by half of its bin count (integer division)
    and then rounded with the built-in ``round``.

    Args:
        array: Iterable holding exactly two numbers. The first is scaled
            with ``bin_width``, the second with ``bin_height``.
        bin_width: Bin count used for the first component.
        bin_height: Bin count used for the second component.

    Returns:
        Tuple ``(first_bin, second_bin)`` usable as a dictionary key.
    """
    first, second = array
    half_bins = (bin_width // 2, bin_height // 2)
    return tuple(
        round(value * scale) for value, scale in zip((first, second), half_bins)
    )

In [6]:
class MC_True:
    """Tabular Monte Carlo agent with an epsilon-greedy policy.

    Q-values live in a dict keyed by ``(state, action)`` where ``state`` is the
    tuple returned by ``process_true`` and ``action`` is 0 or 1. The table is
    updated once per episode, from the undiscounted return that follows each
    recorded step.

    Args:
        eps: Probability of picking a uniformly random action while training.
        alpha: Step size of the Q-value update.
        n_max_step: Step count at which an episode is treated as finished.
        height: Bin count passed to ``process_true`` as ``bin_height``.
        width: Bin count passed to ``process_true`` as ``bin_width``; also
            used to scale the reward penalty in ``update``.
        gap: Stored on the instance; not used by this class.
        trained: When true, load the Q-table from ``data/MC_true.pkl``
            instead of starting from an empty table.
    """

    def __init__(self, eps, alpha, n_max_step, height, width, gap, trained=False) -> None:
        self.eps = eps
        self.alpha = alpha
        self.n_max_step = n_max_step
        self.nsteps = 0
        self.height = height
        self.width = width
        self.gap = gap
        # Created here so chose_action/update work even if reset() has not
        # been called yet.
        self.episode = []
        if trained:
            # NOTE: unpickling can execute arbitrary code; only load a file
            # that this project wrote itself.
            with open('data/MC_true.pkl', 'rb') as f:
                self.q = pickle.load(f)
        else:
            self.q = {}

    def reset(self):
        """Forget the recorded steps and restart the step counter."""
        self.episode = []
        self.nsteps = 0

    def chose_action(self, observation, training=True):
        """Pick an action for ``observation`` and record the step.

        Unknown ``(state, action)`` pairs are initialised with a value drawn
        uniformly from [-10, 10). With ``training`` true, a random action is
        taken with probability ``eps``; otherwise the greedy action is taken
        (action 1 wins ties).

        Returns:
            The chosen action, 0 or 1.
        """
        state = process_true(observation, self.width, self.height)
        for candidate in (0, 1):
            # Direct dict membership is O(1); no need to list every key.
            if (state, candidate) not in self.q:
                self.q[(state, candidate)] = np.random.uniform(-10, 10)
        if training and np.random.uniform(0, 1) < self.eps:
            action = np.random.choice([0, 1])
        else:
            action = 0 if self.q[(state, 0)] > self.q[(state, 1)] else 1
        # The step is recorded in evaluation mode as well (unchanged
        # behaviour); callers should reset() between episodes.
        self.episode.append({'state': state, 'action': action})
        self.nsteps += 1
        return action

    def update(self, reward, next_observation, done):
        """Store the shaped reward of the last step and learn at episode end.

        Args:
            reward: Environment reward for the last chosen action.
            next_observation: Unused; kept for interface compatibility.
            done: Whether the environment reported the end of the episode.

        Raises:
            IndexError: If no step has been recorded since the last reset.
        """
        last_step = self.episode[-1]
        # Penalise the magnitude of the second state component.
        # NOTE(review): the penalty is divided by self.width although
        # state[1] is scaled with self.height -- confirm this is intended.
        # Left unchanged so saved Q-tables stay comparable.
        reward = reward - 5*abs(last_step['state'][1])/self.width
        if self.nsteps == self.n_max_step:
            reward = 10000
        if done:
            # A terminal step overrides every other reward.
            reward = -30
        last_step['reward'] = reward
        if not done and self.nsteps < self.n_max_step:
            return
        # Episode finished: walk backwards, accumulating the undiscounted
        # return G, and move each visited Q-value towards it.
        G = 0
        for step in reversed(self.episode):
            G += step['reward']
            key = (step['state'], step['action'])
            self.q[key] += self.alpha*(G - self.q[key])

    def save(self):
        """Pickle the Q-table to ``data/MC_true.pkl``, creating ``data`` if needed."""
        os.makedirs('data', exist_ok=True)
        with open('data/MC_true.pkl', 'wb') as f:
            pickle.dump(self.q, f)

In [8]:
def training_true(env, policy,n_episodes, n_max_step, n_testing_moments = 100):
    """Train ``policy`` on ``env`` and periodically evaluate it.

    ``env.reset()`` is expected to return an observation and ``env.step`` a
    4-tuple ``(obs, reward, done, info)``. Roughly ``n_testing_moments`` times
    during training, one greedy evaluation episode is played with
    ``testing_true``; its score is logged to wandb when the module-level flag
    ``do_wandb`` is true.

    Args:
        env: Environment to train on.
        policy: Agent exposing ``reset``, ``chose_action`` and ``update``.
        n_episodes: Number of training episodes.
        n_max_step: Maximum number of steps per episode.
        n_testing_moments: Approximate number of evaluation points.
    """
    # Clamp to at least 1: with n_episodes < n_testing_moments the integer
    # division gives 0 and the modulo below would raise ZeroDivisionError.
    sep_test = max(1, n_episodes // max(1, n_testing_moments))
    for num_ep in tqdm(range(n_episodes)):
        obs = env.reset()
        policy.reset()
        done = False
        n_step = 0
        # Truthiness test rather than `done is False`, so a non-bool flag
        # (e.g. numpy.bool_) is handled correctly.
        while not done and n_step < n_max_step:
            n_step += 1
            action = policy.chose_action(obs,training = True)
            # Apply the action, then let the policy learn from the outcome.
            obs, reward, done, _ = env.step(action)
            policy.update(reward,obs,done)
        if num_ep % sep_test == 0:
            score_moyen, _ = testing_true(env,policy,n_episodes=1,n_max_step= n_max_step)
            if do_wandb:
                wandb.log({"score": score_moyen}, step=num_ep)

def testing_true(env, policy,n_episodes, n_max_step):
    score_moyen = 0
    policy.reset()
    for _ in range(n_episodes):
        done = False
        obs = env.reset()
        n_step = 0
        # iterate
        n_terminated = 0 

        while (done is False) and (n_step < n_max_step):
            n_step += 1
            # Select next action
            action = policy.chose_action(obs,training = False)
            # Appy action and return new observation of the environment
            obs, reward, done, _ = env.step(action)
            if n_step == n_max_step:
                n_terminated += 1
        score_moyen += n_step
    score_moyen = score_moyen/n_episodes
    return score_moyen, n_terminated


def show_true(env, policy,n_episodes):
    """Play ``n_episodes`` greedy episodes while rendering them at about 30 FPS.

    After every step the environment is rendered, and the new observation and
    the step index are printed. ``env.step`` is expected to return a 4-tuple
    ``(obs, reward, done, info)``.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``render``.
        policy: Agent exposing ``reset`` and ``chose_action``.
        n_episodes: Number of episodes to display.
    """
    for _ in range(n_episodes):
        # Reset per episode so the policy's step buffer does not grow across
        # displayed episodes.
        policy.reset()
        obs = env.reset()
        done = False
        n_step = 0
        # Truthiness test rather than `done is False`: an identity check stops
        # after one step when the flag is not a plain bool (e.g. numpy.bool_).
        while not done:
            n_step += 1
            action = policy.chose_action(obs,training = False)
            obs, reward, done, _ = env.step(action)
            env.render()
            time.sleep(1 / 30)  # FPS
            print(obs)
            print(n_step)


if __name__ == '__main__':
    # Experiment configuration. These names are module-level globals;
    # training_true reads `do_wandb` directly.
    trained = True             # load the pickled Q-table instead of starting empty
    continue_training = False  # keep training even when a table was loaded
    n_max_step = 10000
    n_training = 5000
    n_testing = 100
    eps = 0.01
    alpha = 0.5
    env = flappy_bird_gym.make("FlappyBird-v0")
    height = 40
    width = 50
    gap = 4
    do_wandb = False
    processed = True
    should_train = continue_training or not trained

    try:
        for run in range(5):
            date = datetime.datetime.now().strftime('%dth%mmo_%Hh%Mmin%Ss')
            name = 'MC processed= ' + str(processed) +' height: '+ str(height) + ' width: '+ str(width) + ' gap: ' +str(gap) +'date: '+ str(date)
            if do_wandb:
                wandb.init(
                    # set the wandb project where this run will be logged
                    project="FlappyBird-RL",
                    name=name,
                    # track hyperparameters and run metadata
                    config={
                        "processed": processed,
                        "name_policy": 'MC',
                        "height": height,
                        "width": width,
                        "gap": gap,
                        "True_flappy": True,
                        "eps": eps,
                        "alpha": alpha,
                    },
                )
            policy = MC_True(eps = eps,alpha = alpha, n_max_step = n_max_step,height = height,width = width,gap = gap ,trained = trained)
            if should_train:
                training_true(env,policy,n_episodes= n_training,n_max_step= n_max_step)
            score_moyen, n_terminated = testing_true(env,policy,n_episodes= n_testing,n_max_step= n_max_step)
            print("score moyen",score_moyen,"nombre reussite totale", n_terminated)
            # Only overwrite the saved table when it was actually (re)trained.
            if should_train:
                policy.save()
            if do_wandb:
                wandb.finish()
        # To watch the last policy play, call show_true(env, policy, 1) here,
        # before the environment is closed.
    finally:
        # The environment is shared by all runs, so it is closed once at the
        # end rather than inside the loop.
        env.close()

score moyen 470.38 nombre reussite totale 0


  logger.warn(


[1.64236111 0.02734375]
1
[1.62847222 0.04296875]
2
[1.61458333 0.05664062]
3
[1.60069444 0.06835938]
4
[1.58680556 0.078125  ]
5
[1.57291667 0.0859375 ]
6
[1.55902778 0.09179688]
7
[1.54513889 0.09570312]
8
[1.53125    0.09765625]
9
[1.51736111 0.11523438]
10
[1.50347222 0.1328125 ]
11
[1.48958333 0.1484375 ]
12
[1.47569444 0.16210938]
13
[1.46180556 0.17382812]
14
[1.44791667 0.18359375]
15
[1.43402778 0.19140625]
16
[1.42013889 0.19726562]
17
[1.40625    0.20117188]
18
[1.39236111 0.203125  ]
19
[1.37847222 0.203125  ]
20
[1.36458333 0.20117188]
21
[1.35069444 0.19726562]
22
[1.33680556 0.19140625]
23
[1.32291667 0.18359375]
24
[1.30902778 0.17382812]
25
[1.29513889 0.16210938]
26
[1.28125   0.1484375]
27
[1.26736111 0.1328125 ]
28
[1.25347222 0.11523438]
29
[1.23958333 0.09570312]
30
[1.22569444 0.07617188]
31
[1.21180556 0.05664062]
32
[1.19791667 0.03710938]
33
[1.18402778 0.01757812]
34
[ 1.17013889 -0.00195312]
35
[ 1.15625    -0.02148438]
36
[ 1.14236111 -0.04101562]
37
[ 1.12

KeyboardInterrupt: 

: 