In [1]:
import gymnasium as gym
import numpy as np
import time
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.categorical import Categorical
import matplotlib.pyplot as plt
from cellitaire.environment.cellitaire_env import CellitaireEnv
from cellitaire.environment.rewards.reward import *

In [2]:
class PPOMemory:
    def __init__(self, batch_size):
        self.states = []
        self.probs = []
        self.vals = []
        self.actions = []
        self.rewards = []
        self.dones = []

        self.batch_size = batch_size

    def generate_batches(self):
        n_states = len(self.states)
        batch_start = np.arange(0, n_states, self.batch_size)
        indices = np.arange(n_states, dtype=np.int64)
        np.random.shuffle(indices)
        batches = [indices[i:i+self.batch_size] for i in batch_start]

        return np.array(self.states), np.array(self.actions), np.array(self.probs), np.array(self.vals), np.array(self.rewards), np.array(self.dones), batches
        
    def store_memory(self, state, action, probs, vals, reward, done):
        self.states.append(state)
        self.probs.append(probs)
        self.vals.append(vals)
        self.rewards.append(reward)
        self.dones.append(done)
        self.actions.append(action)

    def clear_memory(self):
        self.states = []
        self.probs = []
        self.vals = []
        self.actions = []
        self.rewards = []
        self.dones = []

In [3]:
class ActorNetwork(nn.Module):
    def __init__(
        self, 
        n_actions, 
        input_dims, 
        alpha,
        chkpt_dir='tmp/ppo', 
        num_embeddings=53, 
        embedding_dim=30, 
        embeddings_in_state=85,
        num_hidden_layers=1,
        hidden_dim=256
    ):
        super(ActorNetwork, self).__init__()
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.embeddings_in_state = embeddings_in_state

        # Create embedding layer only if embeddings_in_state > 0.
        if embeddings_in_state > 0:
            self.embedding_layer = nn.Embedding(num_embeddings, embedding_dim).to(self.device)
            input_layer_dim = input_dims[0] - embeddings_in_state + (embeddings_in_state * embedding_dim)
        else:
            self.embedding_layer = None
            input_layer_dim = input_dims[0]

        self.checkpoint_file = os.path.join(chkpt_dir, 'actor_torch_ppo')
        self.actor = nn.Sequential(
            nn.Linear(input_layer_dim, hidden_dim),
            nn.ReLU()
        )
        
        for _ in range(num_hidden_layers):
            self.actor.append(nn.Linear(hidden_dim, hidden_dim))
            self.actor.append(nn.ReLU())
        
        self.actor.append(nn.Linear(hidden_dim, n_actions))
        self.actor.append(nn.Softmax(dim=-1))
        
        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        self.to(self.device)
        self.n_actions = n_actions

    def forward(self, state):
        if self.embeddings_in_state > 0:
            embeddings = self.embedding_layer(state[:, :self.embeddings_in_state].int()).view(state.shape[0], 1, -1)
            state = state.view(state.shape[0], 1, -1)[:, :, self.embeddings_in_state:]

            
            # Concatenate the remaining state features with the flattened embeddings.
            new_state = torch.cat((state, embeddings), dim=2)
        else:
            new_state = state.view(state.shape[0], 1, -1)
        
        # Pass the processed state through the actor network.
        logits = self.actor(new_state)
        return logits

    def save_checkpoint(self):
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        self.load_state_dict(torch.load(self.checkpoint_file))


In [4]:
class CriticNetwork(nn.Module):
    def __init__(
        self, 
        input_dims, 
        alpha, 
        num_hidden_layers=1, 
        hidden_dim=256, 
        chkpt_dir='tmp/ppo'
    ):
        super(CriticNetwork, self).__init__()

        self.checkpoint_file = os.path.join(chkpt_dir, 'critic_torch_ppo')

        self.critic = nn.Sequential(
            nn.Linear(*input_dims, hidden_dim),
            nn.ReLU()
        )
        
        for _ in range(num_hidden_layers):
            self.critic.append(nn.Linear(hidden_dim, hidden_dim))
            self.critic.append(nn.ReLU())

        self.critic.append(nn.Linear(hidden_dim, 1))

        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        value = self.critic(state)
        return value

    def save_checkpoint(self):
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        self.load_state_dict(torch.load(self.checkpoint_file))

In [5]:
class Agent:
    def __init__(
        self, 
        n_actions, 
        input_dims, 
        gamma=0.99, 
        alpha=0.0003, 
        gae_lambda=0.95,
        policy_clip=0.2, 
        batch_size=64, 
        n_epochs=10,
        num_hidden_layers_actor=1,
        hidden_dim_actor=256,
        num_hidden_layers_critic=1,
        hidden_dim_critic=256,
        embeddings_in_state_actor=85,
        embedding_dim_actor=30
    ):
        self.gamma = gamma
        self.policy_clip = policy_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda
        self.n_actions = n_actions

        self.actor = ActorNetwork(
            n_actions, 
            input_dims, 
            alpha, 
            num_hidden_layers=num_hidden_layers_actor, 
            hidden_dim=hidden_dim_actor,
            embeddings_in_state=embeddings_in_state_actor,
            embedding_dim=embedding_dim_actor
        )
        self.critic = CriticNetwork(
            input_dims, 
            alpha,
            num_hidden_layers=num_hidden_layers_critic,
            hidden_dim=hidden_dim_critic
        )
        actor_param_count = sum(p.numel() for p in self.actor.parameters())
        critic_param_count = sum(p.numel() for p in self.critic.parameters())
        print(f'NUM - PARAMS {actor_param_count + critic_param_count}')
        self.memory = PPOMemory(batch_size)
       
    def remember(self, state, action, probs, vals, reward, done):
        self.memory.store_memory(state, action, probs, vals, reward, done)

    def save_models(self):
        #print('... saving models ...')
        self.actor.save_checkpoint()
        self.critic.save_checkpoint()

    def load_models(self):
        #print('... loading models ...')
        self.actor.load_checkpoint()
        self.critic.load_checkpoint()

    '''
    def choose_action(self, observation, legal_actions):
        state = torch.tensor(np.array([observation]), dtype=torch.float).to(self.actor.device)
        
        dist = self.actor(state)
        legal_actions = torch.tensor(np.array(legal_actions)).to(self.actor.device)
        dist = Categorical(dist)
        value = self.critic(state)
        action = dist.sample()

        probs = torch.squeeze(dist.log_prob(action)).item()
        action = torch.squeeze(action).item()
        value = torch.squeeze(value).item()

        return action, probs, value
    '''
    
    '''
    def choose_action(self, observation, legal_actions):
        # Convert observation to tensor and send it to the actor's device.
        state = torch.tensor(np.array([observation]), dtype=torch.float).to(self.actor.device)

        # Get logits from the actor; assumed shape: (batch, 1, k)
        logits = self.actor(state)

        # Convert legal_actions to a tensor on the same device.
        legal_actions = torch.tensor(np.array(legal_actions)).to(self.actor.device)

        # Mask logits so that only legal actions remain.
        # If logits has shape (n, 1, k), we index into the third dimension.
        masked_logits = logits[:, 0, legal_actions]  # shape: (n, len(legal_actions))

        # Build a Categorical using the masked logits.
        dist = Categorical(logits=masked_logits)

        # Get the value estimate from the critic.
        value = self.critic(state)

        # Sample an action index from the distribution.
        # This gives a relative index in the filtered (legal) logits.
        relative_index = dist.sample()  # shape: (n,)

        # Map the relative index back to the original action space.
        action = legal_actions[relative_index]

        # Compute the log probability for the sampled action.
        probs = torch.squeeze(dist.log_prob(relative_index)).item()

        # Squeeze and convert to Python scalars.
        action = torch.squeeze(action).item()
        value = torch.squeeze(value).item()

        return action, probs, value
    '''
    
    def choose_action(self, observation, legal_actions=None):
        state = torch.tensor(np.array([observation]), dtype=torch.float).to(self.actor.device)

        # Get logits from the actor; assumed shape: (n, 1, k)
        logits = self.actor(state)

        # If legal_actions is provided, mask the logits to include only those actions.
        if legal_actions is not None:
            legal_actions = torch.tensor(np.array(legal_actions)).to(self.actor.device)
            masked_logits = logits[:, 0, legal_actions]  # shape: (n, len(legal_actions))
            dist = Categorical(logits=masked_logits)
        else:
            # Use all logits if no legal actions mask is provided.
            full_logits = logits.squeeze(1)  # shape: (n, k)
            dist = Categorical(logits=full_logits)

        value = self.critic(state)

        # Sample an action index from the distribution.
        sampled_index = dist.sample()  # shape: (n,)

        # Map back to the original action if a mask was applied.
        if legal_actions is not None:
            action = legal_actions[sampled_index]
        else:
            action = sampled_index

        # Get the log probability of the sampled action.
        log_prob = torch.squeeze(dist.log_prob(sampled_index)).item()

        # Squeeze and convert to Python scalars.
        action = torch.squeeze(action).item()
        value = torch.squeeze(value).item()

        return action, log_prob, value


    def learn(self):
        device = self.actor.device  # Assuming this is a CUDA device
        for _ in range(self.n_epochs):
            # Retrieve batch data
            state_arr, action_arr, old_prob_arr, vals_arr, reward_arr, dones_arr, batches = \
                self.memory.generate_batches()
            
            # Convert arrays to torch tensors on GPU
            rewards = torch.tensor(reward_arr, dtype=torch.float32, device=device)
            values = torch.tensor(vals_arr, dtype=torch.float32, device=device)
            dones = torch.tensor(dones_arr, dtype=torch.float32, device=device)
            
            # Compute deltas for GAE: delta_t = r_t + gamma * V(t+1) * (1-done) - V(t)
            deltas = rewards[:-1] + self.gamma * values[1:] * (1 - dones[:-1]) - values[:-1]
            
            # Compute advantage vector using the vectorized discounted cumulative sum.
            advantage = torch.zeros_like(rewards, device=device)
            advantage[:-1] = discount_cumsum(deltas, self.gamma * self.gae_lambda)
            
            # Loop over minibatches
            for batch in batches:
                states = torch.tensor(state_arr[batch], dtype=torch.float, device=device)
                old_probs = torch.tensor(old_prob_arr[batch], device=device)
                actions = torch.tensor(action_arr[batch], device=device)
    
                # Forward pass through the actor and critic networks
                dist = self.actor(states)
                dist = Categorical(dist)
                critic_value = self.critic(states).squeeze()
                
                # Calculate probability ratio and losses
                new_probs = dist.log_prob(actions)
                prob_ratio = new_probs.exp() / old_probs.exp()
                weighted_probs = advantage[batch] * prob_ratio
                weighted_clipped_probs = torch.clamp(prob_ratio, 1 - self.policy_clip, 1 + self.policy_clip) * advantage[batch]
                actor_loss = -torch.min(weighted_probs, weighted_clipped_probs).mean()
    
                returns = advantage[batch] + values[batch]
                critic_loss = (returns - critic_value).pow(2).mean()
    
                total_loss = actor_loss + 0.5 * critic_loss
                
                # Update the networks
                self.actor.optimizer.zero_grad()
                self.critic.optimizer.zero_grad()
                total_loss.backward()
                self.actor.optimizer.step()
                self.critic.optimizer.step()
    
        self.memory.clear_memory()             

def discount_cumsum(x, discount):
    """
    Compute discounted cumulative sums of vector x with discount factor.
    For each index t, returns sum_{l=0}^{T-t-1} discount^l * x[t+l].
    """
    T = x.size(0)
    # Create a vector of discount factors
    discount_factors = discount ** torch.arange(T, device=x.device, dtype=x.dtype)
    # Multiply elementwise and compute cumulative sums in reverse order
    x_discounted = x * discount_factors
    reversed_x = torch.flip(x_discounted, dims=[0])
    cumsum_reversed = torch.cumsum(reversed_x, dim=0)
    discounted_cumsum = torch.flip(cumsum_reversed, dims=[0])
    # Divide by discount factors to get the proper values
    return discounted_cumsum / discount_factors




In [6]:
torch.cuda.is_available()

True

In [7]:
board_rows = 7
board_cols = 12
num_reserved = 6
test_reward = CombinedReward([
    PlacedCardInFoundationReward(weight=2),
    WinReward(),
    #ConstantReward(weight=-0.01),
    #PlayedLegalMoveReward(weight=1),
    PeriodicPlacedCardInFoundationReward(weight=2, reward_period=3),
    CreatedMovesReward(weight=0.0001, num_reserved=num_reserved, foundation_count_dropoff=30)
])
print([reward.weight for reward in test_reward.rewards_list])
env = CellitaireEnv(test_reward, rows=board_rows, cols=board_cols, num_reserved=num_reserved, max_moves=600)

[2, 1000000, 2, 0.0001]


In [10]:
N = 1000
batch_size = 1000
n_epochs = 6
alpha = 1e-6
num_hidden_layers_actor=2
hidden_dim_actor=4096
num_hidden_layers_critic=4
hidden_dim_critic=4096
#embeddings_in_state_actor=1
embeddings_in_state_actor = board_rows * board_cols + 1
embedding_dim=30

env.reset()

agent = Agent(
    
    n_actions=env.action_space.n, 
    input_dims=(board_rows * board_cols + 7,), 
    batch_size=batch_size, 
    alpha=alpha, 
    n_epochs=n_epochs,
    num_hidden_layers_actor=num_hidden_layers_actor,
    hidden_dim_actor=hidden_dim_actor,
    embeddings_in_state_actor=embeddings_in_state_actor,
    embedding_dim_actor=embedding_dim,
    num_hidden_layers_critic=num_hidden_layers_critic,
    hidden_dim_critic=hidden_dim_critic,
)

n_games = 1

best_score = -1000
score_history = []
moves_history = []
cards_saved_history = []
episodes_without_best = 0

learn_iters = 0
avg_score = 0
avg_moves = 0
avg_cards_saved = 0
n_steps = 0

i = 0
while True:
    observation, reward, done, truncated, info = env.reset()
    observation = env.get_state()
    done = False
    score = 0
    while (not done) and (not truncated):
        legal_actions = env.get_legal_actions_as_int()
        action, prob, val = agent.choose_action(observation, legal_actions)
        observation_, reward, done, truncated, info = env.step(action)
        n_steps += 1
        score += reward
        agent.remember(observation, action, prob, val, reward, done)
        if n_steps % N == 0:
            agent.learn()
            learn_iters += 1
        observation = observation_
    score_history.append(score)
    moves_history.append(env.num_moves)
    cards_saved_history.append(env.game.foundation.total_cards())
    avg_score = np.mean(score_history[-100:])
    avg_moves = np.mean(moves_history[-100:])
    avg_cards_saved = np.mean(cards_saved_history[-100:])
    i += 1

    if avg_score > best_score:
        best_score = avg_score
        #agent.save_models()
        episodes_without_best = 0
        print(f'episode {i:>5} | score {score:>6.1f} | recent avg {avg_score:>6.1f} | avg moves {avg_moves:>5.1f} | avg saved {avg_cards_saved:>4.1f} | learning steps {learn_iters:6} | done {done} *') 
    else:
        episodes_without_best += 1
        
    if episodes_without_best % 100 == 0 and episodes_without_best > 0:
        print(f'episode {i:>5} | score {score:>6.1f} | recent avg {avg_score:>6.1f} | avg moves {avg_moves:>5.1f} | avg saved {avg_cards_saved:>4.1f} | learning steps {learn_iters:6} | done {done}') 

NUM - PARAMS 111888011
episode     1 | score    0.0 | recent avg    0.0 | avg moves   8.0 | avg saved  0.0 | learning steps      0 | done True *
episode     2 | score   38.0 | recent avg   19.0 | avg moves 112.0 | avg saved  5.5 | learning steps      0 | done True *
episode     9 | score  150.0 | recent avg   24.9 | avg moves 111.0 | avg saved  7.3 | learning steps      0 | done True *
episode    58 | score  128.0 | recent avg   26.2 | avg moves 106.8 | avg saved  7.7 | learning steps      6 | done True *
episode   127 | score   88.0 | recent avg   26.6 | avg moves 115.1 | avg saved  7.8 | learning steps     13 | done True *
episode   128 | score    4.0 | recent avg   26.7 | avg moves 115.8 | avg saved  7.8 | learning steps     13 | done True *
episode   182 | score   14.0 | recent avg   26.7 | avg moves 121.9 | avg saved  7.8 | learning steps     20 | done True *
episode   223 | score  134.0 | recent avg   26.8 | avg moves 123.8 | avg saved  7.9 | learning steps     25 | done True *
e

KeyboardInterrupt: 

In [None]:
plt.plot(torch.tensor(score_history, dtype=torch.float).view(-1, 100).mean(1))

In [None]:
plt.plot(torch.tensor(moves_history, dtype=torch.float).view(-1, 100).mean(1))

In [None]:
plt.plot(torch.tensor(cards_saved_history, dtype=torch.float).view(-1, 100).mean(1))

In [None]:
print(max(cards_saved_history))

In [9]:
env = gym.make("CartPole-v1")
env.reset()
#env.render()

(array([ 0.00121022,  0.02562538,  0.01463803, -0.02002225], dtype=float32),
 {})

In [11]:
#N = 20
#batch_size = 5
#n_epochs = 4
#alpha = 0.0003
#embeddings_in_state_actor=1

N = 10
batch_size = 3
n_epochs = 2
alpha = 0.0003
num_hidden_layers_actor=3
hidden_dim_actor=2048
embeddings_in_state_actor=1
#embeddings_in_state_actor = board_rows * board_cols + 1
embedding_dim=30

'''
agent = Agent(
    n_actions=env.action_space.n, 
    input_dims=env.observation_space.shape, 
    batch_size=batch_size, 
    alpha=alpha, 
    n_epochs=n_epochs,
    embeddings_in_state_actor=embeddings_in_state_actor
)
'''

agent = Agent(
    
    n_actions=env.action_space.n, 
    input_dims=env.observation_space.shape, 
    batch_size=batch_size, 
    alpha=alpha, 
    n_epochs=n_epochs,
    num_hidden_layers_actor=num_hidden_layers_actor,
    hidden_dim_actor=hidden_dim_actor,
    embeddings_in_state_actor=embeddings_in_state_actor,
    embedding_dim_actor=embedding_dim
)

n_games = 150000

best_score = -1000
score_history = []
max_score = 0
episodes_without_best = 0

learn_iters = 0
avg_score = 0
n_steps = 0

for i in range(n_games):
    (observation, _) = env.reset()
    observation[0] += 5.0
    observation[0] *= 5.2
    done = False
    truncated = False
    score = 0
    while not done and not truncated:
        action, prob, val = agent.choose_action(observation)
        observation_, reward, done, truncated, info = env.step(action)
        observation_[0] += 5.0
        observation_[0] *= 5.2
        n_steps += 1
        score += reward
        agent.remember(observation, action, prob, val, reward, done)
        if n_steps % N == 0:
            agent.learn()
            learn_iters += 1
        observation = observation_
    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    
    max_score = max(max_score, score)
    
    if avg_score > best_score:
        best_score = avg_score
        recent_std = np.std(score_history[-100:])
        #agent.save_models()
        episodes_without_best = 0
        print(f'episode {i:>5} | score {score:>6.1f} | avg {avg_score:>6.1f} | std {recent_std:>6.2f} | max score {max_score:>5.1f} | learning steps {learn_iters:>5} | done {done} *')
    else:
        episodes_without_best += 1
        
    if episodes_without_best % 100 == 0 and episodes_without_best > 0:
        recent_std = np.std(score_history[-100:])
        print(f'episode {i:>5} | score {score:>6.1f} | avg {avg_score:>6.1f} | std {recent_std:>6.2f} | max score {max_score:>5.1f} | learning steps {learn_iters:>5} | done {done}')
    

plt.plot(x, score_history)
env.close()

episode     0 | score   37.0 | avg   37.0 | std   0.00 | max score  37.0 | learning steps     3 | done True *
episode   100 | score   10.0 | avg   22.4 | std  11.91 | max score  73.0 | learning steps   228 | done True
episode   200 | score   31.0 | avg   21.9 | std   9.94 | max score  73.0 | learning steps   446 | done True
episode   300 | score   49.0 | avg   23.5 | std  12.01 | max score  73.0 | learning steps   682 | done True
episode   400 | score   17.0 | avg   26.3 | std  11.91 | max score  73.0 | learning steps   944 | done True
episode   500 | score   23.0 | avg   27.2 | std  16.38 | max score 110.0 | learning steps  1216 | done True
episode   600 | score   14.0 | avg   29.3 | std  15.42 | max score 110.0 | learning steps  1509 | done True
episode   700 | score   49.0 | avg   33.6 | std  22.05 | max score 112.0 | learning steps  1845 | done True
episode   800 | score   34.0 | avg   31.9 | std  18.61 | max score 112.0 | learning steps  2163 | done True
episode   900 | score   31

KeyboardInterrupt: 

KeyboardInterrupt: 

In [91]:
env.close()

In [1]:
%pip install -e ../.

Obtaining file:///P:/repos/cellitaire-rlNote: you may need to restart the kernel to use updated packages.

  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: cellitaire
  Building editable for cellitaire (pyproject.toml): started
  Building editable for cellitaire (pyproject.toml): finished with status 'done'
  Created wheel for cellitaire: filename=cellitaire-0.0.1-0.editable-py3-none-any.whl size=1308 sha256=b378391a3c216160d1234ca202eae929e697782b029366f7620acdb700ddbf9d
  Stored in directory: C:\Users\Noe\

You should consider upgrading via the 'C:\Users\Noe\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [3]:
%pip install gymnasium[classic-control]



  error: subprocess-exited-with-error
  
  python setup.py egg_info did not run successfully.
  exit code: 1
  
  [86 lines of output]
  
  
  Using WINDOWS configuration...
  
  Traceback (most recent call last):
    File [35m"<string>"[0m, line [35m2[0m, in [35m<module>[0m
      [31mexec[0m[1;31m(compile('''[0m
      [31m~~~~[0m[1;31m^^^^^^^^^^^^[0m
      [1;31m# This is <pip-setuptools-caller> -- a caller that pip uses to run setup.py[0m
      [1;31m^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[0m
      ...<31 lines>...
      [1;31mexec(compile(setup_py_code, filename, "exec"))[0m
      [1;31m^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[0m
      [1;31m''' % ('C:\\Users\\Noe\\AppData\\Local\\Temp\\pip-install-eoyw7vpr\\pygame_967dc80c3fe74667b8820019c1335dad\\setup.py',), "<pip-setuptools-caller>", "exec"))[0m
      [1;31m^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Collecting pygame==2.1.3 (from gymnasium[classic-control])
  Using cached pygame-2.1.3.tar.gz (12.8 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'


In [4]:
%pip install pygame

Collecting pygame
  Downloading pygame-2.6.1-cp313-cp313-win_amd64.whl.metadata (13 kB)
Downloading pygame-2.6.1-cp313-cp313-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   ---------------------------- ----------- 7.6/10.6 MB 42.4 MB/s eta 0:00:01
   ---------------------------------------- 10.6/10.6 MB 37.0 MB/s eta 0:00:00
Installing collected packages: pygame
Successfully installed pygame-2.6.1
Note: you may need to restart the kernel to use updated packages.


UsageError: Line magic function `%jupyter` not found.
