# ZMUMiG projekt

Autorzy:
- Jarosław Kołodziej
- Przemysław Kożuch


## Importy bibliotek

In [1]:
import sys
path_nb = r'/zmumig/project/'
sys.path.append(path_nb)

In [2]:
#!pip install "gym[atari, accept-rom-license]"
!pip install "gymnasium[atari, accept-rom-license]"

[0m

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import gymnasium as gym
from IPython import display as ipythondisplay
import time

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Conv2D, Dense, MaxPool2D, Flatten

2024-01-02 21:20:48.786780: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-02 21:20:48.786826: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-02 21:20:48.786854: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-02 21:20:48.828978: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Inicjalizacja

In [4]:
env = gym.make("ALE/Pong-v5")
seed = 1
env.reset(seed=seed)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


(array([[[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        [[109, 118,  43],
         [109, 118,  43],
         [109, 118,  43],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        [[109, 118,  43],
         [109, 118,  43],
         [109, 118,  43],
         ...,
         [109, 118,  43],
         [109, 118,  43],
         [109, 118,  43]],
 
        ...,
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24]],
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24]],
 
        [[ 53,  95,  24],
         [ 53,  95,  24],
         [ 53,  95,  24],
         ...,
         [ 53,  95,  24],
  

In [5]:
print("Observation space: {}".format(env.observation_space))
n_actions = env.action_space.n
print("Possible actions: {}".format(n_actions))

Observation space: Box(0, 255, (210, 160, 3), uint8)
Possible actions: 6


## Definicja modelu

In [6]:
def create_test_model():
    x = Input(shape=(210, 160, 3), batch_size=1)
    p = MaxPool2D((7,8))(x)
    c1 = Conv2D(4, (3,3),)(p)
    c2 = Conv2D(6, (3,3),)(c1)
    f = Flatten()(c2)
    d = Dense(units=16, activation='relu')(f)
    d = Dense(units=16, activation='relu')(d)
    out = Dense(units=n_actions, activation='softmax')(d)
    return tf.keras.models.Model(inputs=x, outputs=out)

test_model = create_test_model()
test_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(1, 210, 160, 3)]        0         
                                                                 
 max_pooling2d (MaxPooling2  (1, 30, 20, 3)            0         
 D)                                                              
                                                                 
 conv2d (Conv2D)             (1, 28, 18, 4)            112       
                                                                 
 conv2d_1 (Conv2D)           (1, 26, 16, 6)            222       
                                                                 
 flatten (Flatten)           (1, 2496)                 0         
                                                                 
 dense (Dense)               (1, 16)                   39952     
                                                             

2024-01-02 21:20:51.698099: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-02 21:20:51.716723: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-02 21:20:51.716764: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-02 21:20:51.718501: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-02 21:20:51.718536: I tensorflow/compile

In [7]:
def choose_action(model, observation):
  observation = observation.reshape((1, 210, 160, 3))
  prob_weights = model.predict(observation, verbose=0)
  action = np.random.choice(n_actions, size=1, p=prob_weights.flatten())[0]
  #print("output: {}  decided action: {}".format(prob_weights,action))
  return action

In [8]:
class Memory:
  def __init__(self):
      self.clear()

  def clear(self):
      self.observations = []
      self.actions = []
      self.rewards = []

  def add_to_memory(self, new_observation, new_action, new_reward):
      self.observations.append(new_observation)
      self.actions.append(new_action)
      self.rewards.append(new_reward)

memory = Memory()

In [9]:
def normalize(x):
  x -= np.mean(x)
  x /= np.std(x)
  return x

def discount_rewards(rewards, gamma=0.95):
  discounted_rewards = np.zeros_like(rewards)
  R = 0
  for t in reversed(range(0, len(rewards))):
      # update the total discounted reward
      R = R * gamma + rewards[t]
      discounted_rewards[t] = R
  return normalize(discounted_rewards)

In [10]:
learning_rate = 1e-3
optimizer = Adam(learning_rate)

## Uczenie modelu

In [11]:
test_model = create_test_model()
test_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')

In [12]:

for i_episode in range(10):
    print('episode:', i_episode)
    # Restart the environment
    observation, reset_info = env.reset(seed=seed)
    print("reset info:",reset_info)
    print("observation:",observation.shape)
    iter = 0
    while True:
        if(iter%10==0):
            print("iter {}".format(iter))
        # using our observation, take an action
        action = choose_action(test_model, observation)
        next_observation, reward, terminated, truncated, info = env.step(action)
        # add to memory
        memory.add_to_memory(observation, action, reward)
        # is the episode over? did you crash or do so well that you're done?
        if(iter>100):
            truncated = True
        if terminated or truncated:
            # determine total reward and keep a record of this
            total_reward = sum(memory.rewards)
            print(total_reward, len(memory.actions))
            # initiate training - remember we don't know anything about how the agent is doing until it's crashed!
            test_model.fit(np.stack(memory.observations), np.stack(memory.actions), epochs=1, batch_size=len(memory.observations), verbose=0)
            memory.clear()
            break
        # update our observatons
        observation = next_observation
        iter += 1

episode: 0
reset info: {'lives': 0, 'episode_frame_number': 0, 'frame_number': 0, 'seeds': (1835504127, 1731038949)}
observation: (210, 160, 3)
iter 0


2024-01-02 21:20:52.557507: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8907
2024-01-02 21:20:52.672474: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-01-02 21:20:52.952265: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


iter 10
iter 20
iter 30
iter 40
iter 50
iter 60
iter 70
iter 80
iter 90
iter 100
-2.0 102


2024-01-02 21:20:57.103169: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f1828f18fd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-01-02 21:20:57.103205: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Ti, Compute Capability 8.6
2024-01-02 21:20:57.107222: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-01-02 21:20:57.180260: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


episode: 1
reset info: {'lives': 0, 'episode_frame_number': 0, 'frame_number': 0, 'seeds': (1835504127, 1731038949)}
observation: (210, 160, 3)
iter 0
iter 10
iter 20
iter 30
iter 40
iter 50
iter 60
iter 70
iter 80
iter 90
iter 100
-2.0 102
episode: 2
reset info: {'lives': 0, 'episode_frame_number': 0, 'frame_number': 0, 'seeds': (1835504127, 1731038949)}
observation: (210, 160, 3)
iter 0
iter 10
iter 20
iter 30
iter 40
iter 50
iter 60
iter 70
iter 80
iter 90
iter 100
-2.0 102
episode: 3
reset info: {'lives': 0, 'episode_frame_number': 0, 'frame_number': 0, 'seeds': (1835504127, 1731038949)}
observation: (210, 160, 3)
iter 0
iter 10
iter 20
iter 30
iter 40
iter 50
iter 60
iter 70
iter 80
iter 90
iter 100
-2.0 102
episode: 4
reset info: {'lives': 0, 'episode_frame_number': 0, 'frame_number': 0, 'seeds': (1835504127, 1731038949)}
observation: (210, 160, 3)
iter 0
iter 10
iter 20
iter 30
iter 40
iter 50
iter 60
iter 70
iter 80
iter 90
iter 100
-2.0 102
episode: 5
reset info: {'lives': 0, 

In [13]:
def save_video_of_model(model, env_name='ALE/Pong-v5', filename='pong_seesion.mp4'):
  import skvideo.io

  env = gym.make(env_name, render_mode='rgb_array')
  env = env.unwrapped
  obs, _ = env.reset()

  out = skvideo.io.FFmpegWriter(filename)

  done = False
  while not done:

      frame = env.render()
      out.writeFrame(frame)
      action = model.predict(obs.reshape((1, 210, 160, 3))).argmax()
      obs, reward, terminated, truncated, info = env.step(action)
      done = terminated or truncated
  out.close()
  print("Successfully saved into {}!".format(filename))

filename = "pong_session_" + time.strftime("%Y-%m-%d %H-%M-%S") + ".mp4"
save_video_of_model(test_model, filename=filename)

Successfully saved into pong_session_2024-01-02 21-21-31.mp4!
