# AI Workshop Project

In [1]:
import random
import gym
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import LSTM

## Sequential model: stacked Conv2D layers, a Reshape, then an LSTM

In [2]:
def cur_model(num_Conv2D,num_filters,num_kernel,num_lstm_units,num_frames):
  """Build and compile a Conv2D -> Reshape -> LSTM Sequential model.

  Args:
    num_Conv2D: number of Conv2D layers stacked at the front of the model.
    num_filters: filter count used by every Conv2D layer.
    num_kernel: kernel_size passed to every Conv2D layer.
    num_lstm_units: number of units of the final LSTM layer.
    num_frames: leading dimension of the Reshape target, (num_frames, -1).

  Returns:
    The Sequential model, compiled with mean-squared-error loss and the
    Adam optimizer.
  """
  model = Sequential()
  for i in range(num_Conv2D):
    # Name the layer through the public constructor argument instead of
    # overwriting the private `_name` attribute after construction.
    model.add(Conv2D(num_filters,
                     kernel_size=num_kernel,
                     data_format='channels_last',
                     activation='relu',
                     name='conv-' + str(i)))
  # Flatten everything after the frame axis so the LSTM receives one
  # feature vector per frame.
  model.add(Reshape((num_frames, -1)))
  model.add(LSTM(num_lstm_units))
  model.compile(loss='mean_squared_error', optimizer='adam')
  return model

In [10]:
def reinforcement(env, model, n_games=1, epochs=1, epsilon_decrease=0.1, backsight=8):
  """Play `n_games` episodes epsilon-greedily and fit `model` after each one.

  Every 32nd frame the current frame window, the chosen action and a zeroed
  per-action value vector are recorded. Every reward received afterwards in
  the same game is added to the recorded action's slot of each sample taken
  so far in that game. After each game the model is fitted on all samples
  gathered so far (across games) and epsilon is lowered.

  Args:
    env: environment exposing `reset()`, `step(action)` returning
      `(obs, reward, done, info)`, `action_space.sample()` and
      `action_space.n`. Assumes `reset()` returns the first observation
      (pre-0.26 gym API) -- TODO confirm against the installed gym.
    model: object exposing `predict(batch)` and `fit(x, y, epochs=...)`.
    n_games: number of episodes to play.
    epochs: number of epochs passed to `model.fit` after each game.
    epsilon_decrease: amount subtracted from epsilon after each game
      (epsilon starts at 1 and is floored at 0).
    backsight: number of frames kept in the sliding window fed to the model.

  Returns:
    None. The model is trained in place.
  """
  def to_frame(observation):
    # Collapse the colour channels into a single channel scaled by 255 * 3;
    # keepdims keeps the trailing channel axis for any frame size.
    return np.sum(observation, axis=2, keepdims=True) / (255 * 3)

  epsilon = 1
  state_history = []
  value_history = []
  # Start from the real first frame rather than a random sample of the
  # observation space.
  obs = env.reset()

  for game_number in range(n_games):
    states = []
    actions = []
    values = []
    # Sliding window of the last `backsight` frames, seeded with the first frame.
    memory = [to_frame(obs)] * backsight
    frames = 0

    while True:
      # Epsilon-greedy: exploit the model with probability 1 - epsilon.
      if random.random() > epsilon:
        action = np.argmax(model.predict(np.array([memory], dtype='float16'))[0])
      else:
        action = env.action_space.sample()

      obs, reward, done, _info = env.step(action)

      if frames % 32 == 0:
        print('Game', game_number, 'frame', frames)
        actions.append(action)
        # Store a copy: `memory` is mutated below, and appending the list
        # itself would make every recorded state alias the same window.
        states.append(list(memory))
        values.append([0] * env.action_space.n)
        memory.pop(0)
        memory.append(to_frame(obs))

      # Credit the reward on every frame, not only on sampled ones, so
      # rewards that arrive between samples are not lost.
      for i, scores in enumerate(values):
        scores[actions[i]] += reward

      frames += 1

      if done:
        print('Game', game_number, 'lasted', frames, 'frames')
        # Keep the fresh observation so the next game starts from it.
        obs = env.reset()
        break

    state_history.extend(states)
    value_history.extend(values)
    model.fit(np.array(state_history, dtype='float16'), np.array(value_history), epochs=epochs)
    epsilon = max(0, epsilon - epsilon_decrease)

In [4]:
!pip install patool

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import patoolib
from os import mkdir
from os.path import isdir
from shutil import rmtree

# Start from an empty 'roms' directory. Only remove it when it exists, so the
# cell also runs in a fresh environment where no previous extraction happened.
if isdir('roms'):
  rmtree('roms')
mkdir('roms')
patoolib.extract_archive("Roms.rar", outdir="roms")

patool: Extracting Roms.rar ...
patool: running /usr/bin/unrar x -- /content/Roms.rar
patool:     with cwd='roms'
patool: ... Roms.rar extracted to `roms'.


'roms'

In [6]:
# Run atari_py's import_roms module on the extracted 'roms' directory.
!python -m atari_py.import_roms roms

copying assault.bin from roms/HC ROMS/NTSC VERSIONS OF PAL ORIGINALS/Assault (AKA Sky Alien) (1983) (Bomb - Onbase) (CA281).bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/assault.bin
copying space_invaders.bin from roms/HC ROMS/BY ALPHABET/S-Z/Space Invaders.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/space_invaders.bin
copying zaxxon.bin from roms/HC ROMS/BY ALPHABET/S-Z/Zaxxon.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/zaxxon.bin
copying pong.bin from roms/HC ROMS/BY ALPHABET/S-Z/Video Olympics - Pong Sports.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/pong.bin
copying wizard_of_wor.bin from roms/HC ROMS/BY ALPHABET/S-Z/Wizard of Wor.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/wizard_of_wor.bin
copying yars_revenge.bin from roms/HC ROMS/BY ALPHABET/S-Z/Yars' Revenge.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/yars_revenge.bin
copying seaquest.bin from roms/

In [11]:
# Pong environment; the LSTM gets one unit per available action.
env = gym.make("Pong-v4")
model = cur_model(
  num_Conv2D=4,
  num_filters=8,
  num_kernel=4,
  num_lstm_units=env.action_space.n,
  num_frames=8,
)

In [12]:
# Shape of a single observation drawn from the observation space.
np.shape(env.observation_space.sample())

(210, 160, 3)

In [13]:
# Play four games, fitting the model once after each game.
reinforcement(
  env,
  model,
  n_games=4,
  epochs=1,
)

Game 0 lasted 1189 frames
Game 1 lasted 1195 frames
Game 2 lasted 1111 frames
Game 3 lasted 1104 frames


In [14]:
# Persist the trained model under 'pong.model'.
model.save(filepath='pong.model')



INFO:tensorflow:Assets written to: pong.model/assets


INFO:tensorflow:Assets written to: pong.model/assets


In [15]:
from shutil import make_archive

# Zip the directory written by model.save('pong.model'). The previous root
# directory, 'pong-model', is not created anywhere in this notebook.
make_archive('pong-archive', 'zip', 'pong.model')

'/content/pong-archive.zip'