<a href="https://colab.research.google.com/github/PiyushiAnand/Breakout_Genius/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup: Gym with the Atari extras and ROM licence, a virtual
# display (pyvirtualdisplay + xvfb) and OpenCV.
# NOTE(review): nothing here is version-pinned, while the cells below call
# `env.seed(...)` and unpack a 4-tuple from `env.step(...)` - confirm the gym
# release that gets installed still supports that API, and consider pinning it.
!pip install gym[atari]
!pip install gym[accept-rom-license]
# NOTE(review): plain `gym` is already pulled in by the two extras above.
!pip install gym
!pip install pyvirtualdisplay
!apt-get install -y xvfb
!pip install opencv-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.


In [None]:
import random
import numpy as np
import torch
import torch.nn as nn
import gym
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve, gaussian
from gym.wrappers.monitoring.video_recorder import ImageEncoder
import os
import io
import base64
import time
import glob
from IPython.display import HTML


%matplotlib inline

In [None]:
from gym.wrappers import AtariPreprocessing
from gym.wrappers import FrameStack
from gym.wrappers import TransformReward

def make_env(env_name, clip_rewards=True, seed=None):
    """Create a Gym environment wrapped for frame-stacked Atari play.

    Args:
        env_name: environment id handed to `gym.make`.
        clip_rewards: when true, every reward is replaced by its sign
            (`np.sign`), i.e. one of -1, 0 or +1.
        seed: optional seed; applied to the freshly created environment via
            `env.seed` before any wrapper is added. Skipped when `None`.

    Returns:
        The environment wrapped in `AtariPreprocessing` (screen_size=84,
        scale_obs=True), then `FrameStack` (4 frames) and, if requested,
        `TransformReward` for the reward clipping.
    """
    base_env = gym.make(env_name)
    if seed is not None:
        base_env.seed(seed)

    preprocessed = AtariPreprocessing(base_env, screen_size=84, scale_obs=True)
    stacked = FrameStack(preprocessed, num_stack=4)

    # Reward clipping is the outermost wrapper so it sees the final reward.
    return TransformReward(stacked, np.sign) if clip_rewards else stacked

# The environment id has to exist before the first make_env call; without this
# assignment the cell raises NameError on a fresh kernel (Restart & Run All).
env_name = "BreakoutNoFrameskip-v4"

env = make_env(env_name)
env.reset()
# Size of the discrete action set and shape of one (stacked) observation.
n_actions = env.action_space.n
state_shape = env.observation_space.shape

In [None]:
# Only the last expression of a cell is rendered, so a bare `n_actions` on its
# own line is silently dropped; show both values together instead.
n_actions, env.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT']

In [None]:
def conv2d_size_out(size, kernel_size, stride):
    """Return the output length along one axis of a strided convolution.

    The formula has no padding or dilation term: the kernel is placed at every
    `stride`-th position that still fits entirely inside `size`.
    """
    # Number of positions the kernel can shift by after its first placement.
    slack = size - kernel_size
    return slack // stride + 1


In [None]:
# Feature-map sizes for the two conv layers of the DQN defined below,
# starting from an 84x84 input frame.
# 1st conv layer: kernel 8, stride 4
conv1 = conv2d_size_out(84, 8, 4)
print('Conv1: ', conv1)
# 2nd conv layer: kernel 4, stride 2 (label fixed - it previously said "Conv1")
conv2 = conv2d_size_out(conv1, 4, 2)
print('Conv2: ', conv2)

# number of units entering the dense layer
print("Input to Dense layer:", conv2*conv2*32) # 32 is the number of filters coming out of the 2nd conv layer

Conv1:  20
Conv1:  9
Input to Dense layer: 2592


In [None]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [None]:
class DQNAgent(nn.Module):
    """Convolutional Q-network with an epsilon-greedy action sampler.

    Two conv layers followed by two linear layers map a batch of stacked
    frames to one Q-value per action.

    Args:
        state_shape: observation shape as (channels, height, width). The first
            conv layer's input channels and the first linear layer's input size
            are derived from it; (4, 84, 84) yields the 4 -> 16 -> 32 conv
            stack with a 2592-unit flattened output.
        n_actions: number of discrete actions (width of the output layer).
        epsilon: probability of taking a random action in `sample_actions`.

    Raises:
        ValueError: if `state_shape` does not have exactly three entries.
    """

    def __init__(self, state_shape, n_actions, epsilon=0):

        super().__init__()
        self.epsilon = epsilon
        self.n_actions = n_actions
        self.state_shape = state_shape

        in_channels, height, width = state_shape
        # Flattened size after both conv layers (32 output filters), computed
        # from the input shape rather than hard-coded for 84x84 frames.
        conv_h = self._conv_out(self._conv_out(height, 8, 4), 4, 2)
        conv_w = self._conv_out(self._conv_out(width, 8, 4), 4, 2)
        n_flat = conv_h * conv_w * 32

        # Input is the state s; output is a vector of Q(s, a), one per action.
        # Sub-module names are part of the state_dict keys, so they are kept
        # stable for existing checkpoints.
        self.network = nn.Sequential()
        self.network.add_module('conv1', nn.Conv2d(in_channels, 16, kernel_size=8, stride=4))
        self.network.add_module('relu1', nn.ReLU())
        self.network.add_module('conv2', nn.Conv2d(16, 32, kernel_size=4, stride=2))
        self.network.add_module('relu2', nn.ReLU())
        self.network.add_module('flatten', nn.Flatten())
        self.network.add_module('linear3', nn.Linear(n_flat, 256))
        self.network.add_module('relu3', nn.ReLU())
        self.network.add_module('linear4', nn.Linear(256, n_actions))

        # nn.Module.parameters() already yields the parameters of
        # `self.network` (the only sub-module), so it is not overridden with an
        # instance attribute here.

    @staticmethod
    def _conv_out(size, kernel_size, stride):
        """Output length along one axis of an unpadded strided convolution."""
        return (size - kernel_size) // stride + 1

    def forward(self, state_t):
        """Pass a batch of states through the network and return Q(s, a)."""
        qvalues = self.network(state_t)
        return qvalues

    def get_qvalues(self, states):
        """Compute Q-values for a batch of states without tracking gradients.

        Args:
            states: batch of observations as a numpy array, a (nested)
                sequence of array-likes, or a torch tensor.

        Returns:
            numpy array with one row of Q-values per state.
        """
        # Follow the module's own device instead of a notebook-level global,
        # so the agent works wherever it has been moved with `.to(...)`.
        module_device = next(self.network.parameters()).device
        if torch.is_tensor(states):
            batch = states.to(device=module_device, dtype=torch.float32)
        else:
            # Collapse to a single ndarray first; building a tensor directly
            # from a list of arrays is slow.
            batch = torch.as_tensor(
                np.asarray(states, dtype=np.float32), device=module_device)
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            qvalues = self.forward(batch)
        return qvalues.cpu().numpy()

    def sample_actions(self, qvalues):
        """Sample one action per row of `qvalues` with an epsilon-greedy policy.

        Args:
            qvalues: numpy array of shape (batch_size, n_actions).

        Returns:
            numpy array of `batch_size` action indices: a uniformly random
            action with probability `self.epsilon`, otherwise the argmax.
        """
        epsilon = self.epsilon
        batch_size, n_actions = qvalues.shape
        random_actions = np.random.choice(n_actions, size=batch_size)
        best_actions = qvalues.argmax(axis=-1)
        should_explore = np.random.choice(
            [0, 1], batch_size, p=[1-epsilon, epsilon])
        return np.where(should_explore, random_actions, best_actions)

In [None]:
from tqdm import trange
from IPython.display import clear_output
import matplotlib.pyplot as plt

In [None]:
def generate_animation(env, agent, save_dir, max_steps=10000):
    """Play one greedy episode and save a PNG of the screen after every step.

    Frames are written to `save_dir` as `test_<step>.png`, with the step
    number zero-padded so that lexicographic order (as used by a shell glob)
    equals chronological order for every step up to `max_steps`. Frames
    already present in `save_dir` are left untouched, so point it at an empty
    directory when the frames are going to be globbed into a video.

    Args:
        env: environment using the 4-tuple `step` API whose base environment
            (`env.unwrapped`) exposes an `ale` object with `saveScreenPNG`.
        agent: object with `get_qvalues(states)` returning an array of
            Q-values per state; the argmax action is always taken.
        save_dir: directory for the frames; created if missing.
        max_steps: hard cap on the number of steps in case the episode never
            terminates.
    """
    print('starting')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    # Reach the emulator through `unwrapped` so the function does not depend
    # on how many wrappers sit between `env` and the Atari environment.
    ale = env.unwrapped.ale
    # At least 4 digits, and wide enough for `max_steps` itself, so the last
    # possible frame still sorts after all earlier ones.
    pad = max(4, len(str(max_steps)))

    state = env.reset()
    reward = 0
    t = 0
    while True:
        qvalues = agent.get_qvalues([state])
        action = qvalues.argmax(axis=-1)[0]
        state, r, done, _ = env.step(action)
        reward += r
        t += 1
        ale.saveScreenPNG(f'{save_dir}/test_{str(t).zfill(pad)}.png')
        if done or t >= max_steps:
            print('Got reward: {}'.format(reward))
            break

def display_animation(filepath):
    """Return an HTML `<video>` element with the file embedded as base64.

    Args:
        filepath: path of the video file; it is embedded with the
            `video/mp4` MIME type.

    Returns:
        An `IPython.display.HTML` object that renders an inline player.
    """
    # Read-only binary mode is enough, and the context manager guarantees the
    # handle is closed (the file was previously opened 'r+b' and never closed).
    with open(filepath, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    return HTML(data='''<video alt="test" controls>
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                 </video>'''.format(encoded.decode('ascii')))

In [None]:
# Evaluation run: build the environment, restore the trained weights and dump
# one greedy episode as PNG frames into `save_dir`.
env_name = "BreakoutNoFrameskip-v4"
save_dir='frames/t_5m'
env = make_env(env_name)
state_dim = env.observation_space.shape
n_actions = env.action_space.n
state = env.reset()

agent = DQNAgent(state_dim, n_actions).to(device)
# Remap the checkpoint onto whichever device was selected; forcing
# `storage.cuda(...)` fails on a CPU-only runtime.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
agent.load_state_dict(torch.load('model_9000000.pth', map_location=device))
generate_animation(env, agent, save_dir=save_dir)

starting
Got reward: 23.0


In [None]:
!apt-get -qq install -y ffmpeg
!ffmpeg -framerate 30 -pattern_type glob -i 'frames/t_5m/*.png' -c:v libx264 -pix_fmt yuv420p output.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:
# Colab-specific module; this cell is not expected to work outside Google Colab.
from google.colab import files

# Hand the encoded video written by the ffmpeg cell to the user.
# NOTE(review): presumably triggers a browser download - confirm against the Colab docs.
files.download('output.mp4')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>