In [6]:
import torch
import numpy as np
# Report the installed library versions (one line per library).
print(f"numpy version is: {np.__version__}")
print(f"torch version is: {torch.__version__}")

numpy version is: 1.26.4
torch version is: 2.3.1+cu121


In [10]:
# Build a 3x2 float32 tensor filled with zeros and show it.
a = torch.zeros(3, 2, dtype=torch.float32)
print(a)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [16]:
# 2x3 NumPy array of zeros (NumPy's default float dtype).
n = np.zeros((2, 3))
print(n)

[[0. 0. 0.]
 [0. 0. 0.]]


In [18]:
# Copy the NumPy array `n` into a torch tensor, converting the values to
# 32-bit integers on the way.
m = torch.tensor(
    n,
    dtype=torch.int32,
)
print(m)

tensor([[0, 0, 0],
        [0, 0, 0]], dtype=torch.int32)


In [51]:
import torch.nn as nn

# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so this cell also runs on machines without a GPU.
device = torch.device('cuda', 0) if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Fully connected layer mapping 2 input features to 5 outputs, moved to `device`.
l = nn.Linear(2, 5).to(device)
# Input vector placed on the same device as the layer.
v = torch.tensor([1, 2], dtype=torch.float32).to(device)

# Show the layer's parameters (weight matrix and bias vector) and the input.
print(list(l.parameters()))
print(v)

cuda:0
[Parameter containing:
tensor([[-0.0380, -0.2205],
        [ 0.1677, -0.0954],
        [ 0.2849,  0.0397],
        [ 0.3442, -0.3085],
        [-0.2297, -0.4714]], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.2561, -0.6667,  0.2437, -0.5131,  0.0876], device='cuda:0',
       requires_grad=True)]
tensor([1., 2.], device='cuda:0')


In [33]:
# Apply the linear layer `l` to the vector `v`; as the last expression of the
# cell, the resulting tensor is what the notebook displays below.
l(v)

tensor([ 1.4055,  1.3871,  0.1770, -0.9062,  0.6648], device='cuda:0',
       grad_fn=<ViewBackward0>)

In [40]:
# Small feed-forward stack: 2 -> 5 -> 20 -> 10 features with ReLU between the
# linear layers, then dropout (p=0.3) and a softmax over dim 1.
s = nn.Sequential(
    nn.Linear(in_features=2, out_features=5),
    nn.ReLU(),
    nn.Linear(in_features=5, out_features=20),
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
    nn.Dropout(p=0.3),
    nn.Softmax(dim=1),
)
# Module.to() moves the parameters and returns the same module.
s = s.to(device)
print(s)

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): Dropout(p=0.3, inplace=False)
  (6): Softmax(dim=1)
)


In [49]:
# Run a single-sample batch (shape 1x2) through the stack; the tensor is
# created directly on `device`.
s(torch.tensor([[1.0, 2.0]], dtype=torch.float32, device=device))

tensor([[0.1411, 0.1268, 0.1070, 0.0993, 0.1025, 0.0630, 0.1025, 0.0774, 0.1025,
         0.0779]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [1]:
import gymnasium as gym

In [2]:
# Create the environment registered under the id "AirRaid-v0". The cell output
# below shows a deprecation message from the logger and the A.L.E. banner.
env = gym.make("AirRaid-v0")

  logger.deprecation(
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [4]:
# Show the environment's observation space; the output below reports a uint8
# Box with values 0..255 and shape (250, 160, 3).
print(env.observation_space)

Box(0, 255, (250, 160, 3), uint8)


In [61]:
import random
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

import torchvision.utils as vutils

import gymnasium as gym
#import gymnasium.spaces as gym.spaces

import numpy as np

# Reuse gymnasium's logger module under the short name `log` and let
# INFO-level messages through.
log = gym.logger
log.set_level(log.INFO)

In [62]:
# Global variables

# --- network sizes ---
LATENT_VECTOR_SIZE = 100   # channels of the noise vector fed to the generator
DISCR_FILTERS = 64         # base number of convolution filters in the discriminator
GENER_FILTERS = 64         # base number of convolution filters in the generator

# --- data ---
BATCH_SIZE = 16            # observations per training batch
IMAGE_SIZE = 64            # frames are resized to IMAGE_SIZE x IMAGE_SIZE

# --- optimisation and reporting ---
LEARNING_RATE = 1e-4               # learning rate passed to both Adam optimizers
REPORT_EVERY_ITER = 100            # log averaged losses every N iterations
SAVE_IMAGE_EVERY_ITER = 1000       # write sample images every N iterations

In [63]:
# Wrapper for observation preprocessing

class PreprocessingObservationWrapper(gym.ObservationWrapper):
    """
    Observation wrapper that reshapes every frame before it is returned:
    1. resize the frame to IMAGE_SIZE x IMAGE_SIZE
    2. move the last axis (the color channels) to the front
    3. cast the result to float32

    The wrapper's observation space is rebuilt by pushing the bounds of the
    wrapped space through the same transformation.
    """
    def __init__(self, *args):
        super().__init__(*args)
        source_space = self.observation_space
        # The new space is derived from the `low`/`high` bound arrays read just
        # below, so the wrapped space is required to be a Box.
        assert isinstance(source_space, gym.spaces.Box)
        low_bound = self.observation(source_space.low)
        high_bound = self.observation(source_space.high)
        self.observation_space = gym.spaces.Box(low_bound, high_bound, dtype=np.float32)

    def observation(self, observation):
        """Return `observation` resized, with its last axis moved first, as float32."""
        resized = cv2.resize(observation, (IMAGE_SIZE, IMAGE_SIZE))
        channels_first = np.moveaxis(resized, -1, 0)
        return channels_first.astype(np.float32)


In [64]:
# Class for discriminator

class Discriminator(nn.Module):
    """
    Convolutional network that reduces an image to one sigmoid score per sample.

    Args:
        input_shape: shape of a single input image; only input_shape[0]
            (the number of channels) is used.
    """
    def __init__(self, input_shape):
        super().__init__()
        base = DISCR_FILTERS
        in_channels = input_shape[0]
        # Four stride-2 convolutions each halve the spatial size (64 -> 32 ->
        # 16 -> 8 -> 4 for 64x64 inputs); the last 4x4 convolution without
        # padding collapses that to a single value, squashed by the sigmoid.
        self.conv_pipe = nn.Sequential(
            nn.Conv2d(in_channels, base, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(base, base * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base * 2),
            nn.ReLU(),
            nn.Conv2d(base * 2, base * 4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base * 4),
            nn.ReLU(),
            nn.Conv2d(base * 4, base * 8, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base * 8),
            nn.ReLU(),
            nn.Conv2d(base * 8, 1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return a 1-D tensor holding every value produced by the conv pipe."""
        scores = self.conv_pipe(x)
        as_column = scores.view(-1, 1)
        return as_column.squeeze(dim=1)

In [65]:
# Class for generator

class Generator(nn.Module):
    """
    Transposed-convolution network that expands a latent vector into an image.

    Args:
        output_shape: shape of a single generated image; only output_shape[0]
            (the number of channels) is used.
    """
    def __init__(self, output_shape):
        super().__init__()
        base = GENER_FILTERS
        out_channels = output_shape[0]
        # For a (LATENT_VECTOR_SIZE, 1, 1) input the first layer produces a 4x4
        # map; each following stride-2 layer doubles it (8 -> 16 -> 32 -> 64).
        # Tanh bounds the output values to (-1, 1).
        self.pipe = nn.Sequential(
            nn.ConvTranspose2d(LATENT_VECTOR_SIZE, base * 8, kernel_size=4, stride=1, padding=0),
            nn.BatchNorm2d(base * 8),
            nn.ReLU(),
            nn.ConvTranspose2d(base * 8, base * 4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base * 4),
            nn.ReLU(),
            nn.ConvTranspose2d(base * 4, base * 2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base * 2),
            nn.ReLU(),
            nn.ConvTranspose2d(base * 2, base, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(base),
            nn.ReLU(),
            nn.ConvTranspose2d(base, out_channels, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Run the latent input `x` through the transposed-convolution pipe."""
        generated = self.pipe(x)
        return generated

In [66]:
# Form a batch of observations to pass to discriminator

def iterate_batches(envs, batch_size=BATCH_SIZE):
    """
    Endlessly yield batches of observations collected with random actions.

    Every environment is reset once and its first observation seeds the batch.
    After that the generator repeatedly picks a random environment, steps it
    with a sampled action, keeps the observation when its mean is above 0.01,
    and resets the environment whenever the episode terminates or is truncated.

    Args:
        envs: non-empty sequence of environments exposing `reset()` (returning
            a tuple whose first item is the observation), `step(action)`
            (returning a 5-tuple) and `action_space.sample()`.
        batch_size: number of observations per yielded batch; must be >= 1.

    Yields:
        A float32 torch tensor stacking `batch_size` observations along dim 0,
        rescaled as `x * 2 / 255 - 1` (maps 0..255 onto -1..1).

    Raises:
        ValueError: if `batch_size` is smaller than 1. Because this is a
            generator, the error surfaces on the first `next()` call.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))

    batch = [e.reset()[0] for e in envs]

    while True:
        e = random.choice(envs)
        obs, _reward, terminated, truncated, _info = e.step(e.action_space.sample())
        # Presumably filters out (almost) blank frames - TODO confirm intent.
        if np.mean(obs) > 0.01:
            batch.append(obs)
        # `>=` instead of `==`: when len(envs) >= batch_size the initial batch
        # can already be full, and an exact-equality check would be stepped
        # over, growing the list forever without ever yielding.
        if len(batch) >= batch_size:
            # Normalising input between -1 to 1
            batch_np = np.array(batch[:batch_size], dtype=np.float32) * 2.0 / 255.0 - 1.0
            yield torch.tensor(batch_np)
            # Drop only what was emitted; surplus observations stay queued.
            del batch[:batch_size]
        if terminated or truncated:
            e.reset()

In [67]:
# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
# One preprocessed environment per listed id.
envs = [
    PreprocessingObservationWrapper(gym.make(env_id))
    for env_id in ('Breakout-v0', )
]
# Shape of a single preprocessed observation, taken from the first environment.
input_shape = envs[0].observation_space.shape

In [68]:
# Networks (discriminator first, then generator), both moved to `device`.
net_discr = Discriminator(input_shape=input_shape).to(device)
net_gener = Generator(output_shape=input_shape).to(device)

# Binary cross-entropy loss and one Adam optimizer per network.
objective = nn.BCELoss()
gen_optimizer = optim.Adam(net_gener.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
dis_optimizer = optim.Adam(net_discr.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.999))
writer = SummaryWriter()

# Running loss buffers and the iteration counter used by the training loop.
gen_losses, dis_losses = [], []
iter_no = 0

# Constant target vectors: ones for "real", zeros for "fake".
true_labels_v = torch.ones(BATCH_SIZE, dtype=torch.float32, device=device)
fake_labels_v = torch.zeros_like(true_labels_v)

In [69]:
# GAN training loop. `iterate_batches` never stops by itself, so the loop runs
# until the cell is interrupted; the `finally` clause makes sure the events
# buffered by the TensorBoard writer are written out in that case as well.
try:
    for batch_v in iterate_batches(envs):
        # generate extra fake samples, input is 4D: batch, filters, x, y
        # (noise is sampled directly on the target device)
        gen_input_v = torch.randn(BATCH_SIZE, LATENT_VECTOR_SIZE, 1, 1,
                                  dtype=torch.float32, device=device)
        batch_v = batch_v.to(device)
        gen_output_v = net_gener(gen_input_v)

        # train discriminator: real batch against ones, generated batch against
        # zeros; detach() keeps this backward pass out of the generator
        dis_optimizer.zero_grad()
        dis_output_true_v = net_discr(batch_v)
        dis_output_fake_v = net_discr(gen_output_v.detach())
        dis_loss = objective(dis_output_true_v, true_labels_v) + objective(dis_output_fake_v, fake_labels_v)
        dis_loss.backward()
        dis_optimizer.step()
        dis_losses.append(dis_loss.item())

        # train generator: its samples should be scored as "real" (ones)
        gen_optimizer.zero_grad()
        dis_output_v = net_discr(gen_output_v)
        gen_loss_v = objective(dis_output_v, true_labels_v)
        gen_loss_v.backward()
        gen_optimizer.step()
        gen_losses.append(gen_loss_v.item())

        iter_no += 1
        if iter_no % REPORT_EVERY_ITER == 0:
            # Average the losses collected since the previous report.
            mean_gen_loss = np.mean(gen_losses)
            mean_dis_loss = np.mean(dis_losses)
            log.info("Iter %d: gen_loss=%.3e, dis_loss=%.3e", iter_no, mean_gen_loss, mean_dis_loss)
            writer.add_scalar("gen_loss", mean_gen_loss, iter_no)
            writer.add_scalar("dis_loss", mean_dis_loss, iter_no)
            gen_losses = []
            dis_losses = []
        if iter_no % SAVE_IMAGE_EVERY_ITER == 0:
            # make_grid(normalize=True) rescales the images into [0, 1].
            writer.add_image("fake", vutils.make_grid(gen_output_v.detach()[:64], normalize=True), iter_no)
            writer.add_image("real", vutils.make_grid(batch_v[:64], normalize=True), iter_no)
finally:
    # Runs on KeyboardInterrupt too; the exception itself still propagates.
    writer.flush()

INFO: Iter 100: gen_loss=5.233e+00, dis_loss=4.609e-02
INFO: Iter 200: gen_loss=6.763e+00, dis_loss=2.808e-03
INFO: Iter 300: gen_loss=7.284e+00, dis_loss=1.467e-03
INFO: Iter 400: gen_loss=7.573e+00, dis_loss=1.048e-03
INFO: Iter 500: gen_loss=7.769e+00, dis_loss=8.226e-04
INFO: Iter 600: gen_loss=8.044e+00, dis_loss=6.051e-04
INFO: Iter 700: gen_loss=8.347e+00, dis_loss=4.337e-04
INFO: Iter 800: gen_loss=8.551e+00, dis_loss=3.661e-04
INFO: Iter 900: gen_loss=8.766e+00, dis_loss=3.031e-04
INFO: Iter 1000: gen_loss=8.168e+00, dis_loss=3.476e-01
INFO: Iter 1100: gen_loss=4.498e+00, dis_loss=3.942e-01
INFO: Iter 1200: gen_loss=5.207e+00, dis_loss=1.310e-01
INFO: Iter 1300: gen_loss=6.202e+00, dis_loss=7.613e-03
INFO: Iter 1400: gen_loss=6.641e+00, dis_loss=3.284e-03
INFO: Iter 1500: gen_loss=7.159e+00, dis_loss=1.912e-03
INFO: Iter 1600: gen_loss=7.680e+00, dis_loss=1.025e-03
INFO: Iter 1700: gen_loss=7.864e+00, dis_loss=7.844e-04
INFO: Iter 1800: gen_loss=8.170e+00, dis_loss=5.454e-04
I

KeyboardInterrupt: 

In [32]:
# Playing around with TensorBoard: log the first frame of 100 environment resets.

import torchvision
writer = SummaryWriter()
env = PreprocessingObservationWrapper(gym.make("AirRaid-v0"))
for i in range(0, 100):
    # The wrapper returns float32 frames built from 0..255 pixel data, while
    # add_image treats float input as being in [0, 1] (it multiplies by 255
    # and clips). Rescale first, otherwise the logged image is saturated.
    frame = env.reset()[0] / 255.0
    writer.add_image('my_image', frame, i)
# Flush and release the event file once all frames are written.
writer.close()


In [2]:
from torch.utils.tensorboard import SummaryWriter
# Log the line y = 2x: one scalar per integer step in [0, 100).
writer = SummaryWriter()
x = range(100)
for step in x:
    doubled = 2 * step
    writer.add_scalar('y=2x', doubled, step)
writer.close()

In [1]:
import math
from torch.utils.tensorboard import SummaryWriter


if __name__ == "__main__":
    # Log sin, cos and tan for every whole degree in [-360, 360), using the
    # angle in degrees as the step.
    writer = SummaryWriter()

    funcs = {"sin": math.sin, "cos": math.cos, "tan": math.tan}

    for angle in range(-360, 360):
        # degrees -> radians
        angle_rad = angle * math.pi / 180
        for name in funcs:
            writer.add_scalar(name, funcs[name](angle_rad), angle)

    writer.close()