In [1]:
import torch

In [2]:
torch.randn(32, 64)

tensor([[-0.3681,  1.0725,  1.1745,  ...,  0.6649,  1.8754, -1.1738],
        [ 1.9555,  3.0755, -0.1360,  ...,  0.4308, -0.4491, -0.5433],
        [-0.6081, -0.5714,  1.8643,  ..., -1.3304, -0.9567,  1.0136],
        ...,
        [-1.1398,  0.8878,  0.5903,  ..., -0.1679, -2.0859, -0.3872],
        [-2.0747, -0.8561,  0.4827,  ...,  0.2829, -2.4995,  1.3371],
        [-0.6228,  0.0829, -0.0374,  ...,  0.6859, -0.2517,  0.3112]])

In [3]:
import numpy as np
# Hyper Parameters
BATCH_SIZE = 64
LR_G = 0.0001           # learning rate for generator
LR_D = 0.0001           # learning rate for discriminator
N_IDEAS = 5             # think of this as number of ideas for generating an art work (Generator)
ART_COMPONENTS = 15     # it could be total point G can draw in the canvas
PAINT_POINTS = np.vstack([np.linspace(-1, 1, ART_COMPONENTS) for _ in range(BATCH_SIZE)])

In [4]:
PAINT_POINTS

array([[-1.        , -0.85714286, -0.71428571, -0.57142857, -0.42857143,
        -0.28571429, -0.14285714,  0.        ,  0.14285714,  0.28571429,
         0.42857143,  0.57142857,  0.71428571,  0.85714286,  1.        ],
       [-1.        , -0.85714286, -0.71428571, -0.57142857, -0.42857143,
        -0.28571429, -0.14285714,  0.        ,  0.14285714,  0.28571429,
         0.42857143,  0.57142857,  0.71428571,  0.85714286,  1.        ],
       [-1.        , -0.85714286, -0.71428571, -0.57142857, -0.42857143,
        -0.28571429, -0.14285714,  0.        ,  0.14285714,  0.28571429,
         0.42857143,  0.57142857,  0.71428571,  0.85714286,  1.        ],
       [-1.        , -0.85714286, -0.71428571, -0.57142857, -0.42857143,
        -0.28571429, -0.14285714,  0.        ,  0.14285714,  0.28571429,
         0.42857143,  0.57142857,  0.71428571,  0.85714286,  1.        ],
       [-1.        , -0.85714286, -0.71428571, -0.57142857, -0.42857143,
        -0.28571429, -0.14285714,  0.        , 

In [9]:
np.zeros(shape=(10,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [10]:
np.zeros_like(np.arange(20).reshape(5,4))

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [11]:
import torch
from torch import nn
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms


# Things to try
# 1.What happens if you use larger network?
# 2.Better normalization with BatchNorm
# 3.Different learning rate?
# 4.Change architecture to CNN?


class Discriminator(nn.Module):
    def __init__(self, in_features):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, z_dim, img_dim):
        super(Generator, self).__init__()
        self.gen = nn.Sequential(
            nn.Linear(z_dim, 256),
            nn.LeakyReLU(0.1),
            nn.Linear(256, img_dim),
            nn.Tanh()
        )

    def forward(self, x):
        return self.gen(x)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
learning_rate = 3e-4
z_dim = 64
img_dim = 28 * 28 * 1
batch_size = 32
num_epochs = 2
writer_fake = SummaryWriter(f"logs/fake")
writer_real = SummaryWriter(f"logs/real")
step = 0

discriminator = Discriminator(in_features=img_dim).to(device)
generator = Generator(z_dim=z_dim, img_dim=img_dim).to(device)
fixed_noise = torch.randn((batch_size, z_dim)).to(device)
transforms = transforms.Compose([
    # torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.5,), (0.5,))
    torchvision.transforms.ToTensor(), torchvision.transforms.Normalize(mean=0.5, std=0.5)
])
# 上面的ToTensor()可以完成归一化，同时Normalize把mean和std设置为0.5,可以把所有[0,1]的值投影到[-1,1]的区间内
# 这也是为什么最后的Generator要加一层tanh，是因为tanh会把所有的值投射到[-1,1]

datasets = datasets.MNIST(root='./MNIST_dataset', transform=transforms, download=True)
dataloader = DataLoader(dataset=datasets, batch_size=batch_size, shuffle=True)
optimizer_disc = torch.optim.Adam(params=discriminator.parameters(), lr=learning_rate)
optimizer_gen = torch.optim.Adam(params=generator.parameters(), lr=learning_rate)
loss_fn = nn.BCELoss()

for epoch in range(num_epochs):
    for batch_idx, (imgs, _) in enumerate(dataloader):
        imgs = imgs.reshape(-1, 784).to(device)
        batch_size = imgs.shape[0]   # 这一行与前面的batch_size是一个意思

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        # 但通过查看BCELoss的Pytorch官方文档，可以看到其表达式前面存在一个负号，因而对于lossD来说是要Minimize loss
        noise = torch.randn(batch_size, z_dim).to(device)
        print(f'noise.shape:{noise.shape}')   # Size([32, 64])
        fake = generator(noise)
        print(f'fake.shape:{fake.shape}')    # Size([32, 784])
        disc_real = discriminator(imgs).reshape(-1)
        print(f'disc_real.shape:{disc_real.shape}')    # 最后返回的都是一个数，同时最后是在一个batch_size下，所以是batch_size个数
        lossD_real = loss_fn(disc_real, torch.ones_like(disc_real))
        disc_fake = discriminator(fake).reshape(-1)
        print(f'disc_fake.shape:{disc_fake.shape}')   # 最后返回的都是一个数，同时最后是在一个batch_size下，所以是batch_size个数
        # 这里同理，是因为discriminator和generator都是返回的一个数字
        lossD_fake = loss_fn(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2
        optimizer_disc.zero_grad()
        lossD.backward(retain_graph=True)  # retain the grads and the grads will not be freed
        optimizer_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        # where the second option of maximizing doesn't suffer from
        # saturating gradients
        output = discriminator(fake).reshape(-1)
        lossG = loss_fn(output, torch.ones_like(output))

        optimizer_gen.zero_grad()
        lossG.backward()
        optimizer_gen.step()

        if batch_idx == 0:
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(dataloader)} \
                              Loss D: {lossD:.4f}, loss G: {lossG:.4f}"
            )

            with torch.no_grad():
                fake = generator(fixed_noise).reshape(-1, 1, 28, 28)
                data = imgs.reshape(-1, 1, 28, 28)
                img_grid_fake = torchvision.utils.make_grid(fake, normalize=True)
                img_grid_real = torchvision.utils.make_grid(data, normalize=True)

                writer_fake.add_image(
                    "Mnist Fake Images", img_grid_fake, global_step=step
                )
                writer_real.add_image(
                    "Mnist Real Images", img_grid_real, global_step=step
                )
                step += 1

noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
Epoch [0/2] Batch 0/1875                               Loss D: 0.6079, loss G: 0.6958
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.Size([32])
noise.shape:torch.Size([32, 64])
fake.shape:torch.Size([32, 784])
disc_real.shape:torch.Size([32])
disc_fake.shape:torch.S

KeyboardInterrupt: 

In [12]:
import keyword
keyword.kwlist

['False',
 'None',
 'True',
 '__peg_parser__',
 'and',
 'as',
 'assert',
 'async',
 'await',
 'break',
 'class',
 'continue',
 'def',
 'del',
 'elif',
 'else',
 'except',
 'finally',
 'for',
 'from',
 'global',
 'if',
 'import',
 'in',
 'is',
 'lambda',
 'nonlocal',
 'not',
 'or',
 'pass',
 'raise',
 'return',
 'try',
 'while',
 'with',
 'yield']

In [14]:
import numpy as np
a = np.arange(10).reshape(2,5)
a.shape

(2, 5)

In [16]:
np.squeeze(a).shape

(2, 5)

In [17]:
squares =np.array([[1,4,9,16,25]])
squares.shape

(1, 5)

In [19]:
np.squeeze(squares).shape

(5,)

In [21]:
np.full((2,2),7)

array([[7, 7],
       [7, 7]])

In [23]:
np.random.rand(2)

array([0.71978008, 0.33602709])

In [24]:
np.random.rand(2,2)

array([[0.73465135, 0.83785186],
       [0.19787237, 0.02900143]])

In [26]:
np.random.random((2,2))

array([[0.8523121 , 0.55072865],
       [0.28225761, 0.38511207]])

In [29]:
x = np.random.randint(0,10,(4,6))
print(x)
np.partition(x, 3, axis=1)

[[4 9 6 4 8 1]
 [7 1 2 6 3 9]
 [3 1 9 7 6 9]
 [0 5 9 6 8 0]]


array([[4, 1, 4, 6, 8, 9],
       [1, 2, 3, 6, 7, 9],
       [1, 3, 6, 7, 9, 9],
       [0, 0, 5, 6, 8, 9]])

NameError: name 'mglearn' is not defined