In [1]:
import torch
from torch import nn, optim
from torch.autograd.variable import Variable
from torch.utils.data import DataLoader  # Dataset management. It helps us create mini-batches to train and ..
import torchvision.datasets as datasets  # Datasets such as MNIST
import torchvision.transforms as transforms  # transformations that we can do on our datasets 
from util import Logger
import numpy as np
import matplotlib.pyplot as plt

## Hyperparameters

In [2]:
discriminator_input_size = 784  # 28*28 pixels
generator_input_size = 100
discriminator_output_size = 1  # a probability in range [0-1]
learning_rate = 0.0002
batch_size = 100
num_epochs = 5

## Set device

In [3]:
device = torch.device('cpu')

## Reading From Dataset

In [4]:
# Train data
train_dataset = datasets.MNIST(root='dataset/',
                train=True,
                transform=transforms.ToTensor(), # The data is numpy and we want it to be transformed to PyTorch Tensors
                download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [16]:
train_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

## Creating Discriminator & Generator

In [5]:
class Discriminator(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(Discriminator, self).__init__()

        self.hidden1 = nn.Sequential( 
            nn.Linear(input_size, 1024),
            nn.LeakyReLU(0.2),    # the slope for negative X values in ReLU function
            nn.Dropout(0.3)    # the probability of dropping out neurons
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden3 = nn.Sequential(
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.output = nn.Sequential(
            torch.nn.Linear(256, output_size),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.hidden3(x)
        x = self.output(x)
        return x
    
discriminator = Discriminator(input_size=discriminator_input_size, output_size=discriminator_output_size).to(device=device)

In [6]:
class Generator(torch.nn.Module):
    def __init__(self, input_size, output_size):
        super(Generator, self).__init__()
        
        self.hidden0 = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.LeakyReLU(0.2)
        )
        self.hidden1 = nn.Sequential(            
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2)
        )
        
        self.output = nn.Sequential(
            nn.Linear(1024, output_size),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.output(x)
        return x
generator = Generator(input_size=generator_input_size, output_size=discriminator_input_size).to(device=device)

## Loss and Optimizier

In [7]:
loss = nn.BCELoss()

In [8]:
d_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)
g_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)

## Train functions

In [9]:
def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data

def zeros_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data

In [10]:
def train_discriminator(optimizer, real_data, fake_data):
    number_of_data = real_data.size(0)
    optimizer.zero_grad()
    
    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    error_real = loss(prediction_real, ones_target(number_of_data) )
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)
    # Calculate error and backpropagate
    error_fake = loss(prediction_fake, zeros_target(number_of_data))
    error_fake.backward()
    
    # 1.3 Update weights with gradients
    optimizer.step()
    
    # Return error and predictions for real and fake inputs
    return error_real + error_fake, prediction_real, prediction_fake

In [11]:
def train_generator(optimizer, fake_data):
    number_of_data = fake_data.size(0)

    # Reset gradients
    optimizer.zero_grad()

    # Sample noise and generate fake data
    prediction = discriminator(fake_data)

    # Calculate error and backpropagate
    error = loss(prediction, ones_target(number_of_data))
    error.backward()

    # Update weights with gradients
    optimizer.step()

    return error

## Creating test noise for the generator

In [12]:
def create_noise(size):
    return torch.randn(size, 100).to(device=device)

In [13]:
num_test_samples = 16
test_noise = Variable(create_noise(num_test_samples))

## Train Network

In [14]:
logger = Logger(model_name='VGAN', data_name='MNIST')

for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Train Discriminator        
        real_data = data.to(device=device)  # data size is batch_sizex1x28x28. Since we have batch_size images in each batch having 1 channel each.
        
        fake_data = generator(create_noise(batch_size)).detach()
        fake_data = fake_data.to(device=device)
        
        d_error, d_pred_real, d_pred_fake = train_discriminator(d_optimizer, real_data, fake_data)
        
        # Train Generator
        fake_data = generator(create_noise(batch_size))
        
        g_error = train_generator(g_optimizer, fake_data)
        
        logger.log(d_error, g_error, epoch, batch_number, batch_numbers)
        
        if batch_number % 100 == 0:
            test_vectors = generator(test_noise)
            test_images = test_vectors.view(test_vectors.size(0), 1, 28, 28)
            
            logger.log_images(
                test_images, num_test_samples, 
                epoch, batch_number, batch_numbers
            );
            
            # Display status Logs
            logger.display_status(
                epoch, number_of_epochs, batch_number, batch_numbers,
                d_error, g_error, d_pred_real, d_pred_fake
            )
        
    


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2800x28 and 784x1024)