In [1]:
import os
import torch
from torch import nn
import torch.optim as optim

import pandas as pd
import numpy as np

import getpass
import sys
sys.path.insert(0, f'/home/{getpass.getuser()}/dowgan/dowgan')
import Util

In [3]:
# CSV path
folder_name = "data"
file_name = "hungary_chickenpox.csv"
path = os.path.join('../', folder_name, file_name)
#Load CSV into Dataframe
df = pd.read_csv(path,sep=',')
df = df.drop(columns = ['Date'])

In [4]:
df

Unnamed: 0,BUDAPEST,BARANYA,BACS,BEKES,BORSOD,CSONGRAD,FEJER,GYOR,HAJDU,HEVES,JASZ,KOMAROM,NOGRAD,PEST,SOMOGY,SZABOLCS,TOLNA,VAS,VESZPREM,ZALA
0,168,79,30,173,169,42,136,120,162,36,130,57,2,178,66,64,11,29,87,68
1,157,60,30,92,200,53,51,70,84,28,80,50,29,141,48,29,58,53,68,26
2,96,44,31,86,93,30,93,84,191,51,64,46,4,157,33,33,24,18,62,44
3,163,49,43,126,46,39,52,114,107,42,63,54,14,107,66,50,25,21,43,31
4,122,78,53,87,103,34,95,131,172,40,61,49,11,124,63,56,7,47,85,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
517,95,12,41,6,39,0,16,15,14,10,56,7,13,122,4,23,4,11,110,10
518,43,39,31,10,34,3,2,30,25,19,34,20,18,70,36,5,23,22,63,9
519,35,7,15,0,0,0,7,7,4,2,30,36,4,72,5,21,14,0,17,10
520,30,23,8,0,11,4,1,9,10,17,27,17,21,12,5,17,1,1,83,2


In [5]:
embeddings = nn.Embedding(df.shape[1],100)
embeddings.weight.requires_grad = False

In [6]:
class Discriminator(nn.Module):
    def __init__(self, embeddings, nc=3, ndf=64):
        super(Discriminator, self).__init__()
        
        self.embeddings = embeddings
        self.label_to_image = nn.Linear(100,32*32*3)
        self.conv1 = nn.Conv2d(nc * 2, nc, 1, 1, 0, bias=False)
        
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )
        
    def forward(self, x, label_embed):
        
        label_embed = self.embeddings(label_embed)

        label_map = self.label_to_image(label_embed)
        label_map = label_map.view(-1,3,32,32)

        x = torch.cat([x,label_map], dim=1)

        out = self.conv1(x)
        output = self.main(out)

        return output

In [7]:
class Generator(nn.Module):
    def __init__(self, embeddings, nc=3, nz=100, ngf=64):
        super(Generator, self).__init__()
        
        self.embeddings = embeddings
        self.linear = nn.Linear(200,100)
        
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(     nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )
        
    def forward(self, x, label_embed):
        label_embed = self.embeddings(label_embed)

        x = x.view(-1,100)
        x = torch.cat([x,label_embed], dim=1)

        x = self.linear(x)
        x = x.unsqueeze(2).unsqueeze(3)

        output = self.main(x)
        return output

In [14]:
# Set random state
torch.manual_seed(111)
# Select # of data points from data set to train GAN
num_data = 450
# Specify Validation Data
val_data = 50
# Specify dimensionality of dataframe
df_dim = len(df.columns)
# learning rate
lr = 0.002
# number of epoch
epochs = 1000
# Discriminator and Generator dropout fro data standardization
drop_out=0.3
# Define batch size for the data loader
batch_size = 25

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
# These create the arrays and then turns them into tensors for the train loader 
arrays = Util.create_arrays(df[:num_data],num_data)
train_set = Util.create_tensors(arrays)

train_data_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)


embeddings = nn.Embedding(10,100)
embeddings.weight.requires_grad = False
embeddings(torch.LongTensor())

netD = Discriminator(embeddings)
netG = Generator(embeddings)


optimizerD = optim.Adam(netD.parameters(),lr=0.0002,betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(),lr=0.0002,betas=(0.5, 0.999))

netD.train()
netG.train()

nz = 100

criterion = nn.BCELoss()

real_label = torch.ones([batch_size,1], dtype=torch.float).to(device)
fake_label = torch.zeros([batch_size,1], dtype=torch.float).to(device)


for epoch in range(epochs):
    for i, (input_sequence, label) in enumerate(train_data_loader):
        
        fixed_noise = torch.randn(batch_size, nz, 1, 1, device=device)

        input_sequence = input_sequence.to(device)
        label_embed = label.to(device).long()
        
        '''
            Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        '''

        D_real_result = netD(input_sequence, label_embed)
        D_real_loss = criterion(D_real_result.view(batch_size,-1), real_label)

        G_result = netG(fixed_noise,label_embed)

        D_fake_result = netD(G_result,label_embed)

        D_fake_loss = criterion(D_fake_result.view(batch_size,-1), fake_label)

        # Back propagation
        D_train_loss = (D_real_loss + D_fake_loss) / 2

        netD.zero_grad()
        D_train_loss.backward()
        optimizerD.step()

        '''
            Update G network: maximize log(D(G(z)))
        '''
        new_label = torch.LongTensor(batch_size,10).random_(0, 10).to(device)
        new_embed = new_label[:,0].view(-1)

        G_result = netG(fixed_noise, new_embed)

        D_fake_result = netD(G_result, new_embed)
        G_train_loss = criterion(D_fake_result.view(batch_size,-1), real_label)


        # Back propagation
        netD.zero_grad()
        netG.zero_grad()
        G_train_loss.backward()
        optimizerG.step()
        
        print("D_loss:%f\tG_loss:%f" % (D_train_loss,G_train_loss))

RuntimeError: Tensors must have same number of dimensions: got 2 and 4