In [0]:
import numpy as np
import random 
import copy
import seaborn as sns
from sklearn import datasets
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
from datetime import datetime

import os
from google.colab import files

# Device string used by every `.to(DEVICE)` call below: the first CUDA GPU when
# PyTorch reports one as available, otherwise the CPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Bare last expression of the cell, so the notebook echoes CUDA availability as the cell output.
torch.cuda.is_available()

True

In [0]:
# Data params

# ##### DATA: Target data and generator input data
def get_x(size, num_of_gaus=None):
    """Sample ``size`` real 2-D training points from one of the toy distributions.

    Args:
        size: number of points to draw.
        num_of_gaus: selects the distribution:
            * ``8``  - eight Gaussians on a ring of radius 2 (std 0.02 each),
            * ``25`` - a 5x5 grid of Gaussians on the integer points of
              [-2, 2] x [-2, 2] (std 0.02 each),
            * ``0``  - a 2-D swiss roll (columns 0 and 2 of sklearn's
              ``make_swiss_roll`` with noise 0.25, divided by 7.5).

    Returns:
        A float32 ``torch.Tensor`` of shape ``(size, 2)``.

    Raises:
        ValueError: if ``num_of_gaus`` is not one of 0, 8 or 25. (Previously the
            function fell off the end and returned ``None``, which only failed
            later at the first use of the result.)
    """
    if num_of_gaus == 0:
        data = datasets.make_swiss_roll(
            n_samples=size,
            noise=0.25
        )[0]
        dataset = data.astype('float32')[:, [0, 2]]
        dataset /= 7.5 # stdev plus a little
        return torch.from_numpy(dataset)

    if num_of_gaus == 8:
        scale = 2.
        diag = 1./np.sqrt(2)
        unit_centers = [
            (1, 0), (-1, 0), (0, 1), (0, -1),
            (diag, diag), (diag, -diag), (-diag, diag), (-diag, -diag),
        ]
        centers = [(scale*x, scale*y) for x, y in unit_centers]
    elif num_of_gaus == 25:
        centers = [(x, y) for x in range(-2, 3) for y in range(-2, 3)]
    else:
        raise ValueError(
            "num_of_gaus must be 0 (swiss roll), 8 (ring) or 25 (grid), got %r"
            % (num_of_gaus,)
        )

    # Draw the noise first and the centre second, one point at a time, exactly
    # as before, so runs seeded via np.random / random stay reproducible.
    dataset = []
    for _ in range(size):
        point = np.random.randn(2)*.02
        center = random.choice(centers)
        point[0] += center[0]
        point[1] += center[1]
        dataset.append(point)
    # reshape keeps the (N, 2) layout even when size == 0.
    dataset = np.array(dataset, dtype='float32').reshape(-1, 2)
    return torch.from_numpy(dataset)


def get_z(m, n=2):
    """Draw generator input noise.

    Returns a float32 tensor of shape ``(m, n)`` whose entries are sampled
    uniformly from [-1, 1) with NumPy's global random state.
    """
    noise = np.random.uniform(-1., 1., size=[m, n])
    return torch.tensor(noise, dtype=torch.float32)


         
def extract(v):
    """Return the values of tensor ``v`` as a flat Python list.

    The tensor is detached from the autograd graph and flattened, so a scalar
    loss yields a one-element list (callers index ``[0]``).

    This reads the tensor's own elements rather than its backing storage: the
    previous ``v.data.storage().tolist()`` returned the whole underlying buffer
    for views/slices and relies on the typed-storage accessor that newer
    PyTorch releases deprecate.
    """
    return v.detach().reshape(-1).tolist()
  
def calc_g_loss(loss_type = None, d_gz = None , mini_batch_size= None, loss_BCE = None ,loss_MSE= None ):
    """Compute the generator loss for one mutation (objective) type.

    Args:
        loss_type: ``'logD'`` (heuristic, -E[log D(G(z))]), ``'minimax'``
            (E[log(1 - D(G(z)))]) or ``'ls'`` (least squares, E[(D(G(z)) - 1)^2]).
        d_gz: discriminator output on generated samples, D(G(z)).
        mini_batch_size: number of rows in ``d_gz``; the target tensor is
            built with shape ``[mini_batch_size, 1]``.
        loss_BCE: binary cross-entropy criterion (used by 'logD' and 'minimax').
        loss_MSE: mean-squared-error criterion (used by 'ls').

    Returns:
        The scalar loss tensor to back-propagate through the generator.

    Raises:
        ValueError: if ``loss_type`` is not one of the three supported names.
            (Previously this surfaced as an ``UnboundLocalError`` on return.)
    """
    if loss_type not in ('logD', 'minimax', 'ls'):
        raise ValueError(
            "loss_type must be 'logD', 'minimax' or 'ls', got %r" % (loss_type,)
        )

    if loss_type == 'minimax':
        # BCE against all-zero targets is -E[log(1 - D(G(z)))]; negate it so
        # that minimising the result minimises E[log(1 - D(G(z)))].
        fake_targets = torch.zeros([mini_batch_size, 1]).to(DEVICE)
        return (-1) * loss_BCE(d_gz, fake_targets)

    real_targets = torch.ones([mini_batch_size, 1]).to(DEVICE)
    if loss_type == 'logD':
        return loss_BCE(d_gz, real_targets)   # -E[log D(G(z))]
    return loss_MSE(d_gz, real_targets)       # E[(D(G(z)) - 1)^2]
  
def get_fit(some_list):
    """Return the index of the largest entry of ``some_list`` (first one on ties)."""
    best_index = np.argmax(some_list)
    return best_index
  
def calc_fq(d_fake_decision = None):
    """Quality fitness: the mean discriminator output over the generated batch.

    Returns the mean of ``d_fake_decision`` as a 0-d NumPy array on the CPU,
    detached from the autograd graph.
    """
    mean_score = d_fake_decision.mean()
    return mean_score.detach().cpu().numpy()
  
def calc_fd(d_real_decision, d_fake_error, loss_BCE, D, mini_batch_size=None):
    """Diversity fitness: minus the log of the discriminator-gradient norm.

    Builds the discriminator objective
    ``V = BCE(D(x), 1) + d_fake_error`` and returns
    ``Fd = -log || dV / d(theta_D) ||``, matching the formula quoted next to
    the call site in the training loop. (The previous version returned
    ``+log`` of the norm, i.e. the opposite sign.)

    Args:
        d_real_decision: discriminator output on real samples.
        d_fake_error: BCE loss of the discriminator on generated samples; at
            least one of the two terms must still be attached to ``D``'s graph.
        loss_BCE: binary cross-entropy criterion.
        D: the discriminator module whose parameters are differentiated.
        mini_batch_size: kept for backward compatibility; the all-ones target
            is now shaped like ``d_real_decision`` and created on its device,
            so this value is no longer needed.

    Returns:
        ``Fd`` as a 0-d NumPy array on the CPU.
    """
    real_targets = torch.ones_like(d_real_decision)
    V = loss_BCE(d_real_decision, real_targets) + d_fake_error

    delta_D = torch.autograd.grad(outputs=V, inputs=tuple(D.parameters()),
                                  grad_outputs=torch.ones_like(V),
                                  only_inputs=True)
    with torch.no_grad():
        # Flatten every per-parameter gradient into one long vector so a single
        # norm covers the whole discriminator.
        allgrad = torch.cat([grad.reshape(-1) for grad in delta_D])
        Fd = -torch.log(torch.norm(allgrad))
    return Fd.detach().cpu().numpy()

def calc_precision(d_real_decision):
    """Fraction of rows whose discriminator output is at least 0.5.

    Each decision is thresholded at 0.5 (1 if >= 0.5, else 0); the hits are
    summed as floats and divided by ``len(d_real_decision)``. Returns a Python
    float.
    """
    accepted = d_real_decision >= 0.5
    hit_count = torch.sum(accepted, dtype=torch.float)
    return (hit_count / len(d_real_decision)).item()
    

def calc_F(fq,fd,gamma1,gamma2):
    """Combine the two fitness terms: ``gamma1 * fq + gamma2 * fd``."""
    weighted_quality = gamma1*fq
    weighted_diversity = gamma2*fd
    return weighted_quality + weighted_diversity


def get_D_decision_from_fake(g_sample = None, G= None, D= None): #DETACHES
    """Run the discriminator on samples produced by ``G`` from ``g_sample``.

    The noise is moved to ``DEVICE`` and pushed through ``G``; the generated
    batch is detached before it reaches ``D`` so that gradients of the result
    do not flow back into the generator.
    """
    noise = Variable(g_sample).to(DEVICE)
    generated = G(noise).detach()  # cut the graph: only D is trained on these
    return D(generated)

def get_D_decision_from_real(d_sampler = None, G= None, D= None):
    """Run the discriminator on a batch of real samples.

    ``d_sampler`` is moved to ``DEVICE`` and fed to ``D``; the output stays
    attached to the graph. ``G`` is accepted but not used.
    """
    real_batch = Variable(d_sampler).to(DEVICE)
    return D(real_batch)
    
    

def create_save_figure(g_fake_data=None, x_plot=None, dt=None):
    """Scatter real vs. generated samples, save the figure as PNG and show it.

    Args:
        g_fake_data: tensor of generated 2-D points (detached and moved to the
            CPU here before plotting).
        x_plot: real 2-D points, indexable as ``x_plot[:, 0]`` / ``x_plot[:, 1]``.
        dt: string appended to the output name ``'final_plot_' + dt + '.png'``.

    Any argument left as ``None`` is looked up as a module-level global of the
    same name, which is how the original zero-argument version obtained its
    data. No such globals are defined in the visible part of this file, so the
    old form could only raise ``NameError``; pass the values explicitly.

    Raises:
        ValueError: if a value is neither passed nor available as a global.
    """
    module_scope = globals()
    if g_fake_data is None:
        g_fake_data = module_scope.get('g_fake_data')
    if x_plot is None:
        x_plot = module_scope.get('x_plot')
    if dt is None:
        dt = module_scope.get('dt')

    missing = [name for name, value in (('g_fake_data', g_fake_data),
                                        ('x_plot', x_plot),
                                        ('dt', dt)) if value is None]
    if missing:
        raise ValueError(
            "create_save_figure needs values for: " + ", ".join(missing)
        )

    plt.figure()
    g_plot = g_fake_data.detach().cpu().numpy()
    xax = plt.scatter(x_plot[:,0], x_plot[:,1], s=10)
    gax = plt.scatter(g_plot[:,0],g_plot[:,1], s=1)

    plt.legend((xax,gax), ("Real Data","Generated Data"))
    plt.savefig('final_plot_'+dt+'.png', bbox_inches='tight')
    plt.show()
    
def result_evaluate(dataset, num_of_gaus, radi=0.1):
    """Score how evenly ``dataset`` covers the mixture centres.

    For each centre from ``centers_test(num_of_gaus)`` the points lying inside
    the axis-aligned square of half-width ``radi`` around it are counted
    (bounds inclusive). Each count is divided by the ideal share
    ``len(dataset) / num_of_gaus``. Ratios above 2 are replaced by 0.1, ratios
    in (1, 2] are mirrored to ``|1 - round(ratio - 1, 3)|``, and ratios of at
    most 1 are kept. The result is the sum of the per-centre scores divided by
    ``num_of_gaus``.
    """
    total_points = len(dataset)
    centres = centers_test(num_of_gaus)

    hits = [0] * len(centres)
    for idx, centre in enumerate(centres):
        cx, cy = centre
        for point in dataset:
            inside_x = cx - radi <= point[0] <= cx + radi
            if inside_x and cy - radi <= point[1] <= cy + radi:
                hits[idx] += 1

    # Turn raw counts into a fraction of the ideal per-centre share.
    ratios = np.array(hits, dtype='float32')
    ratios /= (total_points / num_of_gaus)

    for idx in range(len(ratios)):
        if ratios[idx] > 2:
            ratios[idx] = 0.1
        elif ratios[idx] > 1:
            ratios[idx] = abs(1 - round(ratios[idx] - 1, 3))

    return sum(ratios) / num_of_gaus

def centers_test(num_of_centers):
    """Return the mixture centres used to evaluate generated samples.

    * ``8``  -> ``numpy`` array of shape (8, 2): eight points on a ring of
      radius 2 (axis points first, then the four diagonals).
    * ``25`` -> list of 25 ``(x, y)`` integer tuples covering the grid
      [-2, 2] x [-2, 2], x-major.
    * anything else -> ``None``.
    """
    if num_of_centers == 8:
        scale = 2.
        unit_ring = [
            (1,0),
            (-1,0),
            (0,1),
            (0,-1),
            (1./np.sqrt(2), 1./np.sqrt(2)),
            (1./np.sqrt(2), -1./np.sqrt(2)),
            (-1./np.sqrt(2), 1./np.sqrt(2)),
            (-1./np.sqrt(2), -1./np.sqrt(2))
        ]
        return np.array([[scale*x, scale*y] for x, y in unit_ring])

    if num_of_centers == 25:
        return [(x, y) for x in range(-2, 3) for y in range(-2, 3)]
  
class Generator(nn.Module):
    """Three-layer MLP generator: input -> hidden -> hidden -> output.

    Leaky-ReLU follows the first two linear layers; the last layer is linear.
    """

    def __init__(self, input_size =2 , hidden_size = 512, output_size = 2):
        super().__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of noise vectors to generated samples."""
        hidden = F.leaky_relu(self.map1(x))
        hidden = F.leaky_relu(self.map2(hidden))
        return self.map3(hidden)

    def init_normal_weights(self):
        """Re-draw every weight and bias from N(0, 0.02) (0.02 is the std)."""
        std = 0.02
        # Order matters for reproducibility: weight then bias, layer by layer.
        for layer in (self.map1, self.map2, self.map3):
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, std)

class Discriminator(nn.Module):
    """Four-layer MLP discriminator with a sigmoid output in (0, 1).

    Three hidden linear layers with leaky-ReLU are followed by a linear
    ``out`` layer whose result is squashed by a sigmoid.
    """

    def __init__(self, input_size = 2, hidden_size=512, output_size=1): # f =torch.sigmoid
        super(Discriminator, self).__init__()
        self.map1 = nn.Linear(input_size, hidden_size)
        self.map2 = nn.Linear(hidden_size, hidden_size)
        self.map3 = nn.Linear(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the per-sample probability-like score for a batch ``x``."""
        x = F.leaky_relu(self.map1(x))
        x = F.leaky_relu(self.map2(x))
        x = F.leaky_relu(self.map3(x))
        # torch.sigmoid is the supported spelling; F.sigmoid has been flagged
        # as deprecated by some PyTorch releases.
        return torch.sigmoid(self.out(x))

    def init_normal_weights(self):
        """Re-draw every weight and bias from N(0, 0.02) (0.02 is the std).

        The ``out`` layer is included so that, as in ``Generator``, no layer is
        left on PyTorch's default initialisation (it was skipped before).
        """
        std = 0.02
        for layer in (self.map1, self.map2, self.map3, self.out):
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, std)
 

In [0]:
def _scatter_real_vs_generated(x_plot, g_plot, save_path=None):
    """Scatter real points (large markers) against generated points (small markers).

    The figure is saved to ``save_path`` when one is given and then shown.
    """
    plt.figure()
    xax = plt.scatter(x_plot[:,0], x_plot[:,1], s=10)
    gax = plt.scatter(g_plot[:,0], g_plot[:,1], s=1)
    plt.legend((xax,gax), ("Real Data","Generated Data"))
    if save_path is not None:
        plt.savefig(save_path, bbox_inches='tight')
    plt.show()


def train_results_version(mutation_types =  ['logD', 'minimax', 'ls'], 
        d_learning_rate = 0.0001,g_learning_rate = 0.0001,
        d_steps = 1, g_steps=1,
        save_logs = True, save_figure = True, print_fig = True,
        num_of_gaus = 8, g1= 1, g2=1, random_seed_val = 42
          ):
    """Train an evolutionary GAN on a 2-D toy distribution and score the result.

    Every epoch the discriminator takes ``d_steps`` Adam updates on one real
    and one generated mini-batch. Then, ``g_steps`` times, one child generator
    per entry of ``mutation_types`` is cloned from the current generator,
    updated once with that mutation's loss, and scored with
    ``F = g1 * Fq + g2 * Fd``; the fittest child becomes the next generator.

    Args:
        mutation_types: generator objectives to try each step
            ('logD', 'minimax', 'ls'). The list is only read, never mutated.
        d_learning_rate, g_learning_rate: Adam learning rates.
        d_steps, g_steps: discriminator / generator updates per epoch.
        save_logs: write per-epoch (Fq, Fd) pairs to ``fitlogs_<dt>.txt`` and
            the winning mutation indices to ``mut_freq_logs<dt>.txt``.
        save_figure: save a scatter plot of the final generator to
            ``final_plot_<dt>.png``.
        print_fig: print progress and show a scatter plot every 10% of epochs.
        num_of_gaus: dataset selector forwarded to ``get_x``.
        g1, g2: weights of the quality and diversity fitness terms.
        random_seed_val: currently unused (seeding is not applied).

    Returns:
        The evaluation score from ``result_evaluate`` on 1000 generated points.

    Fixes relative to the previous version:
        * child fitness is measured on the child generator, not on the parent;
        * fitness logging is skipped (instead of raising ``NameError``) when
          ``save_logs`` is false;
        * the final figure is actually saved when ``save_figure`` is true (the
          helper was referenced but never called);
        * ``tally`` has one slot per mutation type instead of a fixed three;
        * the printed generator error is the winning child's loss, not 0;
        * preview samples are only generated when they are plotted.
    """
    dt = datetime.today().strftime('%Y-%m-%d-%H:%M').replace(':', '-')

    # Model parameters
    minibatch_size = 64
    batchSize = 256 #they use 64 for small things but 256 for the images 
    num_epochs = 50 * 1000
    show_epoch = num_epochs * 0.1
    x_plot = get_x(batchSize, num_of_gaus)

    dfe, dre, ge = 0, 0, 0

    G = Generator().to(DEVICE)
    D = Discriminator().to(DEVICE)
    G.init_normal_weights()
    D.init_normal_weights()

    loss_BCE = nn.BCELoss().to(DEVICE)
    loss_MSE = nn.MSELoss().to(DEVICE)
    d_optimizer = optim.Adam(D.parameters(), lr=d_learning_rate, betas=[0.5, 0.99])

    # Constant targets, built once on the training device.
    real_labels = torch.ones([minibatch_size, 1], device=DEVICE)
    fake_labels = torch.zeros([minibatch_size, 1], device=DEVICE)

    # Two columns (Fq, Fd) per mutation type; only allocated when logging.
    fit_log = np.zeros(shape=(num_epochs, len(mutation_types)*2)) if save_logs else None

    tally = [0] * len(mutation_types)
    mut_freq = []

    for epoch in range(num_epochs):
        for d_index in range(d_steps):
            D.zero_grad()
            d_sampler = get_x(minibatch_size, num_of_gaus)
            gi_sampler = get_z(minibatch_size)

            #  1A: Train D on real (targets = 1)
            d_real_decision = get_D_decision_from_real(d_sampler, G, D)
            d_real_error = loss_BCE(d_real_decision, real_labels)
            d_real_error.backward() # accumulate gradients, parameters unchanged

            #  1B: Train D on fake (targets = 0); the helper detaches G's output
            d_fake_decision = get_D_decision_from_fake(gi_sampler, G, D)
            d_fake_error = loss_BCE(d_fake_decision, fake_labels)
            d_fake_error.backward()
            d_optimizer.step()     # only D's parameters are registered here

            dre, dfe = extract(d_real_error)[0], extract(d_fake_error)[0]

        for g_index in range(g_steps):
            fitness_list = []
            generator_list = []
            child_losses = []
            for slot, mutation in enumerate(mutation_types):
                G.zero_grad()
                # Variation: clone the parent and give the clone one update.
                child_G = copy.deepcopy(G)

                g_data = get_z(minibatch_size)
                d_data = get_x(minibatch_size, num_of_gaus)

                d_gen_input = g_data.to(DEVICE)
                # Not detached: the loss must back-propagate into child_G.
                d_fake_decision = D(child_G(d_gen_input))

                child_loss = calc_g_loss(mutation, d_fake_decision, minibatch_size, loss_BCE, loss_MSE)
                child_loss.backward()

                # A fresh Adam per child: it starts without moment estimates
                # and is discarded after this single step.
                child_opt = optim.Adam(child_G.parameters(), lr=g_learning_rate, betas=[0.5, 0.999])
                child_opt.step()

                # Evaluation. Fq = Ez[D(G(z))],
                # Fd = -log|| grad_D( -Ex[log D(x)] - Ez[log(1 - D(G(z)))] ) ||.
                # The *updated child* is scored; scoring the parent here would
                # give every mutation the same generator and make selection noise.
                d_fake_data = child_G(d_gen_input).detach()
                d_fake_decision = D(d_fake_data)
                d_fake_error = loss_BCE(d_fake_decision, fake_labels)

                Fq = calc_fq(d_fake_decision)

                # NOTE(review): the real-data decision is detached (as before),
                # so only the fake term contributes to the gradient inside
                # calc_fd - confirm this is intended.
                d_real_decision = D(d_data.to(DEVICE)).detach()

                Fd = calc_fd(d_real_decision, d_fake_error, loss_BCE, D, minibatch_size)

                if fit_log is not None:
                    fit_log[epoch][2*slot] = float(Fq)
                    fit_log[epoch][2*slot + 1] = float(Fd)

                child_fitness = calc_F(Fq, Fd, gamma1= g1, gamma2= g2)

                fitness_list.append(child_fitness)
                generator_list.append(child_G)
                child_losses.append(child_loss.item())

            # Selection: the fittest child replaces the parent.
            rank = get_fit(fitness_list)
            tally[rank] += 1
            mut_freq.append(rank)
            G = generator_list[rank]
            ge = child_losses[rank]

        if epoch % show_epoch == 0 and print_fig:
            d_err = dre + dfe
            print("Epoch %s: D (%s d_err) G (%s err); " %
                  (epoch, d_err, ge))

            g_fake_data = G(get_z(batchSize).to(DEVICE)).detach()
            g_plot = g_fake_data.cpu().numpy()
            print(len(g_plot))
            _scatter_real_vs_generated(x_plot, g_plot)

    if save_figure:
        g_fake_data = G(get_z(batchSize).to(DEVICE)).detach()
        _scatter_real_vs_generated(x_plot, g_fake_data.cpu().numpy(),
                                   save_path='final_plot_'+dt+'.png')

    if save_logs:
        np.savetxt('fitlogs_'+dt+'.txt', fit_log)
        np.savetxt('mut_freq_logs'+dt+'.txt', np.array(mut_freq))

    # Final evaluation on a larger sample.
    g_fake_data = G(get_z(1000).to(DEVICE)).detach()
    g_plot = g_fake_data.cpu().numpy()
    print(len(g_plot))
    _scatter_real_vs_generated(x_plot, g_plot)

    eval_score = result_evaluate(g_plot, num_of_gaus)
    print(eval_score)

    # Density plot of the first mini-batch worth of generated points.
    g_plot = g_plot[:minibatch_size]
    limmax = 3
    bg_color  = sns.color_palette('Greens', n_colors=256)[0]
    plt.figure(figsize=(6, 6))
    plt.xlim(-limmax, limmax)
    plt.ylim(-limmax, limmax)
    # NOTE(review): positional x/y, `shade` and `n_levels` follow the older
    # seaborn kdeplot signature; confirm against the installed seaborn version.
    ax2 = sns.kdeplot(g_plot[:, 0], g_plot[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-limmax,limmax]]*2)
    ax2.set_facecolor(bg_color)
    plt.xticks([]); plt.yticks([])
    ax2.set_ylabel('%d iteration:'% epoch )

    print("Finished!")
    return eval_score

# Launch one full training run with the default arguments; as the last expression
# of the cell, the returned evaluation score is shown as the cell output.
train_results_version() #mutation_types =  ['logD']