<a href="https://colab.research.google.com/github/NikuDubenco/code_replications/blob/master/Coding_for_GANS_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# credits: Jakub Langr
# url: http://jakublangr.com/gans-code.html

In [0]:
# first check the python version

import sys

if sys.version_info < (3,4):
  print('You are running an older version of Python!\n\n',
       'You should consider updating to Python 3.4.0 or',
       'higher as the libraries build for this course',
       'have only been tested in Python 3.4 and higher.\n')
  print('Try installing the Python 3.5 version of anaconda',
       'and then restart `jupyter notebook`:\n',
       'https://www.continuum.io/downloads\n\n')
  
# now get necessary libraries
try:
  import os
  import pandas as pd
  import pickle
  import tflearn
  from joblib import Parallel, delayed
  import random
  import multiprocessing
  import numpy as np
  import matplotlib.pyplot as plt
  from skimage.tranform import resize
  from skimage import data
  from scipy.misc import imresize
  from scipy.ndimage.filters import gaussian_filter
  import IPython.display as ipud
  import tensorflow as tf
  from libs import utils, datasets, dataset_utils, nb_utils
except ImportError as e:
  print(e)
  print('Make sure you have started notebook in the same direction',
       'as the provided zip file which includes the `libs` folder',
       'and the file `utils.py` inside of it. You will Not be able',
       'to complete this assigment unless you restart jupyter',
       'notebook inside the directory created by extracting',
       'the zip file or cloning the github repo.')
  print(e)
  
# we'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')

In [0]:
# Bind the model object used by the encoding cells below; `get_features_for`
# reads `net['labels']` and `net['attributes']` from it.
# NOTE(review): `CV` is not imported by any cell visible in this notebook --
# confirm which module it aliases and import it in the setup cell.
net = CV.get_celeb_vargan_model()

In [0]:
# Look up three graph tensors by name; later cells use them as feed/fetch handles:
#   X -- fed with preprocessed images when encoding
#   Z -- fetched as the encoding, and fed when generating
#   G -- fetched as the generator output
# NOTE(review): `g` is not defined in any cell visible in this notebook
# (presumably a tf.Graph holding the imported model under the 'net' scope) -- confirm.
X = g.get_tensor_by_name('net/x:0')
Z = g.get_tensor_by_name('net/encoder/variational/z:0')
G = g.get_tensor_by_name('net/generator/x_tilde:0')

In [0]:
# Collect the entries returned by `datasets.CELEB()` in sorted order and preview
# one of them. `files` is also indexed by `get_features_for` and by a later cell.
# (entries are presumably image file paths, since they are passed to `plt.imread`)
files = sorted(datasets.CELEB())
img_i = 20  # position in `files` of the image to preview
img = plt.imread(files[img_i])
plt.imshow(img)

In [0]:
def get_features_for(label='Bald', has_label=True, n_imgs=50):
  """Return the mean encoding of randomly chosen images selected by an attribute.

  Rows of `net['attributes']` whose `label` column equals `has_label` are
  shuffled and the first `n_imgs` of them are used. Each selected entry of
  `files` is read, cut down to its first three channels, run through
  `CV.preprocess`, and the whole batch is encoded by evaluating `Z` with the
  batch fed to `X`.

  Args:
    label: attribute name; must be present in `net['labels']`.
    has_label: value the attribute column is compared against.
    n_imgs: maximum number of images to encode.

  Returns:
    The encodings averaged over the batch axis (axis 0).
  """
  column = net['labels'].index(label)
  matching_rows = np.where(net['attributes'][:, column] == has_label)[0]
  chosen_rows = np.random.permutation(matching_rows)[:n_imgs]

  batch = []
  for row in chosen_rows:
    # keep only the first three channels of whatever the reader returns
    pixels = plt.imread(files[row])[..., :3]
    batch.append(CV.preprocess(pixels))
  batch = np.array(batch)

  encodings = sess.run(Z, feed_dict={X: batch})
  return np.mean(encodings, 0)

In [0]:
def gan_generate_data(num_iter=20000, imgs=15):
  """Generate `2 * num_iter` images from attribute-averaged encodings.

  Each iteration draws a random batch size from `[imgs-10, imgs+10)` (to
  increase variation), averages the encodings of images with and without the
  'Male' attribute via `get_features_for`, and evaluates `G` on both averages.

  Args:
    num_iter: number of iterations; two images are produced per iteration.
    imgs: centre of the range the per-iteration batch size is drawn from.

  Returns:
    A list of `2 * num_iter` generated images ordered as
    [with 'Male', without 'Male'] pairs.
  """
  generate_images = []

  # Two interpolation weights, 0 and 1: only the two endpoints are generated.
  amt = np.linspace(0, 1, 2)

  for i in range(num_iter):
    # Clamp to 1: for small `imgs` the drawn value can be 0 or negative,
    # which would slice away all (or an arbitrary number of) images downstream.
    n_imgs = max(1, random.choice(range(imgs-10, imgs+10)))

    z1 = get_features_for('Male', True, n_imgs=n_imgs)
    z2 = get_features_for('Male', False, n_imgs=n_imgs)

    notmale_vector = z2 - z1
    zs = np.array([z1 + notmale_vector * amt_i for amt_i in amt])
    # named `generated` rather than `g`, which the notebook uses for the graph
    generated = sess.run(G, feed_dict={Z : zs})

    generate_images.append(generated[0])
    generate_images.append(generated[1])

    if i % 1000 == 0:
      print('Iteration number: {}'.format(i))

  return generate_images


# The function was originally defined under this misspelled name while the
# notebook calls `gan_generate_data()`; keep the old spelling as an alias.
gan_ganerate_data = gan_generate_data

# Build the synthetic dataset with the generator's default arguments; the
# result is a list holding two generated images per iteration.
generated_data = gan_generate_data()

In [0]:
# Stack the generated images into one array, then build one label per image.
# The generator appends images in (with 'Male', without 'Male') pairs, so the
# labels alternate 0, 1. The count is derived from the data instead of being
# hard-coded to the generator's default number of iterations.
generated_data = np.array(generated_data)
labels = [0, 1] * (len(generated_data) // 2)
assert len(labels) == len(generated_data), 'expected an even number of generated images'
generated_data.shape

In [0]:
from libs import vgg16, inception, i2v

# Rebinds `net`: from this cell on, `net` is the object returned by
# `vgg16.get_vgg_face_model()`, and the prediction helpers below read
# `net['label']` and `net['preprocess']` from it.
# NOTE(review): `get_features_for` looks `net` up at call time, so calling it
# after this cell has run will use this object instead of the earlier one.
net = vgg16.get_vgg_face_model()

In [0]:
def transferred_predictions(img):
  """Evaluate the network's softmax layer on `img` and return its scores.

  The input tensor is looked up as the first entry of `names` and the softmax
  tensor as the second-to-last entry, both in graph `g`.

  Args:
    img: `np.array` fed to the network's input tensor (callers pass a batch
      holding a single preprocessed image).

  Returns:
    A one-element list holding a pandas Series of scores (the 'score' column
    of a score/label table).
  """
  # grab the tensor defining the input to the network
  x = g.get_tensor_by_name(names[0] + ':0')

  # and grab the tensor defining the softmax layer of the network
  # (fixed: was `name[-2]`, an undefined name)
  softmax = g.get_tensor_by_name(names[-2] + ':0')

  # The notebook reads `net['labels']` elsewhere but this function used
  # `net['label']`; accept either spelling of the key.
  class_names = net['labels'] if 'labels' in net else net['label']

  # fixed: the keyword is `graph` (was `grath`)
  with tf.Session(graph=g), g.device('/cpu:0'):
    # no dropout overrides are fed here
    res = softmax.eval(feed_dict={x: img})

  # NOTE(review): assuming `res` has shape (1, n_classes), `[-5:]` and `[::-1]`
  # act on the batch axis, so ALL class indices are kept, ordered by ascending
  # score -- not the top five. Kept as-is because later cells size their inputs
  # to a full-length score vector; confirm which behaviour is intended.
  order = res.argsort()[-5:][::-1].flatten()
  scores = res.flatten()
  scored = [(scores[int(idx)], class_names[int(idx)]) for idx in order]

  # fixed: build the table from the scored pairs (the original passed the
  # still-empty accumulator list, producing an empty table)
  table = pd.DataFrame(scored, columns=['score', 'label'])

  return [table.score]

def transferred_df(generated_data):
  """Run `transferred_predictions` over every item of `generated_data`.

  Each item is resized to (224, 224, 3) with `imresize`, passed through
  `net['preprocess']`, given a leading batch axis and scored.

  Args:
    generated_data: indexable collection of images.

  Returns:
    A list with one `transferred_predictions` result per item, in input order.
  """
  predictions = []

  for position in range(len(generated_data)):
    resized = imresize(generated_data[position], size=(224,224,3))
    # add the leading batch axis the network input expects
    batch = net['preprocess'](resized)[np.newaxis]
    predictions.append(transferred_predictions(batch))

    # progress marker every 1000 images
    if position % 1000 == 0:
      print('Current image id {}'.format(position))

  return predictions


def parallel_transfer_eval(generated_data):
  """Run `transferred_df` over four contiguous quarters of the data in parallel.

  The data is cut at the first (fs), second (ss) and third (ts) split points
  and each quarter is handed to one of four worker processes.

  Args:
    generated_data: sliceable collection of images.

  Returns:
    A list of four `transferred_df` results, one per quarter, in input order.
  """
  total = len(generated_data)
  fs = total // 4
  ss = 2 * total // 4
  ts = 3 * total // 4

  # fixed: the last chunk is the remainder after `ts` (the original passed the
  # whole dataset again, re-processing the first three quarters)
  chunks = [generated_data[:fs], generated_data[fs:ss],
            generated_data[ss:ts], generated_data[ts:]]

  # fixed: map over the chunks themselves -- `zip(target)` wrapped each chunk
  # in a 1-tuple, so every worker saw a single "image" that was a whole chunk.
  # The context manager releases the worker processes once `map` has returned.
  with multiprocessing.Pool(4) as pool:
    results = pool.map(transferred_df, chunks)

  return results

In [0]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20; import from
# `model_selection` and fall back only for very old installations.
try:
  from sklearn.model_selection import train_test_split
except ImportError:
  from sklearn.cross_validation import train_test_split

# train-test for proper evaluation
# NOTE(review): `X` was bound to a graph tensor in the tensor-lookup cell and
# `y` is not defined in any visible cell -- confirm which feature matrix and
# label vector are meant to be split here.
train_X, test_X, train_y, test_y = train_test_split(X, y)

tflearn.init_graph(num_cores=8, gpu_memory_fraction=.5)

# set up the network
net = tflearn.input_data(shape=[None, 2623])
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy')

# train
# NOTE(review): `labels` is only one-hot encoded further down in this cell,
# after `fit` has already been called -- confirm the intended order.
model = tflearn.DNN(net)
model.fit(generated_data, labels, validation_set=train_X)



from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# reshape labels so that they match what the network expects
# NOTE(review): this yields 20000 labels, while the generator's defaults
# produce 40000 images; LabelEncoder also assigns codes in sorted class order,
# which presumably differs from the earlier 0/1 assignment -- confirm.
labels = ['Male', 'Female'] * 10000
encoder = LabelEncoder()
encoder.fit(labels)
labels = encoder.transform(labels)
# NOTE(review): `np_utils` is not imported by any visible cell -- confirm its source.
labels = np_utils.to_categorical(labels)
labels.shape

# preprocess the first 100 dataset images as a small test batch
test_imgs = np.array([CV.preprocess(plt.imread(file)) for file in files[:100]])
test_imgs.shape

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

labels = [0, 1] * 10000

feature_columns = [tf.contrib.layers.real_valued_column('', dimension=2623)]

classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[2623, 512],
                                            gradient_clip_norm=.01,
                                            optimizer=tf.train.AdamOptizer(learning_rate=.1),
                                            n_classes=2)
                                            # model_dir='./model')
  
# fit model
classifier.fit(x=array, y=labels, batch_size=256, steps=10000)

# evaluate accuracy
test_labels = np.array([0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1,
                        0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
                        0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
                        1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
                        0, 0, 0, 0, 0, 0, 0, 0])

# test_array = np.array([[res[0] for res in result] for result in test_array])

accuracy_score = classifier.evaluate(x=test_array, y=test_labels)['accuracy']
print('Accuracy: {0:f}'.format(accuracy_score))