# SMART GOAL #1
#### Finish implementing a GAN for understanding and testing purposes.

Source of tutorial : https://github.com/aadilh/blogs/tree/new/basic-gans/basic-gans

In [None]:
#Import libraries for testing
import tensorflow as tf
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt

#Number of samples per training batch; the training loop passes it to random_data and sample_Z.
batch_size = 256

In [None]:
def sample_Z(m, n):
    """Return an (m, n) array of noise drawn uniformly from [-1, 1).

    Args:
        m: number of rows (samples).
        n: number of columns (noise values per sample).
    """
    noise_shape = [m, n]
    return np.random.uniform(low=-1, high=1, size=noise_shape)

In [None]:
#In the tutorial, the user creates a random data of 10000 samples, 2D array. Here I will use data sample from SMART Goal #4 as input.
#Generator. Z = noise input (see sample_Z), hsize = [16 nodes for layer1, 16 nodes for layer2], reuse = False (create new layer variables instead of reusing existing ones)
def generator(Z, hsize=(16, 16), reuse=False):
    """Build the generator: two leaky-ReLU hidden layers and a linear 2-unit output.

    Args:
        Z: input tensor for the first dense layer (the training loop feeds it
            noise produced by sample_Z).
        hsize: sizes of the two hidden layers, indexed as hsize[0] and
            hsize[1]. The default is a tuple rather than a list so the shared
            default object cannot be mutated between calls.
        reuse: forwarded to tf.variable_scope; pass True to reuse the
            variables already created under "GAN/Generator".

    Returns:
        The output tensor of the final dense layer (2 values per input row).
    """
    with tf.variable_scope("GAN/Generator", reuse=reuse):
        h1 = tf.layers.dense(Z, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        # No activation on the last layer: the samples are unbounded real values.
        out = tf.layers.dense(h2, 2)

    return out

In [None]:
#Discriminator. X = our original dataset, other parameters are same as above.
#This also has an extra hidden layer, h3 (2 units), which is returned alongside the logits as a 2-D representation of the input.
def discriminator(X, hsize=(16, 16), reuse=False):
    """Build the discriminator: two leaky-ReLU hidden layers, a 2-unit layer, then logits.

    Args:
        X: input tensor of samples to score (real data or generator output).
        hsize: sizes of the two hidden layers, indexed as hsize[0] and
            hsize[1]. The default is a tuple rather than a list so the shared
            default object cannot be mutated between calls.
        reuse: forwarded to tf.variable_scope; pass True to score a second
            input with the variables already created under "GAN/Discriminator".

    Returns:
        A tuple (out, h3): the logits tensor and the output of the 2-unit
        layer that feeds it.
    """
    with tf.variable_scope("GAN/Discriminator", reuse=reuse):
        h1 = tf.layers.dense(X, hsize[0], activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1, hsize[1], activation=tf.nn.leaky_relu)
        # Linear 2-unit layer, returned to the caller as well as feeding the logits.
        h3 = tf.layers.dense(h2, 2)
        # NOTE(review): this emits 2 logits per sample; a real/fake classifier
        # would usually emit 1 - confirm that 2 is intended.
        out = tf.layers.dense(h3, 2)

    return out, h3

In [None]:
#These placeholders are storage for us, in which tf.float32 is the type (in this case 32 bit float)
#[None,2] represents : None = any number of rows, 2 = 2 columns.
#X is fed the real batches and Z the noise batches in the training loop.
X = tf.placeholder(dtype=tf.float32, shape=[None, 2])
Z = tf.placeholder(dtype=tf.float32, shape=[None, 2])

In [None]:
#Here we set up the generator and discriminator
#G_sample = generated data
#r_logits/r_rep = discriminator logits and h3 output for the real data X
#f_logits/g_rep = discriminator logits and h3 output for the generated data G_sample
G_sample = generator(Z)
r_logits, r_rep = discriminator(X)
#reuse=True: score the generated samples with the same discriminator variables created on the line above
f_logits, g_rep = discriminator(G_sample,reuse=True)

In [None]:
#The loss function for discriminator and generator, need to understand how this works and how it can be tweaked.
#Discriminator loss: cross-entropy with label 1 for real logits plus label 0 for generated logits.
disc_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=r_logits, labels=tf.ones_like(r_logits))
    + tf.nn.sigmoid_cross_entropy_with_logits(
        logits=f_logits, labels=tf.zeros_like(f_logits))
)
#Generator loss: cross-entropy of the generated logits against label 1, i.e. low when the
#discriminator scores generated samples as real.
gen_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=f_logits, labels=tf.ones_like(f_logits))
)

In [None]:
#The training ops for the discriminator and generator; once again I will need to understand how this works and how it can be tweaked.
#Collect each network's variables by scope so each optimizer only updates its own network.
gen_vars = tf.get_collection(
    tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Generator")
disc_vars = tf.get_collection(
    tf.GraphKeys.GLOBAL_VARIABLES, scope="GAN/Discriminator")

# G Train step
gen_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
    gen_loss, var_list=gen_vars)
# D Train step
disc_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
    disc_loss, var_list=disc_vars)

In [None]:
#To get our result for our GAN
#Open a session and initialise every variable created so far in the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
#Here is the iteration for the GAN, again, I will need to understand how this works and tweaked.
#Collects one [iteration, discriminator loss, generator loss] row per iteration
loss = []
#NOTE: random_data is defined further down (SMART Goal #3 section); that cell has to be run before this one.
for i in range(1001):
    #New batch each iteration: a freshly generated random walk as real data and uniform noise for the generator
    X_batch = random_data(n=batch_size)
    Z_batch = sample_Z(batch_size, 2)
    
    #10 discriminator updates on this batch; dloss keeps the loss fetched on the last one
    for _ in range(10):
        _, dloss = sess.run([disc_step, disc_loss], feed_dict={X: X_batch, Z: Z_batch})
    #Discriminator h3 outputs for the real and generated samples after the discriminator updates
    #(these values are not used anywhere else in this section)
    rrep_dstep, grep_dstep = sess.run([r_rep, g_rep], feed_dict={X: X_batch, Z: Z_batch})

    #10 generator updates on the same noise batch; gloss keeps the loss fetched on the last one
    for _ in range(10):
        _, gloss = sess.run([gen_step, gen_loss], feed_dict={Z: Z_batch})
    #Same h3 outputs again, this time after the generator updates (also unused in this section)
    rrep_gstep, grep_gstep = sess.run([r_rep, g_rep], feed_dict={X: X_batch, Z: Z_batch})
    
    loss.append([i,dloss,gloss])
    #print ("Iterations: %d\t Discriminator loss: %.4f\t Generator loss: %.4f"%(i,dloss,gloss))
    
    #Because the scatter plot shows that the generated data is everywhere (when using plt.plot, the connected lines look
    #like some baby drew a picture) I will try to sort the data by "days" and try to plot again.
    #Looks better when it's sorted, we can see a progression on how the generator is learning, but it is still very noisy
    #Perhaps I can try a different NN model?
    #Every 100 iterations plot the real batch against the current generator output
    if i%100 == 0:
        print ("Iterations: %d\t"%(i))
        plt.figure(figsize=(10,8))
        g_plot = sess.run(G_sample, feed_dict={Z: Z_batch})
        #Sort the generated rows by column 1 (the "Days" axis) so the line plot runs left to right
        g_plot = g_plot[g_plot[:,1].argsort()]
        #Column 1 is plotted on the x axis and column 0 on the y axis
        plt.plot(X_batch[:,1], X_batch[:,0],color = "red")
        plt.plot(g_plot[:,1],g_plot[:,0], color = "blue")
        plt.xlabel("Days")
        plt.ylabel("USD ($)")
        plt.legend(["Red = Real Data", "Blue = Generated Data"])
        plt.title("Real vs Generated Data")
        plt.show()

In [None]:
#Graph plot for Discriminator and Generator Loss
#Lower Generator Loss is good, means it can fool the discriminator
#Lower Discriminator Loss is good, means it can detect the fake data created by generator
#When I ran this, different runs produced different graphs:
#1) Generator sky rocketed and never came down. Discriminator stayed at 0
#2) Both Generator and Discriminator bounced around at the beginning but converged in further iterations.
#Turn the list of [iteration, dloss, gloss] rows into an array so the columns can be sliced.
loss = np.array(loss)
plt.figure(figsize=(10, 8))
#Column 0 = iteration, column 1 = discriminator loss, column 2 = generator loss
plt.plot(loss[:, 0], loss[:, 1], color='red')
plt.plot(loss[:, 0], loss[:, 2], color='blue')
plt.legend(["Red = Discriminator", "Blue = Generator"])
plt.show()

In [None]:
#Plot of how the generated graphs look like


# SMART GOAL #2
#### Research about RNN, this takes time series as input which will be very useful for us.

# SMART Goal #3
#### Add more features into the random walk model.

In [None]:
import numpy as np
from numpy import array
#Seed NumPy's global random generator so the random walk draws are repeatable
np.random.seed(256)

In [None]:
#Number max returns 0 or higher
def max(num):
    """Return num when it is greater than 0, otherwise 0.

    NOTE: this one-argument function shadows Python's builtin max() for
    everything defined after it in this file.
    """
    return num if num > 0 else 0

In [None]:
def determineResult(num):
    """Classify num as 1 (num >= 1), -1 (num <= -1) or 0 (strictly between -1 and 1).

    The cut-offs are +/-1, so a value such as 0.02 is classified as 0.
    Returns None when no comparison holds (e.g. for NaN).
    """
    if num >= 1:
        return 1
    if num <= -1:
        return -1
    if -1 < num < 1:
        return 0

In [None]:
def random_data(n, start_price=100, trend_threshold=0.01):
    """Simulate n daily closing prices with a momentum-biased random walk.

    Every day draws a base move uniformly within +/-2% of the opening price.
    If the previous day's base move was a notable rise (or fall), there is a
    1-in-3 chance that today's move is forced to be a rise (or fall) as well.
    Independently, 2 days out of 10 are "abnormal": the base move is
    multiplied by 2 or 3. Prices never go below 0.

    The previous version classified the base move (a fraction in
    [-0.02, 0.02]) against cut-offs of +/-1, so the trend was always "same"
    and the momentum branches never ran. The move is now compared with
    trend_threshold, expressed in the same fractional units (0.01 = 1%).

    Args:
        n: number of closing prices to generate.
        start_price: opening price of the first day.
        trend_threshold: smallest absolute base move (as a fraction) that
            counts as a rise or fall for the next day's momentum; expected
            to be positive.

    Returns:
        Float array of shape (n, 2): column 0 is the closing price, column 1
        the zero-based day index. The shape is (0, 2) when n is 0, so column
        slicing keeps working.
    """
    prices = [start_price]
    rows = []
    #trend of the previous day: increased (1), decreased (-1), or same (0)
    trend = 0

    for day in range(n):
        open_price = prices[-1]

        low, high = -.02, .02
        #randint(1, 10) yields 1..9 and three of those are multiples of 3,
        #so after a rise/fall there is a 33% chance the move continues that way
        if trend != 0 and np.random.randint(1, 10) % 3 == 0:
            if trend == 1:
                low = 0
            else:
                high = 0
        walk = np.random.uniform(low, high)

        #randint(1, 11) yields 1..10: 2..9 is a normal day, 1 or 10 is abnormal
        #and multiplies the move by 2 or 3
        if 1 < np.random.randint(1, 11) < 10:
            move = walk
        else:
            move = np.random.randint(2, 4) * walk
        close_price = open_price + open_price * move

        #Set the trend for the next day from today's base move
        if walk >= trend_threshold:
            trend = 1
        elif walk <= -trend_threshold:
            trend = -1
        else:
            trend = 0

        #Floor the price at 0
        prices.append(close_price if close_price > 0 else 0)
        rows.append([prices[-1], day])

    #reshape keeps the result 2-D even when no rows were generated
    return np.array(rows, dtype=float).reshape(-1, 2)
    #Update 1 added conditional probability (result)
    #Update 2 converted list into nparray for GAN testing
#Generate a 256-day sample walk; the cells below plot, shuffle and re-sort it
randomStock = random_data(256)

In [None]:
from matplotlib import pyplot as plt
#Column 1 (day index) on the x axis, column 0 (closing price) on the y axis
plt.plot(randomStock[:, 1], randomStock[:, 0])
plt.xlabel("Day")
plt.ylabel("USD ($)")
plt.show()

In [None]:
#Shuffle the rows in place; each [price, day] pair stays together
np.random.shuffle(randomStock)

In [None]:
#Inspect the rows after shuffling
print(randomStock)

In [None]:
#Put the rows back in day order by sorting on column 1 (the day index)
randomStock = randomStock[randomStock[:,1].argsort()]

In [None]:
#Display the array after re-sorting by day
randomStock