# Running Neural Network Classification for supervised tasks

### Run on two datasets: MNIST and 20 Newsgroups (20NG)

## A) MNIST: train a TensorFlow classifier in supervised mode (train/test) and report results

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST splits from /tmp/data/ with the labels one-hot encoded.
mnist = input_data.read_data_sets('/tmp/data/', one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [3]:
# Define model hyper-parameters and graph inputs

n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500  # only needed if a third hidden layer is added to the model

n_classes = 10
# Go through batches of 100 images at a time
batch_size = 100

# Input placeholder: one row of 784 features per example, any batch size.
x = tf.placeholder('float', [None, 784])
# One-hot label placeholder. Declaring its shape lets TensorFlow validate the
# labels that are fed instead of accepting an array of any shape.
y = tf.placeholder('float', [None, n_classes])

In [4]:
def neural_network_model(data):
    '''
    Build a feed-forward network: two ReLU hidden layers and a linear output layer.

    Every layer computes ``input * weights + biases``. Weights and biases are
    created as ``tf.Variable``s initialised with ``tf.random_normal``; a weight
    matrix has shape [size of previous layer, size of new layer].
    Why bias? If the input is 0 then 0 * weights is 0 and no neuron would ever
    fire; the bias also lets each unit shift its activation left or right.

    The layer sizes are read from the module-level ``n_nodes_hl1``,
    ``n_nodes_hl2`` and ``n_classes``. The input width is taken from the static
    shape of ``data`` rather than being hard-coded, so the same definition
    works for any feature count.

    Args:
        data: 2-D float tensor or placeholder of shape [batch, n_features].

    Returns:
        Tensor of shape [batch, n_classes] holding the raw output-layer values
        (no softmax is applied here).

    Raises:
        ValueError: if the feature dimension of ``data`` is not statically known.
    '''
    n_inputs = data.get_shape().as_list()[-1]
    if n_inputs is None:
        raise ValueError('neural_network_model needs an input with a known feature dimension')

    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([n_inputs, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}

    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}

    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}

    # model is: ip_data*weights + biases, followed by the activation function
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)

    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)

    # The output layer stays linear: the loss applies the softmax itself.
    output = tf.add(tf.matmul(l2, output_layer['weights']), output_layer['biases'])
    return output

In [5]:
def train_neural_network(x, hm_epochs=10):
    '''
    Train the network on MNIST mini-batches and print the test-set accuracy.

    Uses the module-level ``y`` label placeholder, ``mnist`` dataset and
    ``batch_size``. After training, the session graph is written to
    /tmp/my-model/train.pbtxt.

    Args:
        x: input placeholder that is passed to ``neural_network_model`` and fed
            with image batches.
        hm_epochs: number of epochs, i.e. full cycles of feed-forward and
            backprop over the training set (default 10, the previous
            hard-coded value).

    Returns:
        None. Per-epoch loss and the final accuracy are printed.
    '''
    # You take data, pass it through your neural network
    prediction = neural_network_model(x)
    # Minimize cost
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))

    # Learning rate parameter default is 0.001
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Evaluation ops are built up front so the graph is complete before the
    # session starts; they create no variables.
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    # Whole batches only; floor division avoids the float round-trip of int(a / b).
    batches_per_epoch = mnist.train.num_examples // batch_size

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            # Sum of the per-batch mean losses over this epoch.
            epoch_loss = 0
            for _batch in range(batches_per_epoch):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c

            print('Epoch ', epoch, 'completed out of ', hm_epochs, 'loss: ', epoch_loss)

        print('Accuracy: ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
        tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt')

In [6]:
train_neural_network(x)

Epoch  0 completed out of  10 loss:  127353.01651954651
Epoch  1 completed out of  10 loss:  32358.983506679535
Epoch  2 completed out of  10 loss:  18996.794564102267
Epoch  3 completed out of  10 loss:  12035.141606745708
Epoch  4 completed out of  10 loss:  7732.487982865587
Epoch  5 completed out of  10 loss:  5088.2801134075635
Epoch  6 completed out of  10 loss:  3440.4356208906593
Epoch  7 completed out of  10 loss:  2368.1608387052447
Epoch  8 completed out of  10 loss:  1878.0957595156779
Epoch  9 completed out of  10 loss:  1392.8818865765697
Accuracy:  0.9535


## B) TensorFlow classification for 20 Newsgroups (20NG)

In [7]:
from sklearn.datasets import fetch_20newsgroups
# 11,314 datapoints in .data and category targets in .target_names
ng_train = fetch_20newsgroups(subset='train')
ng_test = fetch_20newsgroups(subset='test')

# use nltk's stopwords to reduce matrix dimensions
import nltk
from nltk.corpus import stopwords
stopw = list(set(stopwords.words('english')))

# Convert to tf-idf vector
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_20ng_class = TfidfVectorizer(stop_words=stopw)
tfidf_20ng_train = tfidf_20ng_class.fit_transform(ng_train.data)
# Only *transform* the test split: calling fit_transform here would re-learn the
# vocabulary and IDF weights from the test documents, so the test matrix would
# no longer share its columns with the training matrix.
tfidf_20ng_test = tfidf_20ng_class.transform(ng_test.data)

In [8]:
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
import numpy as np

In [11]:
# bychi_2000 = SelectKBest(chi2, 2000)
# bychi_4000 = SelectKBest(chi2, 4000)
# new_train_2000 = bychi_2000.fit_transform(X_train, y_train)
# new_train_4000 = bychi_4000.fit_transform(X_train, y_train)

In [10]:
# Keep the 800 tf-idf features with the highest chi-squared score against the labels.
# `k` is passed by keyword because current scikit-learn releases no longer
# accept it positionally.
bychi_800 = SelectKBest(chi2, k=800)
new_train_800 = bychi_800.fit_transform(tfidf_20ng_train, ng_train.target)

In [11]:
new_train_800.shape

(11314, 800)

In [12]:
# Hold out 30% of the chi2-reduced documents for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    new_train_800,
    ng_train.target,
    test_size=0.3,
    random_state=10,
)

In [13]:
X_train.shape, X_test.shape

((7919, 800), (3395, 800))

In [14]:
# Encode the integer class labels as one-hot rows (20 columns, one per class)

n_train_rows = len(y_train)
y_train_onehot = np.zeros((n_train_rows, 20))
# Row i gets a 1 in the column given by its label.
y_train_onehot[np.arange(n_train_rows), y_train] = 1

n_test_rows = len(y_test)
y_test_onehot = np.zeros((n_test_rows, 20))
y_test_onehot[np.arange(n_test_rows), y_test] = 1

In [14]:
y_train_onehot.shape

(7919, 20)

In [15]:
# Get test data also in top features by chi2
# NOTE(review): bychi_2000 and bychi_4000 are only created in a commented-out
# cell earlier in this notebook, so on a fresh kernel these lines raise NameError.
# NOTE(review): X_test is a split of the 800-column new_train_800, so it cannot
# supply 2000 or 4000 columns, and fit_transform would re-fit the selectors on
# the held-out labels. This cell looks like a leftover from an earlier
# experiment - confirm whether it should be removed.
new_test_2000 = bychi_2000.fit_transform(X_test, y_test)
new_test_4000 = bychi_4000.fit_transform(X_test, y_test)

In [30]:
# X_test was split from new_train_800, so it already holds exactly the 800
# selected features. Re-running bychi_800.fit_transform on it could only return
# the same 800 columns, while overwriting the selector fitted on the training
# data with one fitted on held-out labels - so reuse X_test directly.
new_test_800 = X_test

In [15]:
# Define model hyper-parameters and graph inputs for the 20NG classifier

n_nodes_hl1 = 500
n_nodes_hl2 = 500
# n_nodes_hl3 = 200

n_classes = 20
# Set batch size parameter if needed here
# The input width follows the selected feature matrix, so the placeholder no
# longer has to be edited by hand when the number of chi2 features changes
# (800 with the current selection).
n_features = X_train.shape[1]
x = tf.placeholder('float', [None, n_features])
# One-hot label placeholder; the declared shape lets TensorFlow validate fed labels.
y = tf.placeholder('float', [None, n_classes])

def neural_network_model(data):
    '''
    Build a feed-forward network: two ReLU hidden layers and a linear output layer.

    Every layer computes ``input * weights + biases`` with weights and biases
    created as ``tf.Variable``s initialised from ``tf.random_normal``. Layer
    sizes come from the module-level ``n_nodes_hl1``, ``n_nodes_hl2`` and
    ``n_classes``. The input width is read from the static shape of ``data``
    instead of being hard-coded, so changing the number of selected features
    does not require editing this function.

    Args:
        data: 2-D float tensor or placeholder of shape [batch, n_features].

    Returns:
        Tensor of shape [batch, n_classes] holding the raw output-layer values
        (no softmax is applied here).

    Raises:
        ValueError: if the feature dimension of ``data`` is not statically known.
    '''
    n_inputs = data.get_shape().as_list()[-1]
    if n_inputs is None:
        raise ValueError('neural_network_model needs an input with a known feature dimension')

    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([n_inputs, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}

    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}

    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}

    # model is: ip_data*weights + biases, followed by the activation function
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)

    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)

    # The output layer stays linear: the loss applies the softmax itself.
    output = tf.add(tf.matmul(l2, output_layer['weights']), output_layer['biases'])
    return output


def train_neural_network(x, hm_epochs=30, learning_rate=0.05):
    '''
    Train the network on the 20NG training split and print train/test accuracy.

    Each epoch is a single full-batch optimisation step over all of
    ``X_train``. Uses the module-level ``y`` label placeholder and the
    ``X_train`` / ``X_test`` sparse matrices with their one-hot labels
    ``y_train_onehot`` / ``y_test_onehot``.

    Args:
        x: input placeholder that is passed to ``neural_network_model`` and fed
            with the dense feature matrices.
        hm_epochs: number of full-batch training steps (default 30, the
            previous hard-coded value).
        learning_rate: Adam learning rate (default 0.05, the previous
            hard-coded value).

    Returns:
        None. Per-epoch loss and the final accuracies are printed.
    '''
    # You take data, pass it through your neural network
    prediction = neural_network_model(x)
    # Minimize cost
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Evaluation ops are built up front; they create no variables.
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    # Densify once: the matrices do not change between epochs, so converting
    # them inside the loop (and again for evaluation) was repeated work.
    train_dense = X_train.todense()
    test_dense = X_test.todense()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: train_dense, y: y_train_onehot})
            # One step per epoch, so the epoch loss is just this step's loss;
            # float() keeps the printed precision independent of NumPy's
            # scalar promotion rules.
            epoch_loss = float(c)

            print('Epoch ', epoch, 'completed out of ', hm_epochs, 'loss: ', epoch_loss)

        print('Accuracy on train data: ', accuracy.eval({x: train_dense, y: y_train_onehot}))
        print('Accuracy on test data: ', accuracy.eval({x: test_dense, y: y_test_onehot}))

In [17]:
# Train and evaluate with the configuration defined in the cell above.
# The bare string below is a free-form run note; as the last expression of the
# cell it is echoed back as the cell's output.
train_neural_network(x)
'''
800 features, 2 hidden layers: 500,500, learning rate = 0.05, epochs increased to 30
'''

Epoch  0 completed out of  30 loss:  880.1112670898438
Epoch  1 completed out of  30 loss:  727.9601440429688
Epoch  2 completed out of  30 loss:  747.3934326171875
Epoch  3 completed out of  30 loss:  603.26123046875
Epoch  4 completed out of  30 loss:  637.5509643554688
Epoch  5 completed out of  30 loss:  693.1651000976562
Epoch  6 completed out of  30 loss:  753.481689453125
Epoch  7 completed out of  30 loss:  737.0881958007812
Epoch  8 completed out of  30 loss:  767.10205078125
Epoch  9 completed out of  30 loss:  731.1504516601562
Epoch  10 completed out of  30 loss:  644.152099609375
Epoch  11 completed out of  30 loss:  578.3634643554688
Epoch  12 completed out of  30 loss:  491.4989929199219
Epoch  13 completed out of  30 loss:  378.7703857421875
Epoch  14 completed out of  30 loss:  313.6956787109375
Epoch  15 completed out of  30 loss:  268.4210205078125
Epoch  16 completed out of  30 loss:  228.1151885986328
Epoch  17 completed out of  30 loss:  204.36842346191406
Epoch  

'\n800 features, 2 hidden layers: 500,500, learning rate = 0.05, epochs increased to 30\n'

In [32]:
# NOTE(review): the settings in the note below (4000 features, 1000/500 hidden
# units, learning rate 0.01) and the 20 epochs shown in the recorded output do
# not match the definitions visible in this notebook (800 features, 500/500
# hidden units, learning rate 0.05, 30 epochs). The output appears to come from
# an earlier version of the model cell - confirm before relying on it.
train_neural_network(x)
'''
4000 features, 2 hidden layers: 1000,500, learning rate = 0.01
'''

Epoch  0 completed out of  20 loss:  1253.2384033203125
Epoch  1 completed out of  20 loss:  769.3799438476562
Epoch  2 completed out of  20 loss:  553.9244384765625
Epoch  3 completed out of  20 loss:  385.3398742675781
Epoch  4 completed out of  20 loss:  345.66326904296875
Epoch  5 completed out of  20 loss:  334.6177978515625
Epoch  6 completed out of  20 loss:  324.4775390625
Epoch  7 completed out of  20 loss:  291.6800231933594
Epoch  8 completed out of  20 loss:  244.28057861328125
Epoch  9 completed out of  20 loss:  200.14328002929688
Epoch  10 completed out of  20 loss:  166.7349853515625
Epoch  11 completed out of  20 loss:  142.88552856445312
Epoch  12 completed out of  20 loss:  129.06190490722656
Epoch  13 completed out of  20 loss:  114.88496398925781
Epoch  14 completed out of  20 loss:  99.55828857421875
Epoch  15 completed out of  20 loss:  86.62059020996094
Epoch  16 completed out of  20 loss:  76.98542785644531
Epoch  17 completed out of  20 loss:  69.8436508178711

'\n4000 features, 2 hidden layers: 1000,500, learning rate = 0.01\n'

In [39]:
# NOTE(review): the note below describes 2000 features and three hidden layers
# (1000, 500, 200) with learning rate 0.01, but in the model cell visible in
# this notebook the third hidden layer is commented out and the settings are
# 800 features, 500/500 hidden units, learning rate 0.05. The recorded output
# appears to come from an earlier version of that cell - confirm.
train_neural_network(x)
'''
2000 features, 3 hidden layers: 1000,500,200 learning rate = 0.01
'''

Epoch  0 completed out of  20 loss:  6783.3173828125
Epoch  1 completed out of  20 loss:  4102.3125
Epoch  2 completed out of  20 loss:  3539.541015625
Epoch  3 completed out of  20 loss:  2651.954345703125
Epoch  4 completed out of  20 loss:  2285.2412109375
Epoch  5 completed out of  20 loss:  2144.851806640625
Epoch  6 completed out of  20 loss:  1708.6185302734375
Epoch  7 completed out of  20 loss:  1507.900390625
Epoch  8 completed out of  20 loss:  1334.896484375
Epoch  9 completed out of  20 loss:  1026.6300048828125
Epoch  10 completed out of  20 loss:  820.61669921875
Epoch  11 completed out of  20 loss:  737.2379150390625
Epoch  12 completed out of  20 loss:  647.441162109375
Epoch  13 completed out of  20 loss:  558.7777099609375
Epoch  14 completed out of  20 loss:  491.4356689453125
Epoch  15 completed out of  20 loss:  413.13800048828125
Epoch  16 completed out of  20 loss:  343.8560791015625
Epoch  17 completed out of  20 loss:  295.7493896484375
Epoch  18 completed out

'\n2000 features, 3 hidden layers: 1000,500,200 learning rate = 0.01\n'

In [43]:
# NOTE(review): the note below describes 4000 features and three hidden layers
# (1000, 500, 200) with learning rate 0.01, which does not match the model cell
# visible in this notebook (800 features, two hidden layers of 500, learning
# rate 0.05). The recorded output for this run shows train accuracy ~0.56 and
# test accuracy ~0.07 - confirm which code version produced it.
train_neural_network(x)
'''
4000 features, 3 hidden layers: 1000,500,200 learning rate = 0.01
'''

Epoch  0 completed out of  10 loss:  6814.07763671875
Epoch  1 completed out of  10 loss:  5207.626953125
Epoch  2 completed out of  10 loss:  3456.850341796875
Epoch  3 completed out of  10 loss:  2797.771240234375
Epoch  4 completed out of  10 loss:  2700.213134765625
Epoch  5 completed out of  10 loss:  2011.58251953125
Epoch  6 completed out of  10 loss:  1559.55029296875
Epoch  7 completed out of  10 loss:  1342.8489990234375
Epoch  8 completed out of  10 loss:  1239.013671875
Epoch  9 completed out of  10 loss:  1033.87255859375
Accuracy on train data:  0.55903524
Accuracy on test data:  0.070103094


'\n4000 features, 3 hidden layers: 1000,500,200 learning rate = 0.01\n'

In [None]:
'''
With a learning rate of 0.1 training was overshooting and reached an accuracy of 80%; with 4000 features and a learning rate of 0.01 the 
accuracy is 87% and it doesn't overshoot.
'''