In [3]:
#A TF graph to add vectors

import tensorflow as tf

my_graph = tf.Graph()

# Build two constant vectors and their element-wise sum inside the session
# block, then evaluate the sum with that same session and print it.
with tf.Session(graph=my_graph) as sess:
    x = tf.constant([1, 3, 6])
    y = tf.constant([1, 1, 1])
    op = tf.add(x, y)

    result = sess.run(fetches=op)
    print(result)

[2 4 7]


In [4]:
#A TF graph to multiply vectors 

import tensorflow as tf

my_graph = tf.Graph()

# Build two constant vectors and their element-wise product inside the session
# block, then evaluate the product with that same session and print it.
with tf.Session(graph=my_graph) as sess:
    x = tf.constant([1, 3, 6])
    y = tf.constant([2, 2, 2])
    op = tf.multiply(x, y)

    result = sess.run(fetches=op)
    print(result)

[ 2  6 12]


In [21]:
# Training hyper-parameters
learning_rate = 0.01     # step size handed to the optimizer
training_epochs = 10     # iterations of the outer training loop
batch_size = 150         # samples requested per get_batch call
display_step = 1         # the loss is printed every `display_step` epochs

# Network dimensions
n_hidden_1 = 100         # units in the first hidden layer
n_hidden_2 = 100         # units in the second hidden layer
# NOTE(review): `total_words` is assigned by cells that appear further down
# the notebook, so this cell only runs after one of them has been executed.
n_input = total_words    # words in the vocabulary
n_classes = 3            # categories: graphics, sci.space and baseball

# Placeholders that are fed at run time; the leading None leaves the number of
# rows per batch open.
input_tensor = tf.placeholder(tf.float32, shape=[None, n_input], name="input")
output_tensor = tf.placeholder(tf.float32, shape=[None, n_classes], name="output")

In [36]:
#define the layers
def multilayer_perceptron(input_tensor, weights, biases):
    """Assemble a feed-forward network with two hidden layers.

    Args:
        input_tensor: tensor fed into the first hidden layer.
        weights: mapping with the keys 'h1', 'h2' and 'out', holding the
            weight matrix of each layer.
        biases: mapping with the keys 'b1', 'b2' and 'out', holding the bias
            of each layer.

    Returns:
        The output layer's values; no activation is applied to them here.
    """
    # First hidden layer: matrix product, bias, then ReLU.
    hidden_1 = tf.nn.relu(
        tf.add(tf.matmul(input_tensor, weights['h1']), biases['b1'])
    )

    # Second hidden layer: same pattern, fed by the first hidden layer.
    hidden_2 = tf.nn.relu(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2'])
    )

    # Output layer: matrix product plus bias only.
    return tf.matmul(hidden_2, weights['out']) + biases['out']

In [39]:
#Store weights and biases

# Trainable parameters: one weight matrix and one bias vector per layer, each
# initialised from tf.random_normal.
weights = {
    'h1': tf.Variable(tf.random_normal(shape=[n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal(shape=[n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal(shape=[n_hidden_2, n_classes])),
}
biases = {
    'b1': tf.Variable(tf.random_normal(shape=[n_hidden_1])),
    'b2': tf.Variable(tf.random_normal(shape=[n_hidden_2])),
    'out': tf.Variable(tf.random_normal(shape=[n_classes])),
}

# Network output for whatever is fed into `input_tensor`.
prediction = multilayer_perceptron(input_tensor, weights, biases)

# Softmax cross-entropy between the network output and the fed labels ...
entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=prediction,
    labels=output_tensor,
)

# ... reduced to a single scalar by taking the mean.
loss = tf.reduce_mean(entropy_loss)

# Training op: one Adam update that minimises the mean loss.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Op that initialises every variable created above.
init = tf.global_variables_initializer()

In [30]:
#Preprocessing data input

import numpy as np
from collections import Counter

text = "Hi from Singapore"

# Tally every space-separated token of the sentence, ignoring case.
vocab = Counter()
for word in text.split(' '):
    word_lowercase = word.lower()
    vocab.update([word_lowercase])

# print (vocab)
# Counter({'hi': 1, 'from': 1, 'singapore': 1})

def get_word2index(vocab):
    """Number the entries of ``vocab`` in iteration order.

    Args:
        vocab: iterable of words (for a mapping, its keys are used).

    Returns:
        dict mapping each word to its zero-based position in ``vocab``.
    """
    return {word: position for position, word in enumerate(vocab)}

# print (get_word2index(vocab))
# {'hi': 0, 'from': 1, 'singapore': 2}

# Lookup table and vocabulary size for the toy sentence.
word2index = get_word2index(vocab)
total_words = len(vocab)

# One counter slot per vocabulary word, all starting at zero.
matrix = np.zeros(total_words, dtype=float)
# print (matrix)
# [ 0.  0.  0.]

# Add one to a word's slot each time the word occurs in the text.
for word in text.split():
    matrix[word2index[word.lower()]] += 1

print(matrix)


[ 1.  1.  1.]


In [13]:
from sklearn.datasets import fetch_20newsgroups

# The three newsgroups used as classes.
categories = ["comp.graphics", "sci.space", "rec.sport.baseball"]

# Fetch the 'train' subset first, then the 'test' subset, both restricted to
# the categories above.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=subset, categories=categories)
    for subset in ('train', 'test')
)


Downloading dataset from http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz (14 MB)


In [32]:
# Count every lower-cased, space-separated token across the training documents
# and then the test documents, so the vocabulary covers words from both.
vocab = Counter()
for dataset in (newsgroups_train, newsgroups_test):
    for text in dataset.data:
        vocab.update(word.lower() for word in text.split(' '))

In [33]:
# Vocabulary size: the number of distinct keys currently held in `vocab`.
total_words = len(vocab)

def get_word_2_index(vocab):
    """Map the lower-cased form of each entry of ``vocab`` to its position.

    Args:
        vocab: iterable of words (for a mapping, its keys are used).

    Returns:
        dict from lower-cased word to its zero-based position in ``vocab``.
        When several entries share a lower-cased form, the position of the
        last such entry is kept.
    """
    return {word.lower(): position for position, word in enumerate(vocab)}

# Lookup table from lower-cased word to its integer index.
word2index = get_word_2_index(vocab)

# Spot-check a single entry of the table.
print("Index of the word 'the':",word2index['the'])

Index of the word 'the': 10


In [34]:
def get_batch(df, i, batch_size, n_classes=3):
    """Return the i-th batch of ``df`` as word-count rows and one-hot labels.

    Reads the module-level ``total_words`` (row width) and ``word2index``
    (lower-cased word -> column) to build the word-count rows.

    Args:
        df: object exposing ``data`` (sequence of text documents) and
            ``target`` (sequence of integer class labels), sliced identically.
        i: zero-based batch number.
        batch_size: number of samples per batch; the slice taken is
            ``[i * batch_size, i * batch_size + batch_size)``, so the last
            batch may be shorter.
        n_classes: width of the one-hot label rows. Defaults to 3.

    Returns:
        Tuple ``(batches, results)`` of float arrays with shapes
        ``(n, total_words)`` and ``(n, n_classes)``, where ``n`` is the number
        of samples in the slice. An empty slice yields ``n == 0`` while
        keeping the second dimension.

    Raises:
        ValueError: if a label lies outside ``[0, n_classes)``.
    """
    start = i * batch_size
    stop = start + batch_size
    texts = df.data[start:stop]
    categories = df.target[start:stop]

    # Preallocating keeps the 2-D shape even when the slice is empty.
    batches = np.zeros((len(texts), total_words), dtype=float)
    for row, text in enumerate(texts):
        for word in text.split(' '):
            column = word2index.get(word.lower())
            # Words missing from the vocabulary are skipped instead of
            # aborting the whole batch with a KeyError.
            if column is not None:
                batches[row, column] += 1

    results = np.zeros((len(categories), n_classes), dtype=float)
    for row, category in enumerate(categories):
        # Reject unexpected labels rather than silently filing them under the
        # last class.
        if not 0 <= category < n_classes:
            raise ValueError(
                "category %r is outside the range [0, %d)" % (category, n_classes)
            )
        results[row, category] = 1

    return batches, results

In [48]:
#Launch the graph 

with tf.Session() as sess:
    sess.run(init)

    n_train = len(newsgroups_train.data)
    # Ceiling division: a trailing batch smaller than batch_size is trained on
    # as well, instead of being dropped by integer truncation.
    total_batch = (n_train + batch_size - 1) // batch_size

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = get_batch(newsgroups_train, i, batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            c, _ = sess.run(
                [loss, optimizer],
                feed_dict={input_tensor: batch_x, output_tensor: batch_y},
            )
            # `c` is the mean loss of this batch; weighting it by the batch
            # size makes avg_cost the mean loss per training sample even when
            # the last batch is shorter than the others.
            avg_cost += c * len(batch_x) / n_train
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "loss=", \
                "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model: a prediction is correct when the index of the largest
    # network output equals the index of the 1 in the one-hot label.
    index_prediction = tf.argmax(prediction, 1)
    index_correct = tf.argmax(output_tensor, 1)
    correct_prediction = tf.equal(index_prediction, index_correct)

    # Accuracy: fraction of correct predictions.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Evaluate on the whole test set as a single batch.
    total_test_data = len(newsgroups_test.target)
    batch_x_test, batch_y_test = get_batch(newsgroups_test, 0, total_test_data)
    print("Accuracy:", accuracy.eval(session=sess, feed_dict={input_tensor: batch_x_test, output_tensor: batch_y_test}))

Epoch: 0001 loss= 667.592864990
Epoch: 0002 loss= 116.357352170
Epoch: 0003 loss= 75.173783216
Epoch: 0004 loss= 45.147501599
Epoch: 0005 loss= 4.652700466
Epoch: 0006 loss= 1.762349507
Epoch: 0007 loss= 4.715268059
Epoch: 0008 loss= 2.467824340
Epoch: 0009 loss= 0.061446079
Epoch: 0010 loss= 0.191047146
Optimization Finished!
Accuracy: 0.719492
