### Build a DNN using Keras with `RELU` and `ADAM`

#### Load tensorflow

In [1]:
import tensorflow as tf
tf.reset_default_graph()
tf.set_random_seed(42)

  from ._conv import register_converters as _register_converters


#### Collect Fashion mnist data from tf.keras.datasets 

In [2]:
(trainX, trainY),(testX, testY) = tf.keras.datasets.mnist.load_data()

#### Change train and test labels into one-hot vectors

In [3]:
trainY = tf.keras.utils.to_categorical(trainY, num_classes=10)
testY = tf.keras.utils.to_categorical(testY, num_classes=10)

#### Build the Graph

#### Initialize model, reshape & normalize data

In [4]:
#Initialize model, reshape & normalize data
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Reshape((784,),input_shape=(28,28,)))
model.add(tf.keras.layers.BatchNormalization())

#### Add two fully connected layers with 200 and 100 neurons respectively with `relu` activations. Add a dropout layer with `p=0.25`

In [5]:
#Hidden layers
model.add(tf.keras.layers.Dense(200, activation='relu', name='Layer_1'))
model.add(tf.keras.layers.Dense(100, activation='relu', name='Layer_2'))

#Dropout layer
model.add(tf.keras.layers.Dropout(0.25))

### Add the output layer with a fully connected layer with 10 neurons with `softmax` activation. Use `categorical_crossentropy` loss and `adam` optimizer and train the network. And, report the final validation.

In [6]:
#Output layer
model.add(tf.keras.layers.Dense(10, activation='softmax', name='Output'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [7]:
#Train the model
model.fit(trainX,trainY,          
          validation_data=(testX,testY),
          epochs=30,
          batch_size=32)

Train on 60000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30


Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x18fa3f7dcf8>

## Word Embeddings in Python with Gensim

In this, you will practice how to train and load word embedding models for natural language processing applications in Python using Gensim.


1. How to train your own word2vec word embedding model on text data.
2. How to visualize a trained word embedding model using Principal Component Analysis.
3. How to load pre-trained word2vec word embedding models.

### Run the below two commands to install gensim and the wiki dataset

In [None]:
!pip install --upgrade gensim --user

In [None]:
!pip install wikipedia --user

### Import gensim

In [9]:
import gensim

### Obtain Text

Import search and page functions from wikipedia module
search(/key word/): search function takes keyword as argument and gives top 10 article titles matching the given keyword.

page(/title of article/): page function takes page title as argument and gives content in the output.

In [16]:
## Usage: 

content = []
title = []
i =0 
from wikipedia import search, page
titles = search("Machine Learning")
for i in range(10):
    wikipage = page(titles[i])
    #print (wikipage.titles)
    title.append(wikipage.title)
    content.append(wikipage.content)

### Print the top 10 titles for the keyword `Machine Learning`

In [17]:
import pandas as pd
df = pd.DataFrame()
df['content'] = content
df['Title'] = title


In [18]:
df.head(10)

Unnamed: 0,content,Title
0,Machine learning (ML) is the scientific study ...,Machine learning
1,Active learning is a special case of machine l...,Active learning (machine learning)
2,Boosting is a machine learning ensemble meta-a...,Boosting (machine learning)
3,Deep learning (also known as deep structured l...,Deep learning
4,These datasets are used for machine-learning r...,List of datasets for machine learning research
5,"In machine learning, support-vector machines (...",Support-vector machine
6,The following outline is provided as an overvi...,Outline of machine learning
7,Waikato Environment for Knowledge Analysis (We...,Weka (machine learning)
8,Adversarial machine learning is a technique em...,Adversarial machine learning
9,Extreme learning machines are feedforward neur...,Extreme learning machine


### Get the content from the first title from the above obtained 10 titles.

In [21]:
import re, string

def clean_str(string):
  """
  String cleaning before vectorization
  """
  try:    
    string = re.sub(r'^https?:\/\/<>.*[\r\n]*', '', string, flags=re.MULTILINE)
    string = re.sub(r"[^A-Za-z]", " ", string)         
    words = string.strip().lower().split()    
    words = [w for w in words if len(w)>=1]
    return " ".join(words)	
  except:
    return ""

In [23]:
df['clean_Content'] = df['content'].apply(clean_str)
df.head()

Unnamed: 0,content,Title,clean_Content
0,Machine learning (ML) is the scientific study ...,Machine learning,machine learning ml is the scientific study of...
1,Active learning is a special case of machine l...,Active learning (machine learning),active learning is a special case of machine l...
2,Boosting is a machine learning ensemble meta-a...,Boosting (machine learning),boosting is a machine learning ensemble meta a...
3,Deep learning (also known as deep structured l...,Deep learning,deep learning also known as deep structured le...
4,These datasets are used for machine-learning r...,List of datasets for machine learning research,these datasets are used for machine learning r...


### Create a list with name `documents` and append all the words in the 10 pages' content using the above 10 titles.

In [24]:
#List to hold all words in each review
documents = []

#Iterate over each review
for doc in df['clean_Content']:
    documents.append(doc.split(' '))

print(len(documents))
print(documents[0])

10
['machine', 'learning', 'ml', 'is', 'the', 'scientific', 'study', 'of', 'algorithms', 'and', 'statistical', 'models', 'that', 'computer', 'systems', 'use', 'to', 'effectively', 'perform', 'a', 'specific', 'task', 'without', 'using', 'explicit', 'instructions', 'relying', 'on', 'patterns', 'and', 'inference', 'instead', 'it', 'is', 'seen', 'as', 'a', 'subset', 'of', 'artificial', 'intelligence', 'machine', 'learning', 'algorithms', 'build', 'a', 'mathematical', 'model', 'of', 'sample', 'data', 'known', 'as', 'training', 'data', 'in', 'order', 'to', 'make', 'predictions', 'or', 'decisions', 'without', 'being', 'explicitly', 'programmed', 'to', 'perform', 'the', 'task', 'machine', 'learning', 'algorithms', 'are', 'used', 'in', 'the', 'applications', 'of', 'email', 'filtering', 'detection', 'of', 'network', 'intruders', 'and', 'computer', 'vision', 'where', 'it', 'is', 'infeasible', 'to', 'develop', 'an', 'algorithm', 'of', 'specific', 'instructions', 'for', 'performing', 'the', 'task',

### Build the gensim model for word2vec with by considering all the words with frequency >=1 with embedding size=50

In [25]:
#Build the model
model = gensim.models.Word2Vec(documents, #Word list
                               min_count = 1, #Ignore all words with total frequency lower than this                           
                               workers=4, #Number of CPUs
                               size=50,  #Embedding size
                               window=5, #Maximum Distance between current and predicted word
                               iter=10   #Number of iterations over the text corpus
                              )  

### Exploring the model

In [26]:
#Model size
model.wv.syn0.shape

  


(4448, 50)

#### Check how many words in the model

In [27]:
# Vocablury of the model
model.wv.vocab

{'machine': <gensim.models.keyedvectors.Vocab at 0x18fab135fd0>,
 'learning': <gensim.models.keyedvectors.Vocab at 0x18fa1b055f8>,
 'ml': <gensim.models.keyedvectors.Vocab at 0x18fa1a82ba8>,
 'is': <gensim.models.keyedvectors.Vocab at 0x18fab13b0f0>,
 'the': <gensim.models.keyedvectors.Vocab at 0x18fab13b128>,
 'scientific': <gensim.models.keyedvectors.Vocab at 0x18fab13b048>,
 'study': <gensim.models.keyedvectors.Vocab at 0x18fab13b160>,
 'of': <gensim.models.keyedvectors.Vocab at 0x18fab13b198>,
 'algorithms': <gensim.models.keyedvectors.Vocab at 0x18fab13b1d0>,
 'and': <gensim.models.keyedvectors.Vocab at 0x18fab13b208>,
 'statistical': <gensim.models.keyedvectors.Vocab at 0x18fab13b240>,
 'models': <gensim.models.keyedvectors.Vocab at 0x18fab13b278>,
 'that': <gensim.models.keyedvectors.Vocab at 0x18fab13b2b0>,
 'computer': <gensim.models.keyedvectors.Vocab at 0x18fab13b2e8>,
 'systems': <gensim.models.keyedvectors.Vocab at 0x18fab13b320>,
 'use': <gensim.models.keyedvectors.Vocab 

### Get an embedding for word `SVM`

In [29]:
model.wv['svm']

array([-0.42817754,  1.0493265 ,  0.33265767, -1.3082318 ,  1.1169025 ,
        1.0030046 , -0.00340915,  0.6173608 , -0.96298313,  0.16009186,
       -0.28315297, -0.3190175 , -0.01381337, -0.47134888,  0.42271686,
       -0.40937755,  0.557296  ,  0.26535085,  0.4313733 , -0.66291076,
       -0.1961258 , -1.2795964 , -1.133088  ,  0.6253178 , -0.49248698,
        0.13017589, -1.2836212 , -0.7742435 , -0.69274795,  0.01466106,
        0.044676  , -0.5969457 , -0.2654269 ,  1.8276255 , -0.61156255,
        1.4701265 ,  0.19861451, -0.02155786, -0.6527102 ,  0.00305347,
       -0.5848285 ,  1.1390697 , -0.21716228,  0.02662108, -0.2967651 ,
       -0.5773036 ,  0.38282338,  0.10443805,  0.63126683,  0.4947669 ],
      dtype=float32)

### Finding most similar words for word `learning`

In [30]:
model.wv.most_similar('svm')

[('one', 0.9998096227645874),
 ('an', 0.9997731447219849),
 ('classification', 0.999770998954773),
 ('other', 0.9997637271881104),
 ('boosting', 0.9997509717941284),
 ('techniques', 0.9997425675392151),
 ('to', 0.9997419714927673),
 ('many', 0.9997381567955017),
 ('examples', 0.9997338652610779),
 ('feature', 0.9997269511222839)]

### Find the word which is not like others from `machine, svm, ball, learning`

In [31]:
model.doesnt_match("machine, svm, ball, learning".split())

  """Entry point for launching an IPython kernel.


'learning'

### Save the model with name `word2vec-wiki-10`

In [32]:
model.save('word2vec-wiki-10')

### Load the model `word2vec-wiki-10`

In [33]:
model = gensim.models.Word2Vec.load('word2vec-wiki-10')