In [1]:
import nltk

In [2]:
import nltk
paragraph= """Learning rate decay is a technique for training modern neural networks. It starts training the network with a large learning rate and then slowly reducing/decaying it until local minima is obtained. It is empirically observed to help both optimization and generalization.

>When we have a constant learning rate, the steps taken by our algorithm while iterating towards minima are so noisy that after certain iterations it seems wandering around the minima and do not actually converges.
>
>When the learning rate is large initially we still have relatively fast learning but as tending towards minima learning rate gets smaller and smaller, end up oscillating in a tighter region around minima rather than wandering far away from it.
>
>Learing rate decay is implemented as follows:"""
# Tokenization: break the paragraph into individual word/punctuation tokens
# and, independently, into a list of sentences used by the cells below.
words = nltk.word_tokenize(paragraph)
sentences = nltk.sent_tokenize(paragraph)


In [3]:
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

#### Stemming

In [4]:

# Stemming: drop English stop words and reduce each remaining token to its
# Porter stem, one output string per input sentence.
stemmer = PorterStemmer()

# Build the stop-word set once; it does not change between tokens, so
# rebuilding it inside the comprehension only wastes time.
# NOTE: the membership test is case-sensitive, so capitalised stop words such
# as "It" or "When" are NOT filtered out (they appear lower-cased in the
# printed result because the stemmed tokens come back in lower case).
english_stopwords = set(stopwords.words('english'))

# Collect the results in a new list instead of overwriting `sentences`, so
# re-running this cell never stems text that has already been stemmed.
stemmed_sentences = []
for sentence in sentences:
    words = nltk.word_tokenize(sentence)
    # stemmer.stem() is the PorterStemmer method that maps a word to its stem
    words = [stemmer.stem(word) for word in words if word not in english_stopwords]
    stemmed_sentences.append(' '.join(words))
print(stemmed_sentences)
# Disadvantage: many of the resulting stems are not real words

['learn rate decay techniqu train modern neural network .', 'it start train network larg learn rate slowli reducing/decay local minima obtain .', 'it empir observ help optim gener .', '> when constant learn rate , step taken algorithm iter toward minima noisi certain iter seem wander around minima actual converg .', '> > when learn rate larg initi still rel fast learn tend toward minima learn rate get smaller smaller , end oscil tighter region around minima rather wander far away .', '> > lear rate decay implement follow :']


#### Lemmatization

In [5]:

# Lemmatization: drop English stop words and map each remaining token to its
# WordNet lemma, one output string per sentence.
lemmatizer = WordNetLemmatizer()

# Build the stop-word set once instead of once per token (loop-invariant).
# NOTE: the membership test is case-sensitive, so capitalised stop words such
# as "It" or "When" survive the filter, as the printed result shows.
english_stopwords = set(stopwords.words('english'))

# Start again from the raw paragraph so this cell does not depend on what a
# previous cell did to `sentences`.
sentences = nltk.sent_tokenize(paragraph)
for i, sentence in enumerate(sentences):
    words = nltk.word_tokenize(sentence)
    words = [lemmatizer.lemmatize(word) for word in words if word not in english_stopwords]
    sentences[i] = ' '.join(words)
print(sentences)
# Disadvantage: much slower than stemming

['Learning rate decay technique training modern neural network .', 'It start training network large learning rate slowly reducing/decaying local minimum obtained .', 'It empirically observed help optimization generalization .', '> When constant learning rate , step taken algorithm iterating towards minimum noisy certain iteration seems wandering around minimum actually converges .', '> > When learning rate large initially still relatively fast learning tending towards minimum learning rate get smaller smaller , end oscillating tighter region around minimum rather wandering far away .', '> > Learing rate decay implemented follows :']


#### Bag of words

In [6]:
import re


# Build a cleaned corpus (one normalised string per sentence) that can be fed
# to a bag-of-words vectorizer.
ps = PorterStemmer()  # not used in this cell; kept in case other cells rely on it
wordnet = WordNetLemmatizer()

# Build the stop-word set once instead of once per token (loop-invariant).
english_stopwords = set(stopwords.words('english'))

corpus = []

sentences = nltk.sent_tokenize(paragraph)

for sentence in sentences:
    # Replace every character that is not a-z or A-Z with a space
    review = re.sub('[^a-zA-Z]', ' ', sentence)

    # Lower-case everything; this also makes the stop-word test below
    # effectively case-insensitive
    review = review.lower()

    # Split on whitespace to get a list of words
    review = review.split()

    # Drop stop words and lemmatize the words that remain
    review = [wordnet.lemmatize(word) for word in review if word not in english_stopwords]

    corpus.append(' '.join(review))

print(corpus)

['learning rate decay technique training modern neural network', 'start training network large learning rate slowly reducing decaying local minimum obtained', 'empirically observed help optimization generalization', 'constant learning rate step taken algorithm iterating towards minimum noisy certain iteration seems wandering around minimum actually converges', 'learning rate large initially still relatively fast learning tending towards minimum learning rate get smaller smaller end oscillating tighter region around minimum rather wandering far away', 'learing rate decay implemented follows']


In [8]:
from sklearn.feature_extraction.text import CountVectorizer

# Turn the cleaned corpus into a document-term count matrix:
# one row per sentence in `corpus`, one column per vocabulary word.
cv = CountVectorizer()
sparse_counts = cv.fit_transform(corpus)

# fit_transform returns a sparse matrix; convert it to a dense array for printing
x = sparse_counts.toarray()

print(x)


[[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0
  0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 0 1 0
  1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 2 0 0 0 1 0 0 0 0 1 0
  0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 1]
 [0 0 1 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 1 0 3 0 2 0 0 0 0 0 0 0 1 2 1
  0 1 1 0 0 2 0 0 1 0 0 1 1 1 0 1]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
