In [77]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split 
from tensorflow.keras.preprocessing.sequence import pad_sequences   # to do padding or truncatin
from tensorflow.keras.preprocessing.text import Tokenizer

def load_dataset():
    df = pd.read_csv('IMDB Dataset.csv', nrows= 200)
    x_data = df['review']       # Reviews/Input
    y_data = df['sentiment']    # Sentiment/Output

    # PRE-PROCESS REVIEW
    x_data = x_data.replace({'<.*?>': ''}, regex = True)          # remove html tag
    x_data = x_data.replace({'[^A-Za-z]': ' '}, regex = True)     # remove non alphabet
    x_data = x_data.apply(lambda review: [w for w in review.split() if w not in english_stops])  # remove stop words
    x_data = x_data.apply(lambda review: [w.lower() for w in review])   # lower case
    
    # ENCODE SENTIMENT -> 0 & 1
    y_data = y_data.replace('positive', 1)
    y_data = y_data.replace('negative', 0)

    return x_data, y_data












In [78]:
english_stops = set(stopwords.words('english'))
x_data, y_data = load_dataset()

print('Reviews')
print(x_data, '\n')
print('Sentiment')
print(y_data)

Reviews
0      [one, reviewers, mentioned, watching, oz, epis...
1      [a, wonderful, little, production, the, filmin...
2      [i, thought, wonderful, way, spend, time, hot,...
3      [basically, family, little, boy, jake, thinks,...
4      [petter, mattei, love, time, money, visually, ...
                             ...                        
195    [phantasm, class, phantasm, ii, awesome, phant...
196    [ludicrous, angelic, year, old, annakin, turns...
197    [scotty, grant, cramer, would, go, star, great...
198    [if, keep, rigid, historical, perspective, fil...
199    [the, film, quickly, gets, major, chase, scene...
Name: review, Length: 200, dtype: object 

Sentiment
0      1
1      1
2      1
3      0
4      1
      ..
195    0
196    0
197    0
198    1
199    0
Name: sentiment, Length: 200, dtype: int64


In [79]:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.2)


In [80]:
token = Tokenizer(lower=False)    # no need lower, because already lowered the data in load_data()
token.fit_on_texts(x_train)
x_train = token.texts_to_sequences(x_train)
x_test = token.texts_to_sequences(x_test)

max_length = 130

x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

total_words = len(token.word_index) + 1   # add 1 because of 0 padding


#open and read the file after the appending:



print('Encoded X Train\n', x_train, '\n')
print('Encoded X Test\n', x_test, '\n')
print('Maximum review length: ', max_length)


Encoded X Train
 [[ 748  117   61 ...    0    0    0]
 [   1  749  135 ...    0    0    0]
 [ 263  284 1228 ... 2783   21   76]
 ...
 [   2 1191 1192 ...    0    0    0]
 [   2  953  627 ...  350  327   22]
 [   9   65    1 ...   94    1  645]] 

Encoded X Test
 [[   1   18    6 ...    0    0    0]
 [4816    4  274 ...    0    0    0]
 [   2 4554 1910 ...    0    0    0]
 ...
 [   1  312   80 ...    0    0    0]
 [   5 4643    4 ...  826 2161 1801]
 [   1  471  166 ...    0    0    0]] 

Maximum review length:  130


In [81]:


num_nodes_input = 130
num_nodes_hidden = 50
num_nodes_output = 1
#get all the feature vectors from data.py
Dataset = x_train
Target = y_train
Testset = x_test
Expectedoutput = y_test




num_training_sets = len(x_train);
num_testing_sets = len(x_test);

Target = np.array(Target)
Target = Target.reshape(num_training_sets,1)

Expectedoutput = np.array(Expectedoutput)
Expectedoutput = Expectedoutput.reshape(num_testing_sets,1)

#all the values of learning rate
alpha = [0.6]

#sigmoid activation function
def sigmoid(x):        
    return 1/(1+np.exp(-x))

#function to train Network
def train(W,Wb,V,Vb,Dataset,Target,alpha):
    
    condition = True    
    epoch = 0
    num_nodes_input = len(W)
    num_nodes_hidden = len(Wb)
    num_nodes_output = len(Vb)
    finalop = [[]]*num_training_sets
    Z = [0]*num_nodes_hidden
    Y = [0]*num_nodes_output
    delta_output = [0]*num_nodes_output
    delta_hidden = [0]*num_nodes_hidden
    
    while condition is True:                 #continue while error is < 0.01 0r epoch < 10000
        error = 0 
        epoch += 1
        for num in range(num_training_sets):                #for each input image vector

            X=Dataset[num]                  
            T=Target[num]

                                                     #forward Propogation
            for j in range(num_nodes_hidden):        #caluculate Z
                Zinj = 0
                for i in range(num_nodes_input):
                    Zinj += X[i]*W[i][j]

                Zinj = Wb[j] + Zinj
                Z[j] = sigmoid(Zinj)
            
            for j in range(num_nodes_output):        #calculate Y
                Yinj = 0
                for i in range(num_nodes_hidden):
                    Yinj += Z[i]*V[i][j]

                Yinj = Vb[j] + Yinj
                Y[j] = sigmoid(Yinj)

            finalop[num] = Y.copy()                  #stores output for each of 15 image vectors

                                    #sum square error
            error+=(finalop[num][0] - T[0])**2
            
            for k in range(num_nodes_output):                        # back Propogation
                delta_output[k] = (T[k]-Y[k])*(Y[k])*(1-Y[k])        #errors for output nodes

            for j in range(num_nodes_hidden):                       
                deltaj = 0
                for k in range(num_nodes_output):
                    deltaj += delta_output[k]*V[j][k]
                delta_hidden[j] = deltaj          
            
            for j in range(num_nodes_hidden):
                delta_hidden[j] = delta_hidden[j]*(Z[j])*(1-Z[j])      #errors for hidden nodes
 
            for i in range(num_nodes_input):                           #udate weights and Bias
                for j in range(num_nodes_hidden):
                    W[i][j]+=alpha*delta_hidden[j]*X[i]                
            
            for i in range(num_nodes_hidden):
                Wb[i]+=alpha*delta_hidden[i]
            
            for j in range(num_nodes_hidden):
                for k in range(num_nodes_output):
                    V[j][k]+=alpha*delta_output[k]*Z[j]

            for i in range(num_nodes_output):
                Vb[i]+=alpha*delta_output[i]
        print(f"Epoch is {epoch} error is {error}")
        if error<0.01 or epoch>500:                                    #stopping condition
            condition=False        
    print(f"Epoch is {epoch}")


#function to test the network
def test(W,Wb,V,Vb,Testset):

    Z = [0]*num_nodes_hidden
    Y = [0]*num_nodes_output
    Yconverted = [0]*num_nodes_output
    num=0
    count = 0;                                #to calculate accuracy
    for X in Testset:
        
        for j in range(num_nodes_hidden):
            Zinj = 0
            for i in range(num_nodes_input):
                Zinj += X[i]*W[i][j]
            Zinj = Wb[j] + Zinj
            Z[j] = sigmoid(Zinj)
            
        for j in range(num_nodes_output):
            Yinj = 0
            for i in range(num_nodes_hidden):
                Yinj += Z[i]*V[i][j]
            Yinj = Vb[j] + Yinj
            Y[j] = sigmoid(Yinj)
            
            if Y[j]<0.5:                             #approximate values based on output
                Yconverted[j] = 0
            else:
                Yconverted[j] = 1


        if Yconverted[0]==0:
            result = 0
        else:
            result = 1

        print(f"Decimal output:{Y}   and output is: {result}  Output should be: {Expectedoutput[num]}")
        
        if result == Expectedoutput[num][0]:       #if result is correct the increment count
            count+=1
        
        num+=1
        
    print(f"accuracy is {(count/num)*100}")

        

    
#test for all the learning rates
for i in range(len(alpha)):
    #initialize with random values
    W = np.random.rand(num_nodes_input,num_nodes_hidden)
    W = np.multiply(W,0.01)
    Wb = np.random.rand(num_nodes_hidden)
    Wb = np.multiply(Wb,0.01)
    V = np.random.rand(num_nodes_hidden,num_nodes_output)
    V = np.multiply(V,0.01)
    Vb = np.random.rand(num_nodes_output)
    Vb = np.multiply(Vb,0.01)

    print("\n#############################################################################\n")
    print(f"the learning rate is {alpha[i]}\n")
    train(W,Wb,V,Vb,Dataset,Target,alpha[i])
    test(W,Wb,V,Vb,Testset)


#############################################################################

the learning rate is 0.6

Epoch is 1 error is 61.932306570679934
Epoch is 2 error is 64.52037182673382
Epoch is 3 error is 64.67209580528427
Epoch is 4 error is 67.85211520358412
Epoch is 5 error is 64.49721570017908
Epoch is 6 error is 66.67821799195761
Epoch is 7 error is 67.665295013483
Epoch is 8 error is 65.32986142176942
Epoch is 9 error is 63.43014908294162
Epoch is 10 error is 61.129063815643285
Epoch is 11 error is 64.94148892482323
Epoch is 12 error is 63.24530544255107
Epoch is 13 error is 63.67243709181911
Epoch is 14 error is 66.78089395699607
Epoch is 15 error is 64.60223224777724
Epoch is 16 error is 67.1259647140609
Epoch is 17 error is 65.42636689260817
Epoch is 18 error is 64.74040591028673
Epoch is 19 error is 66.67678987168333
Epoch is 20 error is 64.99258168883846
Epoch is 21 error is 66.24302826383699
Epoch is 22 error is 65.67492803498273
Epoch is 23 error is 68.64913945337753
Epoch i

Epoch is 207 error is 63.038081195107004
Epoch is 208 error is 66.59097554042579
Epoch is 209 error is 65.4597829224138
Epoch is 210 error is 61.828806879175765
Epoch is 211 error is 63.97094059309753
Epoch is 212 error is 65.97222099805653
Epoch is 213 error is 66.04953543142358
Epoch is 214 error is 63.349937470824635
Epoch is 215 error is 67.5660003844156
Epoch is 216 error is 63.7063968192322
Epoch is 217 error is 70.99424111899997
Epoch is 218 error is 67.02721505962968
Epoch is 219 error is 62.971296146279585
Epoch is 220 error is 65.39293662258015
Epoch is 221 error is 61.378614136439836
Epoch is 222 error is 63.84985353460512
Epoch is 223 error is 67.38196982692313
Epoch is 224 error is 60.454610750759215
Epoch is 225 error is 67.20073384378257
Epoch is 226 error is 65.50796988994061
Epoch is 227 error is 64.08483492027146
Epoch is 228 error is 68.05553475775761
Epoch is 229 error is 62.39422390154912
Epoch is 230 error is 64.08383622927913
Epoch is 231 error is 68.107136640276

Epoch is 413 error is 64.2888957506296
Epoch is 414 error is 66.57127870230885
Epoch is 415 error is 61.576452939505586
Epoch is 416 error is 64.90022036771867
Epoch is 417 error is 65.03233507224634
Epoch is 418 error is 67.47454483277751
Epoch is 419 error is 65.4532930610313
Epoch is 420 error is 66.21533516177493
Epoch is 421 error is 67.57712090008145
Epoch is 422 error is 62.31014982647924
Epoch is 423 error is 63.40494169558537
Epoch is 424 error is 65.7350389943241
Epoch is 425 error is 64.8100576873883
Epoch is 426 error is 64.00641036023123
Epoch is 427 error is 66.86804066301421
Epoch is 428 error is 61.11066044613781
Epoch is 429 error is 63.66688530431223
Epoch is 430 error is 60.645132131247124
Epoch is 431 error is 70.09293245106161
Epoch is 432 error is 65.80614367129769
Epoch is 433 error is 68.27278323547984
Epoch is 434 error is 67.32191881772498
Epoch is 435 error is 67.35437431877062
Epoch is 436 error is 65.16996511338185
Epoch is 437 error is 64.80405337218038
Ep