In [34]:
import tensorflow as tf
import random

In [35]:
def get_data(filename):
    with open(filename, 'r') as f:
        target = []
        text_data = []
        lines = f.readlines()
    random.shuffle(lines)
    for line in lines:
        data = line.split('\t')
        if len(data) == 2:
            target.append(int(data[0]))
            text_data.append(data[1].rstrip())
    return text_data,target

In [36]:
data,value = get_data("/content/training.txt")

In [37]:
len(value)

7086

In [38]:
import re

In [39]:
def processed_tokens(text):
  filt_text = re.sub(r'[^a-zA-Z0-9\s]','',text)
  filt_text = filt_text.split()
  filt_text = [token.lower() for token in filt_text]
  return filt_text

In [40]:
def tokenize_text(text,min_frq=5):
  review_token = [processed_tokens(review) for review in text]
  token_list = [token for review in review_token for token in review]
  token_frq_dict = {token:token_list.count(token) for token in set(token_list)}
  most_frq_tokens = [tokens for tokens in token_frq_dict if token_frq_dict[tokens]>=min_frq ]
  idx = range(len(most_frq_tokens))
  token_idx = dict(zip(most_frq_tokens,idx))
  return token_idx,len(most_frq_tokens)

In [41]:
processed_tokens(str(data[0]))

['not',
 'because',
 'i',
 'hate',
 'harry',
 'potter',
 'but',
 'because',
 'i',
 'am',
 'the',
 'type',
 'of',
 'person',
 'that',
 'likes',
 'it',
 'when',
 'the',
 'main',
 'character',
 'dies']

In [42]:
token_idx,num_token = tokenize_text(data)

In [43]:
def get_max(data):
  tokens_per_review = [len(text.split()) for text in data]
  return max(tokens_per_review)

In [44]:
get_max(data)

40

In [45]:
 max_tokens = get_max(data)

In [46]:
from keras.utils.data_utils import pad_sequences
import numpy as np

In [47]:
def create_sequences(data,token_idx,max_tokens):
  review_tokens = [processed_tokens(review) for review in data]
  review_token_idx = map(lambda review:[token_idx[k] for k in review if k in token_idx.keys()] ,review_tokens)
  pad_seq = pad_sequences(list(review_token_idx),maxlen = max_tokens)
  return np.array(pad_seq)

In [48]:
input_sequences = create_sequences(data, token_idx,max_tokens)

In [49]:
input_sequences

array([[  0,   0,   0, ..., 276, 441, 111],
       [  0,   0,   0, ..., 371, 428,  98],
       [  0,   0,   0, ..., 430, 102, 408],
       ...,
       [  0,   0,   0, ..., 436, 428,  98],
       [  0,   0,   0, ..., 318, 146, 283],
       [  0,   0,   0, ..., 376, 129, 322]], dtype=int32)

In [50]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Embedding
from tensorflow.keras.optimizers import Adam

In [51]:
emb_size = 8

In [52]:
def define_model(num_tokens,max_tokens):
  model = Sequential()
  model.add(Embedding(input_dim = num_tokens,output_dim = emb_size,input_length = max_tokens,name = 'layer_embedding'))
  model.add(GRU(units = 16,name = "gru_1",return_sequences=True))
  model.add(GRU(units = 8,name = "gru_2",return_sequences=True))
  model.add(GRU(units = 4,name = "gru_3"))
  model.add(Dense(1,activation = 'sigmoid',name = 'dense_1'))
  optimizer = Adam(lr=1e-3)
  model.compile(loss = 'binary_crossentropy',optimizer=optimizer,metrics = ['accuracy'])
  print(model.summary())
  return model


In [53]:
model = define_model(num_token,max_tokens)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_embedding (Embedding)  (None, 40, 8)            3616      
                                                                 
 gru_1 (GRU)                 (None, 40, 16)            1248      
                                                                 
 gru_2 (GRU)                 (None, 40, 8)             624       
                                                                 
 gru_3 (GRU)                 (None, 4)                 168       
                                                                 
 dense_1 (Dense)             (None, 1)                 5         
                                                                 
Total params: 5,661
Trainable params: 5,661
Non-trainable params: 0
_________________________________________________________________
None


In [55]:
epochs = 10
val_split = 0.05
batch_size = 32
emb_size = 8

In [56]:
def train_model(model,input_seq,y_train):
  model.fit(input_seq,y_train,validation_split=0.05,epochs = epochs,batch_size = batch_size)
  return model

In [57]:

 model1 = train_model(model,input_sequences,np.array(value))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [58]:
import pandas as pd

In [68]:
def test_model(model,token_idx,max_tokens):
  txt = ["i like that movie","awesome movies","Terrible movie","Story line is fine but fight sequences are very poor than Gadar 1. If they had worked well on fight sequence I would have given 5 stars..in any case it is already hit with 300cr... so no point at this time","it was disgusting movie but interesting movie","It's great and heart touching movie. Everyone should watch .Lovely love story and amazing action."]
  pred = model.predict(create_sequences(txt,token_idx,max_tokens))
  pred = [pred[i][0] for i in range(len(txt))]
  output_df = pd.DataFrame({"review text":txt,"prediction":pred})
  output_df = output_df.loc[:,['review text','prediction']]
  print(output_df)

In [69]:
test_model(lo_model,token_idx,max_tokens)

                                         review text  prediction
0                                  i like that movie    0.899391
1                                     awesome movies    0.997773
2                                     Terrible movie    0.997761
3  Story line is fine but fight sequences are ver...    0.002620
4      it was disgusting movie but interesting movie    0.994968
5  It's great and heart touching movie. Everyone ...    0.002631


In [70]:
from tensorflow.python.keras.models import save_model

In [71]:
def save_model(model,path):
  model.save(path)

In [72]:
save_model(model1,"/content/model1.h5")

In [77]:
!mkdir model

In [78]:
!tensorflowjs_converter --input_format keras model.h5 model/

/bin/bash: line 1: tensorflowjs_converter: command not found


In [73]:
token_idx


{'people': 0,
 'kind': 1,
 'combining': 2,
 'story': 3,
 'dads': 4,
 'evil': 5,
 'wotshisface': 6,
 'the': 7,
 'throat': 8,
 'thats': 9,
 'is': 10,
 'betterwe': 11,
 'likes': 12,
 'stupid': 13,
 'take': 14,
 'acceptable': 15,
 'enjoy': 16,
 'demons': 17,
 'finished': 18,
 'does': 19,
 'dont': 20,
 'hear': 21,
 'count': 22,
 'suicides': 23,
 'kinda': 24,
 'only': 25,
 'ass': 26,
 'until': 27,
 'fun': 28,
 'has': 29,
 'bobbypin': 30,
 'black': 31,
 'tom': 32,
 'there': 33,
 'been': 34,
 'those': 35,
 'awesome': 36,
 'fan': 37,
 'money': 38,
 'okay': 39,
 'our': 40,
 'from': 41,
 'just': 42,
 'might': 43,
 'awful': 44,
 'homosexuality': 45,
 'do': 46,
 'zen': 47,
 'terrible': 48,
 'can': 49,
 'won': 50,
 'ever': 51,
 'personally': 52,
 'community': 53,
 'never': 54,
 'go': 55,
 'while': 56,
 'escapades': 57,
 'course': 58,
 'almost': 59,
 'wanted': 60,
 'big': 61,
 'mom': 62,
 'amazing': 63,
 'likeyeah': 64,
 'felicias': 65,
 'outshines': 66,
 'eyre': 67,
 'didnt': 68,
 'being': 69,
 'the

In [74]:
import csv

In [75]:
def create_csv(token_idx,path):
    with open(path, 'w') as csvfile:
        writer = csv.writer(csvfile)
        for key in token_idx.keys():
            writer.writerow([key,token_idx[key]])

In [76]:
create_csv(token_idx,"/content/token_idx.csv")

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("/content/token_idx.csv")

In [None]:
max(token_idx.values())

451

In [None]:
token_idx['all']

393

In [None]:
!npx create-react-app my-app

[K[?25hnpx: installed 67 in 6.493s

Creating a new React app in [32m/content/my-app[39m.

Installing packages. This might take a couple of minutes.
Installing [36mreact[39m, [36mreact-dom[39m, and [36mreact-scripts[39m with [36mcra-template[39m...

[K[?25h
> core-js@3.32.1 postinstall /content/my-app/node_modules/core-js
> node -e "try{require('./postinstall')}catch(e){}"


> core-js-pure@3.32.1 postinstall /content/my-app/node_modules/core-js-pure
> node -e "try{require('./postinstall')}catch(e){}"

[K[?25h+ cra-template@1.2.0
+ react-scripts@5.0.1
+ react@18.2.0
+ react-dom@18.2.0
added 1452 packages from 629 contributors in 72.945s

241 packages are looking for funding
  run `npm fund` for details


Initialized a git repository.

Installing template dependencies using npm...
[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m @apideck/better-ajv-errors@0.3.6 requires a peer of ajv@>=8 but none is installed. You must install peer dependencies yourself.
[0m[37;40m

In [None]:
d = pd.read_csv('/content/testdata.txt')

NameError: ignored

In [None]:
import pickle as pkl
pkl.dump(model1, 'my_model_knn.pkl.pkl')

TypeError: ignored

In [None]:
model1.save('/content/model.h5')

In [1]:
from tensorflow.keras.models import load_model

In [2]:
lo_model = load_model("/content/model.h5")

In [3]:
lo_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_embedding (Embedding)  (None, 40, 8)            3616      
                                                                 
 gru_1 (GRU)                 (None, 40, 16)            1248      
                                                                 
 gru_2 (GRU)                 (None, 40, 8)             624       
                                                                 
 gru_3 (GRU)                 (None, 4)                 168       
                                                                 
 dense_1 (Dense)             (None, 1)                 5         
                                                                 
Total params: 5,661
Trainable params: 5,661
Non-trainable params: 0
_________________________________________________________________
