In [None]:
import numpy as np
import pandas as pd
import nltk
import re
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
nltk.download('stopwords')

In [None]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

In [None]:
# Load the training data from train.csv (path is relative to the working directory).
df = pd.read_csv("train.csv")

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Drop every row that contains at least one missing value.
# Note: the row index keeps its gaps after this step.
df = df.dropna()

In [None]:
# Separate the target column from the remaining feature columns.
y = df['label']
X = df.drop(columns=['label'])

In [None]:
# Preview the feature columns (everything except 'label').
X.head()

In [None]:
# Preview the first few target labels.
y.head()

In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Number of labels; should match the row count of X.
y.shape

In [None]:
# Vocabulary size: passed to one_hot() as its size argument and to Embedding as its input dimension.
voc_size = 5000

In [None]:
# Work on a fresh frame derived from X with a contiguous 0..n-1 index;
# the previous (gapped) index is kept as an 'index' column.
df = X.reset_index(drop=False)

In [None]:
# Preview the re-indexed feature frame.
df.head()

In [None]:
# Inspect the title of the row labelled 1.
df.loc[1, 'title']

In [None]:
# Inspect the body text of the row labelled 1.
df.loc[1, 'text']

In [None]:
# Stemmer used to reduce each title word to its stem.
ps = PorterStemmer()
# Accumulator for the cleaned titles, one string per row.
corpus = []

In [None]:
# Build the English stop-word set once; re-creating it for every word is needlessly slow.
stop_words = set(stopwords.words('english'))

# Start from an empty list so re-running this cell never appends duplicate entries.
corpus = []
for title in df['title']:
    # Replace every non-letter with a space so word boundaries survive the cleanup
    # (substituting '' would fuse the whole title into a single token).
    review = re.sub('[^a-zA-Z]', ' ', title)
    review = review.lower()
    review = review.split()
    # Drop stop words and stem what remains.
    review = [ps.stem(word) for word in review if word not in stop_words]
    # Re-join with spaces so the encoder receives separate tokens.
    review = ' '.join(review)
    corpus.append(review)

In [None]:
# Dump the entire cleaned corpus (one entry per title); output grows with the dataset size.
print(corpus)

In [None]:
# Encode each cleaned title with Keras' one_hot(), using voc_size as the size argument.
onehot_repr = []
for words in corpus:
    onehot_repr.append(one_hot(words, voc_size))
onehot_repr

In [None]:
# Fixed sequence length: used as maxlen for padding and as the Embedding input_length.
sent_length = 20

In [None]:
# Bring every encoded title to exactly sent_length entries, padding at the front.
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)

embedded_docs

In [None]:
# Plain LSTM classifier: embedding -> dropout -> LSTM -> dropout -> sigmoid output.
model = Sequential()

# Map each of the voc_size word indices to a 40-dimensional vector.
model.add(Embedding(voc_size,40,input_length=sent_length))
model.add(Dropout(0.3))

model.add(LSTM(400))
model.add(Dropout(0.3))

# The sigmoid unit must be the last layer: a Dropout after it would zero or
# rescale the predicted probability during training and corrupt the
# binary cross-entropy loss.
model.add(Dense(1,activation='sigmoid'))

In [None]:
# Binary classification: cross-entropy loss, Adam optimizer, accuracy as the reported metric.
model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

In [None]:
# Materialise the padded sequences and the labels as NumPy arrays for training.
X_final, y_final = np.array(embedded_docs), np.array(y)

In [None]:
# Shape of the model input array.
X_final.shape

In [None]:
# Shape of the label array; its length should match X_final's first dimension.
y_final.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Train for 25 epochs in batches of 64, evaluating on the held-out split after each epoch.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=25,
    validation_data=(X_test, y_test),
)

In [None]:
# Raw model outputs for the held-out samples (one sigmoid value per sample).
y_pred = model.predict(X_test)

print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score

# The model ends in a sigmoid, so its outputs are probabilities: threshold them
# at 0.5 to get hard 0/1 labels and flatten the column so it lines up with y_test.
y_pred_labels = (y_pred > 0.5).astype(int).ravel()

# scikit-learn's signature is accuracy_score(y_true, y_pred).
acc = accuracy_score(y_test, y_pred_labels)

print(acc)

# Bidirectional LSTM RNN

In [None]:
# Bidirectional LSTM classifier: embedding -> dropout -> BiLSTM -> dropout -> sigmoid output.
model = Sequential()

# Map each of the voc_size word indices to a 40-dimensional vector.
model.add(Embedding(voc_size,40,input_length=sent_length))
model.add(Dropout(0.3))

# Wrap the LSTM so the sequence is read in both directions.
model.add(Bidirectional(LSTM(100)))
model.add(Dropout(0.3))

# The sigmoid unit must be the last layer: a Dropout after it would zero or
# rescale the predicted probability during training and corrupt the
# binary cross-entropy loss.
model.add(Dense(1,activation='sigmoid'))

In [None]:
# Binary classification: cross-entropy loss, Adam optimizer, accuracy as the reported metric.
model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['accuracy'])

In [None]:
from sklearn.model_selection import train_test_split

# Re-create the 80/20 split with the same seed (random_state=1) used for the
# first model, so both models are evaluated on the same held-out samples.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Train for 25 epochs in batches of 64, evaluating on the held-out split after each epoch.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=25,
    validation_data=(X_test, y_test),
)

In [None]:
# Raw model outputs for the held-out samples (one sigmoid value per sample).
y_pred = model.predict(X_test)

print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score

# The model ends in a sigmoid, so its outputs are probabilities: threshold them
# at 0.5 to get hard 0/1 labels and flatten the column so it lines up with y_test.
y_pred_labels = (y_pred > 0.5).astype(int).ravel()

# scikit-learn's signature is accuracy_score(y_true, y_pred).
acc = accuracy_score(y_test, y_pred_labels)

print(acc)