In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive; later cells
# read the dataset from, and save the model to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense,Dropout
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

In [3]:
# Location of the combined review dataset on the mounted Drive.
DATASET_CSV = '/content/drive/MyDrive/Project_Dataset(Amazon)/combined.csv'

# Read the whole CSV into memory as a DataFrame.
combined_df = pd.read_csv(DATASET_CSV)

In [4]:
# Count the rows per label to check how balanced the two classes are.
(
    combined_df['class']
    .value_counts()
)

1    900000
0    900000
Name: class, dtype: int64

In [5]:
# Features are the raw review texts; targets are the binary class labels.
X, y = combined_df['reviewText'], combined_df['class']

In [6]:
def preprocess_text(text):
    """Lower-case a review and drop English stop words.

    The text is split on whitespace, every token is lower-cased, tokens found
    in ``ENGLISH_STOP_WORDS`` are discarded and the remaining tokens are
    re-joined with single spaces.

    Args:
        text: Raw review text. ``None`` and float ``NaN`` (what pandas yields
            for an empty CSV cell) are treated as an empty review; any other
            non-string value is converted with ``str()``.

    Returns:
        str: The cleaned, lower-cased text ("" when nothing is left).
    """
    # Missing reviews have no .split(); map them to an empty string instead of
    # raising AttributeError part-way through the corpus.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    # NOTE: matching is done on whitespace-delimited tokens, so a stop word
    # with punctuation attached (e.g. "the,") is not removed here.
    lowered_tokens = (word.lower() for word in text.split())
    return ' '.join(token for token in lowered_tokens if token not in ENGLISH_STOP_WORDS)


In [7]:
# Clean every review; the result is a plain Python list of strings.
X = list(map(preprocess_text, X))

In [8]:
# Tokenization: turn each cleaned review into a fixed-length integer sequence.
max_words = 10000  # vocabulary cap handed to the tokenizer
max_len = 100      # every sequence is padded/truncated to this length

# Build the word index from the cleaned reviews.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)

# NOTE: X is overwritten here, so re-running this cell on its own would feed
# the padded integer array back into the tokenizer; re-run from the
# preprocessing cell instead.
X = pad_sequences(tokenizer.texts_to_sequences(X), maxlen=max_len)

In [9]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Define your model
embedding_dim = 128  # Dimension of the word embeddings (the value the layer actually uses)
gru_units = 64  # Number of units in the GRU layer

# The tokenizer only emits indices below its `num_words` cap, so the embedding
# table needs at most that many rows. Sizing it from the full word_index would
# allocate rows that can never be looked up or trained.
full_vocab_size = len(tokenizer.word_index) + 1  # +1 for the padding index 0
vocab_size = min(tokenizer.num_words or full_vocab_size, full_vocab_size)

# Two stacked GRU layers over the embedded sequence, followed by a small dense
# head with dropout and a single sigmoid unit for the binary label.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len),
    GRU(gru_units, return_sequences=True),  # pass the full sequence to the next GRU
    GRU(gru_units),                         # keep only the final state
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [11]:
# Binary classification set-up: cross-entropy loss, Adam optimizer, and
# accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [12]:
# Fit for five epochs in mini-batches of 64. The held-out split doubles as the
# per-epoch validation set.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
# Destination on the mounted Drive for the trained model.
MODEL_PATH = '/content/drive/MyDrive/GRU_model_01.keras'

# Persist the trained model to that path.
model.save(MODEL_PATH)

In [18]:
# Score the held-out test features with the trained model; the final layer is
# a single sigmoid unit, so each prediction is one value between 0 and 1.
y_pred = model.predict(X_test)



In [21]:
# Display the raw sigmoid outputs (not yet thresholded into 0/1 labels).
y_pred

array([[9.6605521e-01],
       [8.8257205e-01],
       [1.2397398e-02],
       ...,
       [2.1402750e-04],
       [6.2543184e-02],
       [4.6249591e-03]], dtype=float32)