## Importing Dataset


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

## Uploading Dataset

In [3]:
dataset = pd.read_csv('/content/Test.csv')
X = dataset['text'].values
y = dataset['label'].values

## Tokenization and Padding

In [4]:
max_words = 1000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
maxlen = 50
X = pad_sequences(X, maxlen=maxlen)

## Splitting TestSet and TrainingSet

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Using LSTM

In [6]:
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.optimizers import Adam

In [12]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length=maxlen))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))


model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])


model.fit(X_train, y_train,
          batch_size=32,
          epochs=3,
          validation_data=(X_test, y_test),
          verbose=2)

Epoch 1/3
125/125 - 20s - loss: 0.6286 - accuracy: 0.6518 - val_loss: 0.5029 - val_accuracy: 0.7660 - 20s/epoch - 159ms/step
Epoch 2/3
125/125 - 15s - loss: 0.4649 - accuracy: 0.7812 - val_loss: 0.4811 - val_accuracy: 0.7760 - 15s/epoch - 123ms/step
Epoch 3/3
125/125 - 17s - loss: 0.3866 - accuracy: 0.8328 - val_loss: 0.4925 - val_accuracy: 0.7670 - 17s/epoch - 137ms/step


<keras.src.callbacks.History at 0x795a2dd7a590>

## Using Simple RNN

In [8]:
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=2, input_length=50))
model.add(SimpleRNN(32,return_sequences=False))
model.add(Dense(1, activation='sigmoid'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 50, 2)             20000     
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                1120      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 21153 (82.63 KB)
Trainable params: 21153 (82.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X_train, y_train,epochs=5,validation_data=(X_test,y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7670000195503235


In [14]:
def predict_sentiment(sentence):
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    prediction = model.predict(padded_sequence)[0][0]
    if prediction >= 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    return sentiment

# Example usage
sentence = "Mohan is good boy!"
predicted_sentiment = predict_sentiment(sentence)
print("Predicted Sentiment:", predicted_sentiment)


Predicted Sentiment: Positive
