In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
import pickle5 as pickle

In [None]:
# Read the labelled reviews from the CSV file into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer='../datasets/review.csv')
# Print the first five rows as a quick sanity check of what was loaded.
print(dataset.head(n=5))

In [None]:
# Keep only the review text and its raw label. The explicit .copy() makes the
# column assignment below write to an independent frame rather than to a
# selection of the loaded data (avoids SettingWithCopyWarning).
dataset = dataset[['Review', 'Label']].copy()

# Label 2 -> 'P', label 1 -> 'N'. Any other label value is left unmapped (NaN).
dataset['sentiment'] = dataset['Label'].map({2: 'P', 1: 'N'})

# The classifier in this notebook is strictly two-class: a third category
# would add a third one-hot column and shift the class indices, so rows with
# any other label are dropped here and reported instead of being kept.
unknown_label_mask = dataset['sentiment'].isna()
if unknown_label_mask.any():
    print(f"Dropping {int(unknown_label_mask.sum())} rows whose Label is not 1 or 2")
dataset = dataset.loc[~unknown_label_mask, ['Review', 'sentiment']]

# Shuffle the rows; the fixed seed makes the order reproducible across runs.
dataset = dataset.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Fit a word-level tokenizer on the review texts, with the vocabulary limited
# through num_words=5000.
# NOTE(review): oov_token is an empty string here; this may be a placeholder
# such as '<OOV>' that was lost when the notebook was exported. Confirm.
review_texts = dataset['Review']
tokenizer = Tokenizer(oov_token='', num_words=5000)
tokenizer.fit_on_texts(review_texts)
word_index = tokenizer.word_index

# Encode every review as a list of word ids, then bring all of them to a
# length of 100; truncating='post' cuts over-long reviews at the end.
sequences = tokenizer.texts_to_sequences(review_texts)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=100)

In [None]:
# Convert the sentiment labels to one-hot encoding.
# The dtype is set explicitly: pandas >= 2.0 produces boolean dummy columns by
# default, whereas the training targets should be floating point.
sentiment_dummies = pd.get_dummies(dataset['sentiment'], dtype='float32')

# The rest of the notebook assumes exactly two classes with column 0 = 'N' and
# column 1 = 'P' (predict_sentiment reports index 0 as negative). Fail here
# with a clear message instead of a shape error or swapped labels later on.
if list(sentiment_dummies.columns) != ['N', 'P']:
    raise ValueError(
        "Expected sentiment classes ['N', 'P'], got "
        + str(list(sentiment_dummies.columns))
    )

sentiment_labels = sentiment_dummies.values

In [None]:
# Split the dataset into training (80%) and testing (20%) sets.
x_train, x_test, y_train, y_test = train_test_split(
    padded_sequences,
    sentiment_labels,
    test_size=0.2,
    random_state=42,  # fixed seed: the same rows land in each split on every run
)

In [None]:
# Build the model: embedding -> 1D convolution -> global max pooling ->
# dense hidden layer with dropout -> softmax classifier.
# The output width follows the one-hot label width instead of a hard-coded 2,
# so the softmax layer always matches the training targets.
num_classes = y_train.shape[1]

model = Sequential()
# 5000 matches the tokenizer's num_words and 100 the padded sequence length.
model.add(Embedding(5000, 100, input_length=100))
model.add(Conv1D(64, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model. Validation metrics come from a slice held out of the
# training data (validation_split), so the test set is only used by the final
# evaluation below and the reported accuracy is a genuine held-out figure.
model.fit(x_train, y_train, epochs=32, batch_size=32, validation_split=0.1)

# Evaluate the model: compare the predicted class index with the true class
# index recovered from the one-hot test labels.
y_pred = np.argmax(model.predict(x_test), axis=-1)
print("Accuracy:", accuracy_score(np.argmax(y_test, axis=-1), y_pred))

In [None]:
# Persist the trained network to an .h5 file ...
model.save('review_anal_model_v2.h5')
# ... and the fitted tokenizer next to it, so that inference can encode text
# with the same vocabulary that was used for training.
with open('tokenizer.pickle', mode='wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Load the saved model and tokenizer.
# The model was built from tensorflow.keras classes, so it is loaded through
# the same tf.keras API (tf is imported in the first cell) rather than the
# standalone `keras` package, which may be a different Keras version.
model = tf.keras.models.load_model('review_anal_model_v2.h5')

# SECURITY: pickle.load can execute arbitrary code contained in the file.
# Only load a tokenizer.pickle that was written by this notebook or comes from
# a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [15]:
def predict_sentiment(text):
    """Classify a piece of text as negative or positive with the trained model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects, so both must
    exist (trained or loaded) before this function is called.

    Args:
        text: A single review as a string.

    Returns:
        str: ``'Negative'`` or ``'Positive'`` immediately followed by the
        string form of the predicted class-probability vector, e.g.
        ``'Positive[0.00206424 0.9979358 ]'``.
    """
    # Tokenize and pad the input text exactly as the training data was
    # prepared: same maxlen and truncating='post', so that inputs longer than
    # 100 tokens keep their beginning, as the training reviews did.
    text_sequence = tokenizer.texts_to_sequences([text])
    text_sequence = pad_sequences(text_sequence, maxlen=100, truncating='post')

    # Make a prediction using the trained model; [0] selects the probability
    # vector of the single input in the batch.
    predicted_rating = model.predict(text_sequence)[0]
    # Index 0 is reported as negative, any other index as positive.
    if np.argmax(predicted_rating) == 0:
        return 'Negative' + str(predicted_rating)
    else:
        return 'Positive' + str(predicted_rating)

In [17]:
# Example usage: classify one short review and print the label together with
# the class probabilities returned by predict_sentiment.
text_input = 'These are really good'
predicted_sentiment = predict_sentiment(text=text_input)
print(predicted_sentiment)

Positive[0.00206424 0.9979358 ]
