In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Embedding, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
file_path = r"C:\Users\adars\OneDrive\Desktop\ExcelR Assignment\06\Sentiment.csv"  # Raw string so the Windows backslashes are not read as escapes
df = pd.read_csv(file_path)

# Preprocessing
# Keep only the relevant columns. The explicit copy matters: assigning into a
# plain column selection can trigger pandas' SettingWithCopyWarning.
df = df[['text', 'sentiment']].copy()
# Map sentiment labels to class ids; any label outside this mapping becomes NaN.
df['sentiment'] = df['sentiment'].map({'Positive': 1, 'Negative': 0, 'Neutral': 2})
# Drop rows with missing text or an unmapped sentiment. If the map produced any
# NaN the column is float at this point, so cast it back to integer class ids
# (astype on the frame avoids a second column assignment on a filtered frame).
df = df.dropna().astype({'sentiment': 'int64'})

# Prepare data for the model
X = df['text']
y = df['sentiment']

# Decide the train/test partition up front so the tokenizer can be fitted on
# training text only (fitting on the full corpus leaks test-set vocabulary).
# train_test_split derives the partition from the sample count and
# random_state, so splitting row positions yields the same rows that splitting
# the padded sequences directly with these arguments would.
row_positions = np.arange(len(X))
train_idx, test_idx = train_test_split(row_positions, test_size=0.2, random_state=42)

# Tokenization
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")  # Adjust vocab size as needed
tokenizer.fit_on_texts(X.iloc[train_idx])  # Vocabulary comes from training rows only
sequences = tokenizer.texts_to_sequences(X)  # Words unseen in training map to <OOV>

# Padding
max_len = 100  # Adjust sequence length as needed
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# Split data into training and testing sets using the partition chosen above.
# iloc is positional, which is required because dropna() may have left gaps in
# the index labels of y.
X_train, X_test = padded_sequences[train_idx], padded_sequences[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Build the GRU model
# Size the embedding table from the tokenizer instead of repeating the literal:
# texts_to_sequences only emits ids below num_words, so the two must agree.
vocab_size = tokenizer.num_words

model = Sequential()
# Embedding layer. `input_length` is intentionally not passed: Keras 3
# deprecates it (it only emits a warning) and the GRU layers below do not need
# a fixed sequence length; the model is built on the first batch it sees.
model.add(Embedding(vocab_size, 64))
model.add(GRU(64, return_sequences=True))  # First GRU emits the full sequence for the next GRU
model.add(Dropout(0.2))  # Dropout for regularization
model.add(GRU(32))  # Second GRU returns only its final state
model.add(Dropout(0.2))  # Dropout for regularization
model.add(Dense(3, activation='softmax'))  # One output per class: Negative, Positive, Neutral

# Compile the model
# sparse_categorical_crossentropy matches integer class-id labels (no one-hot encoding).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
# NOTE(review): the test split doubles as validation data here. That is fine
# for monitoring, but if epochs or other settings are ever tuned against these
# numbers, carve a separate validation set out of the training data.
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))  # Adjust epochs and batch size

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

# Make predictions
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)  # Most probable class id per row

# Print classification report and confusion matrix
# Pin the class ids and their names so the report is readable and always covers
# all three classes, even if one is absent from this split or never predicted.
class_ids = [0, 1, 2]
class_names = ['Negative', 'Positive', 'Neutral']
print(classification_report(y_test, predicted_labels, labels=class_ids, target_names=class_names))
cm = confusion_matrix(y_test, predicted_labels, labels=class_ids)  # Rows: true class, columns: predicted class
print(cm)


# Example of making predictions on new text:
def predict_sentiment(text):
    """Return the sentiment name the trained model assigns to ``text``.

    The text is encoded with the module-level ``tokenizer``, padded/truncated
    to ``max_len`` in the same way as the training data, and passed through
    ``model``. The index of the largest predicted score is translated to
    "Negative", "Positive" or "Neutral".
    """
    id_to_name = {0: "Negative", 1: "Positive", 2: "Neutral"}
    # Wrap the text in a list: the tokenizer and the model both work on batches.
    encoded = pad_sequences(
        tokenizer.texts_to_sequences([text]),
        maxlen=max_len,
        padding='post',
        truncating='post',
    )
    scores = model.predict(encoded)
    return id_to_name[np.argmax(scores)]

# Run the classifier on three sample sentences and print each result.
for new_text in (
    "This movie was absolutely fantastic!",
    "This movie was terrible and boring.",
    "The movie was okay.",
):
    predicted_sentiment = predict_sentiment(new_text)
    print(f"Predicted sentiment for '{new_text}': {predicted_sentiment}")

: 