<a href="https://colab.research.google.com/github/Christynopal/Speech-and-Image-Processing/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping

# Load external data
data_path = '/content/test.csv'
df = pd.read_csv(data_path, encoding='latin-1')  # Update with the path to your dataset


# Inspect unique values in the 'sentiment' column
print(df['sentiment'].unique())

# Map sentiment labels to integers
label_mapping = {
    'negative': 0,
    'neutral': 1,
    'positive': 2
}
df['sentiment'] = df['sentiment'].map(label_mapping)

# Check for any non-mapped values
print(df['sentiment'].unique())

# Handle missing values
df['text'] = df['text'].astype(str)  # Convert all texts to string type
df['sentiment'] = df['sentiment'].fillna(0)  # Fill missing sentiment values with 0 (or appropriate value)

# Check for any non-string values in the text column
df['text'] = df['text'].apply(lambda x: x if isinstance(x, str) else '')

texts = df['text'].values
labels = df['sentiment'].values

# Preprocess text data
max_words = 20000  # Maximum number of words to consider in the vocabulary
max_len = 300     # Maximum length of each sequence

# Tokenizer to convert text to sequences of integers
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)

# Convert text to sequences of integers
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to ensure they are all the same length
X = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# Convert labels to numpy array
y = np.array(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the GRU model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(Bidirectional(GRU(128, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(GRU(64)))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))  # Output layer for multi-class classification

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy}')

# Predict and get classification report
y_pred = np.argmax(model.predict(X_test), axis=-1)  # Convert probabilities to class labels
print(classification_report(y_test, y_pred))

['neutral' 'positive' 'negative' nan]
[ 1.  2.  0. nan]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Accuracy: 0.6625129580497742
              precision    recall  f1-score   support

         0.0       0.83      0.73      0.78       484
         1.0       0.47      0.54      0.50       268
         2.0       0.61      0.65      0.63       211

    accuracy                           0.66       963
   macro avg       0.64      0.64      0.64       963
weighted avg       0.68      0.66      0.67       963



In [20]:
# Save the model after training
model.save('sentiment_gru_model.h5')


  saving_api.save_model(


In [21]:
from tensorflow.keras.models import load_model
# Load the trained model
model = load_model('sentiment_gru_model.h5')


In [23]:
def preprocess_text(text, tokenizer, max_len):
    # Convert text to sequences of integers
    sequence = tokenizer.texts_to_sequences([text])
    # Pad sequences to ensure they are all the same length
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
    return padded_sequence


In [24]:
def predict_sentiment(text, model, tokenizer, max_len):
    # Preprocess the text
    preprocessed_text = preprocess_text(text, tokenizer, max_len)
    # Make prediction
    prediction = model.predict(preprocessed_text)
    # Get the class with the highest probability
    predicted_class = np.argmax(prediction, axis=-1)
    return predicted_class[0]


In [27]:
from tensorflow.keras.models import load_model
import joblib

# Load the trained model
model = load_model('/content/sentiment_gru_model.h5')




In [32]:
# Example new text for classification
new_text = "THAT IS BETTER"

# Predict sentiment
predicted_class = predict_sentiment(new_text, model, tokenizer, max_len)

# Map integer class back to sentiment label (adjust this mapping as needed)
class_mapping = {
    0: 'negative',
    1: 'neutral',
    2: 'positive'
}

predicted_sentiment = class_mapping.get(predicted_class, 'Unknown')
print(f'The predicted sentiment is: {predicted_sentiment}')


The predicted sentiment is: neutral
