In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn import metrics
import pickle

In [None]:
# Load the raw training data from a CSV file in the working directory.
# The preprocessing cell that follows selects the 'text' and 'label' columns,
# so the file must provide both.
data = pd.read_csv('Train.csv')  # Replace with your dataset

In [None]:
# Preprocessing for Logistic Regression
# The vectorizer is configured to leave the text untouched (no accent
# stripping, no lowercasing, no custom preprocessor) and to produce
# IDF-weighted, L2-normalised rows.
vectorizer = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None, use_idf=True, norm='l2', smooth_idf=True)

# Keep only the two columns used below and drop rows missing either of them.
data = data[['text', 'label']].dropna()
data['label'] = data['label'].astype(int)

y = data['label'].values

# Train-Test Split for Logistic Regression
# Split the raw text *before* vectorizing: the vocabulary and IDF weights must
# be learned from the training rows only, otherwise statistics of the test rows
# leak into the features and the reported test accuracy is optimistic.
text_train_lr, text_test_lr, y_train_lr, y_test_lr = train_test_split(
    data['text'], y, test_size=0.2, random_state=42
)
X_train_lr = vectorizer.fit_transform(text_train_lr)
X_test_lr = vectorizer.transform(text_test_lr)

# X_tfidf is kept so any code that still refers to it keeps working; it holds
# the features of every row, computed with the train-fitted vectorizer.
X_tfidf = vectorizer.transform(data['text'])

# Train Logistic Regression Model
# 6-fold cross-validation (scored by accuracy) selects the regularisation
# strength; n_jobs=-1 uses all available cores.
clf = LogisticRegressionCV(cv=6, scoring='accuracy', random_state=0, n_jobs=-1, verbose=3, max_iter=500)
clf.fit(X_train_lr, y_train_lr)

In [None]:
# Evaluate Logistic Regression Model
y_pred_lr = clf.predict(X_test_lr)
print("Logistic Regression Accuracy:", metrics.accuracy_score(y_test_lr, y_pred_lr))

# Save Logistic Regression Model
# Context managers guarantee each file is flushed and closed as soon as the
# pickle has been written, instead of leaving the handle open.
with open('clf.pkl', 'wb') as clf_file:
    pickle.dump(clf, clf_file)
with open('tfidf.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Tokenization and Padding for LSTM
# Build the word index from the text column, turn each text into a list of
# word indices, then pad/truncate every list to max_sequence_length entries.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['text'])
sequences = tokenizer.texts_to_sequences(data['text'])
max_sequence_length = 100
X_lstm = pad_sequences(sequences, maxlen=max_sequence_length)

# Train-Test Split for LSTM
# Uses the same test_size and random_state as the logistic-regression split.
X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(X_lstm, y, test_size=0.2, random_state=42)

In [None]:
# Define LSTM Model
# The network is assembled layer by layer, in the order data flows through it.
model = Sequential()

# Word indices -> 128-dimensional vectors. The vocabulary size is the number of
# indexed words plus one, because Keras' Tokenizer never assigns index 0.
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=max_sequence_length))

# First recurrent layer reads the sequence in both directions and hands the
# full sequence of outputs to the next recurrent layer.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.2))

# Second recurrent layer returns only its final output.
model.add(LSTM(32))

# Classification head.
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(set(y)), activation='softmax'))  # Adjust output units based on the number of labels

# Integer class labels are used directly, hence the sparse categorical loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train LSTM Model
# The held-out split is passed as validation data, so its loss and accuracy
# are reported after each of the 5 epochs.
model.fit(
    X_train_lstm,
    y_train_lstm,
    batch_size=32,
    epochs=5,
    validation_data=(X_test_lstm, y_test_lstm),
)

# Save LSTM Model and Tokenizer
# The tokenizer is stored next to the model: the same word index is needed to
# encode any text that is later fed to the network.
model.save('lstm_emotion_model.h5')
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [None]:
# Prediction Function for Logistic Regression
def predict_with_lr(comment):
    """Classify a single comment with the logistic-regression model.

    The comment is wrapped in a one-element list, vectorized with the
    module-level ``vectorizer`` and passed to the module-level ``clf``.

    Args:
        comment: The text to classify.

    Returns:
        ``"Positive"`` when the predicted label equals 1, ``"Negative"`` for
        any other predicted label.
    """
    features = vectorizer.transform([comment])  # transform expects a collection of documents
    predicted_label = clf.predict(features)[0]
    if predicted_label == 1:
        return "Positive"
    return "Negative"

# Prediction Function for LSTM
def predict_with_lstm(comment):
    """Classify a single comment with the LSTM model.

    The comment is converted to word indices with the module-level
    ``tokenizer``, padded to ``max_sequence_length`` and passed to the
    module-level ``model``.

    Args:
        comment: The text to classify.

    Returns:
        A ``(emotion_index, confidence)`` pair: the position of the largest
        value in the model output and the value found at that position in the
        first output row.
    """
    token_ids = tokenizer.texts_to_sequences([comment])
    padded_ids = pad_sequences(token_ids, maxlen=max_sequence_length)
    class_scores = model.predict(padded_ids)
    # argmax runs over the whole output; only one comment is passed in, so this
    # is the best-scoring entry of that single row.
    best_index = np.argmax(class_scores)
    return best_index, class_scores[0][best_index]

In [None]:
# Example Usage
# The guard must use the double-underscore names: `__name__` is the variable
# Python defines, and it equals "__main__" when this code is run directly
# (including inside a notebook kernel).
if __name__ == "__main__":
    test_comment = "I love this product!"

    # Logistic Regression Prediction
    lr_result = predict_with_lr(test_comment)
    print(f"Logistic Regression Prediction: {lr_result}")

    # LSTM Prediction
    lstm_result, lstm_confidence = predict_with_lstm(test_comment)
    print(f"LSTM Prediction: Emotion Index {lstm_result}, Confidence {lstm_confidence}")