1. Data Pre-processing

Read the data

In [412]:
import pandas as pd

df = pd.read_csv('../dataset/amazon_datasets.csv')

df.shape

(4574, 6)

Clean Review

In [413]:
import re
import nltk
from nltk.corpus import stopwords
# nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

# General Preprocessing (normalization, spaces)
def basic_preprocess(text):
    if not isinstance(text, str):
        return ""
    text = text.lower() # Convert text to lowercase
    text = re.sub(r'\d+', '', text) # Remove numbers
    text = re.sub(r'\s+', ' ', text).strip() # Remove extra spaces
    return text


# VADER Preprocessing
def preprocess_vader(text):
    if not isinstance(text, str):
        return ""
    return basic_preprocess(text)


# Transformer Models Preprocessing (keep meaningful punctuation)
def preprocess_transformers(text):
    if not isinstance(text, str):
        return ""
    text = basic_preprocess(text)
    text = re.sub(r'[^\w\s,!?]', '', text) # Remove punctuation except meaningful ones
    return text


# Deep Learning Models (CNN, LSTM) Preprocessing (Remove punctuation)
def preprocess_dl_models(text):
    if not isinstance(text, str):
        return ""
    text = basic_preprocess(text)
    text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
    return text


# SVM Preprocessing (Remove punctuation, and stop words)
def preprocess_svm(text):
    if not isinstance(text, str):
        return ""
    text = basic_preprocess(text)
    text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
    text = ' '.join([word for word in text.split() if word not in stop_words]) # Remove stop words
    return text


# Assign Sentiment on the basis of customer ratings
def assign_sentiment(rating):
    if rating >= 4:
        return 1
    elif rating == 3:
        return 0
    else:
        return -1

Applyin Preprocess

In [414]:
df['vader']         =   df['review'].apply(preprocess_vader)
df['transformers']  =   df['review'].apply(preprocess_transformers)
df['cnn_lstm']      =   df['review'].apply(preprocess_dl_models)
df['svm']           =   df['review'].apply(preprocess_svm)
df['sentiment']     =   df['rating'].apply(assign_sentiment)

df.to_csv('clean_datasets.csv', index=False)

df = df.head(500)
df.head()
# df.shape

Unnamed: 0,ID,product_title,user_name,rating,review,review_date,vader,transformers,cnn_lstm,svm,sentiment
0,1,OnePlus Nord N30,forest,5,I bought this phone at the recommendation of a...,"May 19, 2024",i bought this phone at the recommendation of a...,i bought this phone at the recommendation of a...,i bought this phone at the recommendation of a...,bought phone recommendation friend happy im so...,1
1,2,OnePlus Nord N30,Drew,5,I have this phone for a few months now and for...,"July 26, 2024",i have this phone for a few months now and for...,i have this phone for a few months now and for...,i have this phone for a few months now and for...,phone months price great phone looking somethi...,1
2,3,OnePlus Nord N30,forest,4,I like that this phone has a good battery life...,"January 2, 2024",i like that this phone has a good battery life...,i like that this phone has a good battery life...,i like that this phone has a good battery life...,like phone good battery life charges superfast...,1
3,4,OnePlus Nord N30,Amazon Customer,5,Short version: I got this for my mom since she...,"August 10, 2024",short version: i got this for my mom since she...,short version i got this for my mom since she ...,short version i got this for my mom since she ...,short version got mom since habit buying cheap...,1
4,5,OnePlus Nord N30,C Jack,5,I have a Samsung s22 Ultra. I've been having b...,"May 27, 2024",i have a samsung s ultra. i've been having bat...,i have a samsung s ultra ive been having batte...,i have a samsung s ultra ive been having batte...,samsung ultra ive battery connectivity issues ...,1


In [415]:
# df['clean_review']  =   df['review'].apply(preprocess_data)
# df['sentiment']     =   df['rating'].apply(assign_sentiment)

# df.to_csv('clean_datasets.csv', index=False)

# df = df.head(500)
# df.shape

Split Data Into Train and Test Set

In [416]:
from sklearn.model_selection import train_test_split

X_text_train, X_text_test, X_vader_train, X_vader_test, X_transformers_train, X_transformers_test, X_cnn_lstm_train, X_cnn_lstm_test, X_svm_train, X_svm_test, y_train, y_test = train_test_split(
    df['review'], df['vader'], df['transformers'], df['cnn_lstm'], df['svm'], df['sentiment'], test_size=0.2, random_state=42)

3. Initializing Models

Transformer-Based Models: ( RoBERTa, BERT )

In [419]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification
# from transformers import RobertaForSequenceClassification, BertForSequenceClassification, AutoTokenizer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def predict_sentiment(positive, negative, neutral):
    if positive > negative and positive > neutral:
        return positive 
    elif negative > positive and negative > neutral:
        return -negative 
    else:
        return positive - negative

def preprocess_transformers(texts, tokenizer, max_length=128):
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    return inputs

# Load Pre-Train Models
roberta_tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
roberta_model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')

bert_tokenizer = BertTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
bert_model = BertForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')


# Roberta Model
def roberta_sentiment(text):
    roberta_inputs = preprocess_transformers([text], roberta_tokenizer)
    roberta_inputs = {key: val.to(device) for key, val in roberta_inputs.items()}  
    with torch.no_grad():
        roberta_output = roberta_model(**roberta_inputs).logits
    probabilities = F.softmax(roberta_output, dim=-1)
    positive_class_prob = probabilities[0, 1].item()
    negative_class_prob = probabilities[0, 0].item()
    neutral_class_prob = probabilities[0, 2].item()
    return predict_sentiment(positive_class_prob, negative_class_prob, neutral_class_prob)


def bert_sentiment(text):
    bert_inputs = preprocess_transformers([text], bert_tokenizer)
    bert_inputs = {key: val.to(device) for key, val in bert_inputs.items()}
    with torch.no_grad():
        bert_output = bert_model(**bert_inputs).logits
    probabilities = F.softmax(bert_output, dim=-1)
    positive_class_prob = probabilities[0, 1].item()
    negative_class_prob = probabilities[0, 0].item()
    neutral_class_prob = probabilities[0, 2].item()
    return predict_sentiment(positive_class_prob, negative_class_prob, neutral_class_prob)
    



Lexicon-Based Approaches: ( VADER, TextBlob)

In [420]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

vader_analyzer = SentimentIntensityAnalyzer()

# Vader Model 
def vader_sentiment(text):
    return vader_analyzer.polarity_scores(text)['compound']


# TextBlob Model 
def textblob_sentiment(text):
    polarity = TextBlob(text).sentiment.polarity
    positive_prob = (polarity + 1) / 2  
    negative_prob = 1 - positive_prob
    return positive_prob - negative_prob
    # return TextBlob(text).sentiment.polarity

Machine Learning Model: SVM with TF-IDF

In [421]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
# import numpy as np

# SVM Model definition using TF-IDF Vectorizer
svm_model = make_pipeline(TfidfVectorizer(max_features=5000), SVC(probability=True))

# Training the SVM model
svm_model.fit(X_svm_train, y_train)

# SVM Sentiment Prediction function
def svm_sentiment(text):
    probabilities = svm_model.predict_proba([text])[0]
    
    negative_prob = probabilities[0]
    neutral_prob = probabilities[1]
    positive_prob = probabilities[2]
    return positive_prob - negative_prob

Deep Learning Models: (LSTM, CNN)

In [422]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Parameters
vocab_size = 10000  # Vocabulary size
max_length = 100  # Max length for input sequences
embedding_dim = 100  # Embedding vector size

# Tokenization and padding
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
tokenizer.fit_on_texts(X_cnn_lstm_train)
X_train_seq = tokenizer.texts_to_sequences(X_cnn_lstm_train)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post')


# lstm Model 
def build_lstm():
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim))
    model.add(LSTM(128, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# Create and train the LSTM model
lstm_model = build_lstm()
lstm_model.fit(X_train_pad, np.array(y_train), epochs=5, batch_size=32, validation_split=0.2)

# LSTM Sentiment Prediction
def lstm_sentiment(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    positive_prob = lstm_model.predict(padded_sequence)[0][0]
    negative_prob = 1 - positive_prob
    return positive_prob - negative_prob

Epoch 1/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 158ms/step - accuracy: 0.8030 - loss: 0.6408 - val_accuracy: 0.7750 - val_loss: 0.6172
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 80ms/step - accuracy: 0.8237 - loss: 0.5515 - val_accuracy: 0.7750 - val_loss: 0.6147
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step - accuracy: 0.8330 - loss: 0.5278 - val_accuracy: 0.7750 - val_loss: 0.6289
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step - accuracy: 0.8554 - loss: 0.4938 - val_accuracy: 0.7750 - val_loss: 0.6204
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 114ms/step - accuracy: 0.8085 - loss: 0.5083 - val_accuracy: 0.7750 - val_loss: 0.6165


In [423]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten

# CNN Model
def build_cnn():
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim))
    model.add(Conv1D(128, 5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Create and train the CNN model
cnn_model = build_cnn()
cnn_model.fit(X_train_pad, np.array(y_train), epochs=5, batch_size=32, validation_split=0.2)

# Function for predicting sentiment using CNN
def cnn_sentiment(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    positive_prob = cnn_model.predict(padded_sequence)[0][0]
    negative_prob = 1 - positive_prob
    return positive_prob - negative_prob

Epoch 1/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 830ms/step - accuracy: 0.8046 - loss: 0.6324 - val_accuracy: 0.7750 - val_loss: 0.6571
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8064 - loss: 0.5544 - val_accuracy: 0.7750 - val_loss: 0.6334
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.8130 - loss: 0.4820 - val_accuracy: 0.7750 - val_loss: 0.6354
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.8054 - loss: 0.4463 - val_accuracy: 0.7750 - val_loss: 0.6353
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.8231 - loss: 0.3448 - val_accuracy: 0.7750 - val_loss: 0.6230


Hybrid Model

In [424]:
def hybrid_model(text, vader_text, transformers_text, cnn_lstm_text, svm_text):
    vader_score = vader_sentiment(vader_text)
    roberta_score = roberta_sentiment(transformers_text)
    bert_score = bert_sentiment(transformers_text)
    textblob_score = textblob_sentiment(text)
    svm_score = svm_sentiment(svm_text)
    cnn_score = cnn_sentiment(cnn_lstm_text)
    lstm_score= lstm_sentiment(cnn_lstm_text)
    
    # Combine the predictions (averaging for simplicity)
    hybrid_score = np.mean([vader_score, roberta_score, bert_score, textblob_score, svm_score])


    # VADER
    if vader_score > 0.1:
        sentiment_vader = "Positive"
    elif vader_score < -0.1:
        sentiment_vader = "Negative"
    else:
        sentiment_vader = "Neutral"

    # TextBlob
    if textblob_score > 0:
        sentiment_textblob = "Positive"
    elif textblob_score < 0:
        sentiment_textblob = "Negative"
    else:
        sentiment_textblob = "Neutral"

    # RoBERTa
    if roberta_score > 0.1:
        sentiment_roberta = "Positive"
    elif roberta_score < -0.1:
        sentiment_roberta = "Negative"
    else:
        sentiment_roberta = "Neutral"

    # BERT
    if bert_score > 0.1:
        sentiment_bert = "Positive"
    elif bert_score < -0.1:
        sentiment_bert = "Negative"
    else:
        sentiment_bert = "Neutral"

    # CNN
    if cnn_score > 0.55:
        sentiment_cnn = "Positive"
    elif cnn_score < 0.45:
        sentiment_cnn = "Negative"
    else:
        sentiment_cnn = "Neutral"

    # LSTM
    if lstm_score > 0.55:
        sentiment_lstm = "Positive"
    elif lstm_score < 0.45:
        sentiment_lstm = "Negative"
    else:
        sentiment_lstm = "Neutral"
    
    # SVM
    if svm_score > 0.1:
        sentiment_svm = "Positive"
    elif svm_score < -0.1:
        sentiment_svm = "Negative"
    else:
        sentiment_svm = "Neutral"

    # Average score
    if hybrid_score > 0.1:
        sentiment_hybrid = "Positive"
    elif hybrid_score < -0.1:
        sentiment_hybrid = "Negative"
    else:
        sentiment_hybrid = "Neutral"

    comparison = {
        "text": text,
        "vader_score":  vader_score,
        "textblob_score":  textblob_score,
        "roberta_score":  roberta_score,
        "bert_score":  bert_score,
        "cnn_score":  cnn_score,
        "lstm_score":  lstm_score,
        "svm_score":  svm_score,
        "hybrid_score":  hybrid_score,
        'vader_sentiment': sentiment_vader,
        'textblob_sentiment': sentiment_textblob,
        'roberta_sentiment': sentiment_roberta,
        'bert_sentiment': sentiment_bert,
        'cnn_sentiment': sentiment_cnn,
        'lstm_sentiment': sentiment_lstm,
        'svm_sentiment': sentiment_svm,
        'hybrid_sentiment': sentiment_hybrid,
    }


    return comparison

In [425]:
df['review'] = df['review'].fillna('').astype(str)
min_length = min(len(X_text_test), len(X_vader_test), len(X_transformers_test), len(X_cnn_lstm_test), len(X_svm_test))

texts = X_text_test[:min_length].reset_index(drop=True)
vader_texts = X_vader_test[:min_length].reset_index(drop=True)
transformers_texts = X_transformers_test[:min_length].reset_index(drop=True)
cnn_lstm_texts = X_cnn_lstm_test[:min_length].reset_index(drop=True)
svm_texts = X_svm_test[:min_length].reset_index(drop=True)

results_list = []

# Iterate over your dataset
for i, text in enumerate(texts):
    comparison = hybrid_model(text, vader_texts[i], transformers_texts[i], cnn_lstm_texts[i], svm_texts[i])
    results_list.append(comparison)


results_list = pd.DataFrame(results_list)
results_list.to_csv('sentiment_results.csv', index=False)

print("Results saved to sentiment_results.csv")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 462ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

Compare models sentiment

In [426]:
def accuracy_hybrid_model(text, vader_text, transformers_text, cnn_lstm_text, svm_text):
    vader_score = vader_sentiment(vader_text)
    roberta_score = roberta_sentiment(transformers_text)
    bert_score = bert_sentiment(transformers_text)
    textblob_score = textblob_sentiment(text)
    svm_score = svm_sentiment(svm_text)
    cnn_score = cnn_sentiment(cnn_lstm_text)
    lstm_score= lstm_sentiment(cnn_lstm_text)
    
    hybrid_score = np.mean([vader_score, roberta_score, bert_score, textblob_score, svm_score, cnn_score, lstm_score])
    return hybrid_score

In [427]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred_vader = [vader_sentiment(text) for text in X_vader_test]
y_pred_roberta = [roberta_sentiment(text) for text in X_transformers_test]
y_pred_bert = [bert_sentiment(text) for text in X_transformers_test]
y_pred_textblob = [textblob_sentiment(text) for text in X_text_test]
y_pred_svm = [svm_sentiment(text) for text in X_svm_test]
y_pred_cnn = [cnn_sentiment(text) for text in X_cnn_lstm_test]
y_pred_lstm = [lstm_sentiment(text) for text in X_cnn_lstm_test]
y_pred_hybrid = [accuracy_hybrid_model(text, vader_text, transformers_text, cnn_lstm_text, svm_text) 
                 for text, vader_text, transformers_text, cnn_lstm_text, svm_text 
                 in zip(X_text_test, X_vader_test, X_transformers_test, X_cnn_lstm_test, X_svm_test)]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31

In [428]:
print("VADER Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_vader], average='macro'))

print("RoBERTa Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_roberta], average='macro'))

print("BERT Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_bert], average='macro'))

print("TextBlob Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_textblob], average='macro'))

print("SVM Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_svm], average='macro'))

print("CNN Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_cnn], average='macro'))

print("LSTM Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_lstm], average='macro'))

print("Hybrid Model:")
print("Accuracy:", accuracy_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid]))
print("Precision (macro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid], average='macro', zero_division=0))
print("Precision (micro):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid], average='micro', zero_division=0))
print("Precision (weighted):", precision_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid], average='weighted', zero_division=0))
print("Recall:", recall_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid], average='macro'))
print("F1-measure:", f1_score(y_test, [1 if score > 0 else 0 for score in y_pred_hybrid], average='macro'))

VADER Model:
Accuracy: 0.67
Precision (macro): 0.33187134502923976
Precision (micro): 0.67
Precision (weighted): 0.633421052631579
Recall: 0.3948948948948949
F1-measure: 0.35407407407407404
RoBERTa Model:
Accuracy: 0.73
Precision (macro): 0.36628491058870805
Precision (micro): 0.73
Precision (weighted): 0.6655334538878843
Recall: 0.4451951951951952
F1-measure: 0.3973063973063973
BERT Model:
Accuracy: 0.67
Precision (macro): 0.3205345379258423
Precision (micro): 0.67
Precision (weighted): 0.6307171089779786
Recall: 0.37162162162162166
F1-measure: 0.3397035635446231
TextBlob Model:
Accuracy: 0.7
Precision (macro): 0.32941176470588235
Precision (micro): 0.7
Precision (weighted): 0.6072941176470588
Recall: 0.3851351351351351
F1-measure: 0.3549965059399021
SVM Model:
Accuracy: 0.76
Precision (macro): 0.3543743078626799
Precision (micro): 0.76
Precision (weighted): 0.6538538205980067
Recall: 0.4121621621621621
F1-measure: 0.38108974358974357
CNN Model:
Accuracy: 0.74
Precision (macro): 0.246