In [135]:
# utilities
import re
import numpy as np
import pandas as pd

# nltk
from nltk.stem import WordNetLemmatizer

#SpellCorrection
from spellchecker import SpellChecker

import string
import emoji
import chardet

In [136]:
DATASET_COLUMNS = ['date', 'username', 'text', 'polarity', 'emotion']

#Detect file encoding using chardet
with open('data.csv', 'rb') as f:
    result = chardet.detect(f.read())

# Print the detected encoding
print("Detected encoding:", result['encoding'])

# Read the file using the detected encoding
df = pd.read_csv('data.csv', encoding=result['encoding'], names=DATASET_COLUMNS)
df.sample(5)

Detected encoding: UTF-8-SIG


Unnamed: 0,date,username,text,polarity,emotion
121,"9:40 PM · Oct 24, 2023",@CryptoSpac3,Get ready for the next #bullrun in the #crypto...,2,anticipation
424,10/29/23,@azami_iqbal2007,🚀Discover #SaitamaToken: The Future of #Crypto...,1,happy
444,10/29/23,@neilstucky1,"For those in $algo governance, enjoy the massi...",1,happy
54,"12:22 AM · Oct 26, 2023",@0x0Checkmate,🚀 #ShibaInu continues its upward trajectory as...,2,happy
381,10/28/23,@univaultx,🚀Exciting news! UNIVAULT is teaming up with @M...,2,anticipation


In [137]:
#Data preprocessing
data=df[['text','polarity', 'emotion']]

In [138]:
data['polarity'].unique()

array([2, 1])

In [139]:
data_pos = data[data['polarity'] == 2]
data_neu = data[data['polarity'] == 1]
data_neg = data[data['polarity'] == 0]

In [140]:
data_pos = data_pos.iloc[:int(200)]
data_neu = data_neu.iloc[:int(200)]
data_neg = data_neg.iloc[:int(200)] 

In [141]:
dataset = pd.concat([data_pos, data_neu, data_neg])

In [142]:
def cleaning_numbers(data):
    return re.sub('[0-9]+', '', data)
dataset['text'] = dataset['text'].apply(lambda x: cleaning_numbers(x))
dataset['text'].tail()

582     Which #crypto project has strong community? 💪🏼🔥🚀
584    New Zealand, Rapper Sesh and DogeCoin Milliona...
585    The founder of the bankrupt cryptocurrency exc...
586    Unlock the Future with .mmit Domains! Join ove...
595    If you sleep now, you will have a dream but if...
Name: text, dtype: object

In [143]:
emoticons_to_keep = [
    '💰', '📈', '🤣', '🎊', '😂', '😭', '🙁', '😞', '💔', '😢', '😮', '😵', '🙀',
    '😱', '❗', '😠', '😡', '😤', '👎', '🔪', '🌕', '🚀', '💎', '👀', '💭', '📉',
    '😨', '😩', '😰', '💸'
]

def clean_tweet(text):
    # Remove URLs
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Remove hashtags and mentions
    text = re.sub(r'@\w+|#\w+', '', text)

    # Remove special characters except for emoticons
    text = re.sub(r'[^\w\s.!?{}]+'.format(''.join(emoticons_to_keep)), '', text)

    # Remove extra whitespace
    text = ' '.join(text.split())

    return text

# Apply the modified cleaning function to the 'text' column in your dataset
dataset['text'] = dataset['text'].apply(clean_tweet)

# Display the 'text' column in the entire dataset
print(dataset['text'])

0      BTC ON GLP RESISTANCE FOR NOW PLAY SAFE IF U R...
1            HAY bullflag breakout👀 Lets fill that wick🚀
2      Did you guys see how is doing a pitch with a d...
3      GN Fam going early to bed been up since or AM ...
4      You think this week has been fun?!? 😂😂😂😂 Wait ...
                             ...                        
582                Which project has strong community? 🚀
584    New Zealand Rapper Sesh and DogeCoin Millionai...
585    The founder of the bankrupt cryptocurrency exc...
586    Unlock the Future with .mmit Domains! Join ove...
595    If you sleep now you will have a dream but if ...
Name: text, Length: 347, dtype: object


In [144]:
# Initialize SpellChecker only once to avoid re-creation for each call
spell = SpellChecker()

# Function for spell correction
def spell_correction(text):
    words = text.split()
    misspelled = spell.unknown(words)
    corrected_words = []
    for word in words:
        if word in misspelled:
            corrected_word = spell.correction(word)
            # Check if the correction is not None, otherwise use the original word
            corrected_words.append(corrected_word if corrected_word is not None else word)
        else:
            corrected_words.append(word)
    return ' '.join(corrected_words)

# Apply spell correction to the entire 'text' column
dataset['text'] = dataset['text'].apply(spell_correction)

# Display the entire dataset
print(dataset)

                                                  text  polarity       emotion
0    BTC ON GLP RESISTANCE FOR NOW PLAY SAFE IF U R...         2         happy
1            HAY bullfrog breakout Lets fill that wick         2  anticipation
2    Did you guys see how is doing a pitch with a d...         2         happy
3    GN Fam going early to bed been up since or AM ...         2         happy
4    You think this week has been fun?!? 😂😂😂😂 Wait ...         2  anticipation
..                                                 ...       ...           ...
582               Which project has strong community i         1         happy
584  New Zealand Rapper Sesh and DogeCoin Millionai...         1         happy
585  The founder of the bankrupt cryptocurrency exc...         1         happy
586  Unlock the Future with emmit Domains! Join ove...         1         happy
595  If you sleep now you will have a dream but if ...         1         happy

[347 rows x 3 columns]


In [145]:
#Define the emoticon dictionary outside the function for a wider scope
emoticon_dict = {
    ":)": "smile ",
    ":(": "sad ",
    ":D": "laugh ",
    "😊": "smiling face with smiling eyes ",
    "😃": "grinning face with big eyes ",
    "😉": "winking face ",
    "👌": "OK hand ",
    "👍": "Thumbs up ",
    "😁": "beaming face with smiling eyes ",
    "😂": "face with tears of joy ",
    "😄": "grinning face with smiling eyes ",
    "😅": "grinning face with sweat ",
    "😆": "grinning squinting face ",
    "😇": "smiling face with halo ",
    "😞": "disappointed face ",
    "😔": "pensive face ",
    "😑": "expressionless face ",
    "😒": "unamused face ",
    "😓": "downcast face with sweat ",
    "😕": "confused face ",
    "😖": "confounded face ",
    "💰": "Money Bag ",
    "📈": "Up Trend ",
    "🤣": "Rolling on the Floor Laughing ",
    "🎊": "Confetti Ball ",
    "😭": "Loudly Crying ",
    "🙁": "Slightly frowning face ",
    "💔": "Broken Heart ",
    "😢": "Crying Face ",
    "😮": "Face with Open Mouth ",
    "😵": "Dizzy Face ",
    "🙀": "Weary Cat ",
    "😱": "Face Screaming in Fear ",
    "❗": "Exclamation Mark ",
    "😠": "Angry Face ",
    "😡": "Pouting Face ",
    "😤": "Face with Steam from Nose ",
    "👎": "Thumbs Down ",
    "🔪": "Hocho ",
    "🌕": "Moon ",
    "🚀": "Rocket ",
    "💎": "Diamond ",
    "👀": "Eyes ",
    "💭": "Thought Balloon ",
    "📉": "Down Trend ",
    "😨": "Fearful Face ",
    "😩": "Weary Face ",
    "😰": "Anxious Face with Fear ",
    "💸": "Money with Wings "
}

# Emoticon to word conversion function
def convert_emoticons_to_words(text):
    changed_emoticons = 0  # Variable to count the number of changed emoticons
    for emoticon, word in emoticon_dict.items():
        while emoticon in text:
            text = text.replace(emoticon, word + " ", 1)
            changed_emoticons += 1
    return text, changed_emoticons

# Apply the function and count emoticons for each row
def apply_conversion(text):
    converted_text, count = convert_emoticons_to_words(text)
    return pd.Series([converted_text, count], index=['converted_text', 'emoticons_count'])

conversion_results = dataset['text'].apply(apply_conversion)
dataset['converted_text'] = conversion_results['converted_text']
dataset['emoticons_count'] = conversion_results['emoticons_count']
print("Emoticons converted to words in 'converted_text' column.")
print(dataset[['converted_text', 'emoticons_count']].head())

Emoticons converted to words in 'converted_text' column.
                                      converted_text  emoticons_count
0  BTC ON GLP RESISTANCE FOR NOW PLAY SAFE IF U R...                0
1          HAY bullfrog breakout Lets fill that wick                0
2  Did you guys see how is doing a pitch with a d...                0
3  GN Fam going early to bed been up since or AM ...                0
4  You think this week has been fun?!? face with ...               13


In [146]:
stopwordlist = ['a', 'about', 'above', 'after', 'again', 'ain', 'all', 'am', 'an',
             'and','any','are', 'as', 'at', 'be', 'because', 'been', 'before',
             'being', 'below', 'between','both', 'by', 'can', 'd', 'did', 'do',
             'does', 'doing', 'down', 'during', 'each','few', 'for', 'from',
             'further', 'had', 'has', 'have', 'having', 'he', 'her', 'here',
             'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if', 'in',
             'into','is', 'it', 'its', 'itself', 'just', 'll', 'm', 'ma',
             'me', 'more', 'most','my', 'myself', 'now', 'o', 'of', 'on', 'once',
             'only', 'or', 'other', 'our', 'ours','ourselves', 'out', 'own', 're','s', 'same', 'she', "shes", 'should', "shouldve",'so', 'some', 'such',
             't', 'than', 'that', "thatll", 'the', 'their', 'theirs', 'them',
             'themselves', 'then', 'there', 'these', 'they', 'this', 'those',
             'through', 'to', 'too','under', 'until', 'up', 've', 'very', 'was',
             'we', 'were', 'what', 'when', 'where','which','while', 'who', 'whom',
             'why', 'will', 'with', 'won', 'y', 'you', "youd","youll", "youre",
             "youve", 'your', 'yours', 'yourself', 'yourselves']

In [147]:
# Stopwords removal applied separately after the option has been chosen and processed
STOPWORDS = set(stopwordlist)
def cleaning_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

# Apply the stopwords cleaning after the loop, once the 'text' column has been updated accordingly
dataset['text'] = dataset['converted_text'].apply(cleaning_stopwords)
print("Stopwords removed from 'text' column.")
print(dataset['text'].head())

Stopwords removed from 'text' column.
0    BTC ON GLP RESISTANCE FOR NOW PLAY SAFE IF U R...
1                 HAY bullfrog breakout Lets fill wick
2    Did guys see pitch deck reaching community Tha...
3    GN Fam going early bed since AM morning nonsto...
4    You think week fun?!? face tears joy face tear...
Name: text, dtype: object


In [148]:
# Function to clean repeating words
def cleaning_repeating_words(text):
    # This regex pattern targets whole words that are repeated
    return re.sub(r'\b(\w+)( \1\b)+', r'\1', text)

# Assuming 'dataset' is a pandas DataFrame and 'text' is a column in it
# Apply the cleaning function for repeating words to each row in the 'text' column
dataset['text'] = dataset['text'].apply(cleaning_repeating_words)
print("Repeating words cleaned from 'text' column.")
print(dataset['text'].head())

Repeating words cleaned from 'text' column.
0    BTC ON GLP RESISTANCE FOR NOW PLAY SAFE IF U R...
1                 HAY bullfrog breakout Lets fill wick
2    Did guys see pitch deck reaching community Tha...
3    GN Fam going early bed since AM morning nonsto...
4    You think week fun?!? face tears joy face tear...
Name: text, dtype: object


In [149]:
dataset['text']=dataset['text'].str.lower()
dataset['text'].head()

0    btc on glp resistance for now play safe if u r...
1                 hay bullfrog breakout lets fill wick
2    did guys see pitch deck reaching community tha...
3    gn fam going early bed since am morning nonsto...
4    you think week fun?!? face tears joy face tear...
Name: text, dtype: object

In [150]:
from nltk.tokenize import RegexpTokenizer

# The pattern matches word characters (\w) and punctuation marks ([^\w\s])
tokenizer = RegexpTokenizer(r'\w+|[^\w\s]')

# Applying the modified tokenizer to the dataset
dataset['text'] = dataset['text'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)
dataset['text'] = dataset['text'].apply(tokenizer.tokenize)
dataset['text'].head()

0    [btc, on, glp, resistance, for, now, play, saf...
1          [hay, bullfrog, breakout, lets, fill, wick]
2    [did, guys, see, pitch, deck, reaching, commun...
3    [gn, fam, going, early, bed, since, am, mornin...
4    [you, think, week, fun, ?, !, ?, face, tears, ...
Name: text, dtype: object

In [151]:
import nltk
st = nltk.PorterStemmer()
def stemming_on_text(data):
    text = [st.stem(word) for word in data]
    return data
dataset['text']= dataset['text'].apply(lambda x: stemming_on_text(x))
dataset['text'].head()

0    [btc, on, glp, resistance, for, now, play, saf...
1          [hay, bullfrog, breakout, lets, fill, wick]
2    [did, guys, see, pitch, deck, reaching, commun...
3    [gn, fam, going, early, bed, since, am, mornin...
4    [you, think, week, fun, ?, !, ?, face, tears, ...
Name: text, dtype: object

In [152]:
lm = nltk.WordNetLemmatizer()
def lemmatizer_on_text(data):
    text = [lm.lemmatize(word) for word in data]
    return data
dataset['text'] = dataset['text'].apply(lambda x: lemmatizer_on_text(x))
dataset['text'].head()

0    [btc, on, glp, resistance, for, now, play, saf...
1          [hay, bullfrog, breakout, lets, fill, wick]
2    [did, guys, see, pitch, deck, reaching, commun...
3    [gn, fam, going, early, bed, since, am, mornin...
4    [you, think, week, fun, ?, !, ?, face, tears, ...
Name: text, dtype: object

In [153]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
from keras.regularizers import l2
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
import numpy as np


# Initialize the tokenizer with your dataset
tokenizer = Tokenizer()
tokenizer.fit_on_texts(dataset['text'])

# Function to create LSTM model, now with optimizer initialization inside
def create_lstm_model(input_length, num_classes):
    optimizer = Adam(learning_rate=0.0001)
    
    model = Sequential()
    model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=input_length))
    # Adding Bidirectional LSTM and regularization
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Dropout(0.5))
    # Added L2 regularization to the Dense layer
    model.add(Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.01)))
    
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Prepare the dataset for training
sequences = tokenizer.texts_to_sequences(dataset['text'])
X = pad_sequences(sequences, maxlen=50)

# Convert labels to one-hot encoding
encoder_polarity = LabelEncoder()
y_polarity = to_categorical(encoder_polarity.fit_transform(dataset['polarity']))

encoder_emotion = LabelEncoder()
y_emotion = to_categorical(encoder_emotion.fit_transform(dataset['emotion']))

# Split the data into training and testing sets
X_train_polarity, X_test_polarity, y_train_polarity, y_test_polarity = train_test_split(X, y_polarity, test_size=0.2, random_state=42)
X_train_emotion, X_test_emotion, y_train_emotion, y_test_emotion = train_test_split(X, y_emotion, test_size=0.2, random_state=42)

# When creating the models, we do not pass the optimizer as a parameter anymore
lstm_model_polarity = create_lstm_model(50, y_polarity.shape[1])
lstm_model_emotion = create_lstm_model(50, y_emotion.shape[1])

# Initialize EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Train the LSTM models with EarlyStopping and validation data
lstm_model_polarity.fit(
    X_train_polarity, y_train_polarity, 
    epochs=10, 
    batch_size=64,  # Ensure batch size divides the number of samples evenly or set shuffle=True
    validation_data=(X_test_polarity, y_test_polarity),
    callbacks=[early_stopping]
)
lstm_model_emotion.fit(
    X_train_emotion, y_train_emotion, 
    epochs=10, 
    batch_size=64,  # Same batch size for consistency and ease of computation
    validation_data=(X_test_emotion, y_test_emotion),
    callbacks=[early_stopping]
)

# Save the trained LSTM models
lstm_model_polarity.save('lstm_polarity_model.h5')
lstm_model_emotion.save('lstm_emotion_model.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


In [154]:

def extract_features(model, data):
    # Create a model that will return the outputs of the LSTM layer
    # Adjust the layer index (-2) based on where your LSTM layer is located within the model
    intermediate_layer_model = Model(inputs=model.input, 
                                     outputs=model.layers[-2].output)
    # Get the features from the data using the intermediate model
    features = intermediate_layer_model.predict(data)
    return features

# Extract features with LSTM for SVM training
X_train_features_polarity = extract_features(lstm_model_polarity, X_train_polarity)
X_train_features_emotion = extract_features(lstm_model_emotion, X_train_emotion)

# Normalize the features
scaler_polarity = StandardScaler().fit(X_train_features_polarity)
X_train_features_polarity = scaler_polarity.transform(X_train_features_polarity)

scaler_emotion = StandardScaler().fit(X_train_features_emotion)
X_train_features_emotion = scaler_emotion.transform(X_train_features_emotion)

# Train SVM for polarity and emotion
# Consider using GridSearchCV for hyperparameter tuning here
svm_classifier_polarity = SVC(kernel='linear', probability=True)
svm_classifier_emotion = SVC(kernel='linear', probability=True)

svm_classifier_polarity.fit(X_train_features_polarity, np.argmax(y_train_polarity, axis=1))
svm_classifier_emotion.fit(X_train_features_emotion, np.argmax(y_train_emotion, axis=1))

# Evaluate SVM on test set
X_test_features_polarity = scaler_polarity.transform(extract_features(lstm_model_polarity, X_test_polarity))
X_test_features_emotion = scaler_emotion.transform(extract_features(lstm_model_emotion, X_test_emotion))

y_pred_polarity = svm_classifier_polarity.predict(X_test_features_polarity)
y_pred_emotion = svm_classifier_emotion.predict(X_test_features_emotion)

print("Polarity Classification Report:")
print(classification_report(np.argmax(y_test_polarity, axis=1), y_pred_polarity))

print("Emotion Classification Report:")
print(classification_report(np.argmax(y_test_emotion, axis=1), y_pred_emotion))

Polarity Classification Report:
              precision    recall  f1-score   support

           0       0.39      0.42      0.41        26
           1       0.64      0.61      0.63        44

    accuracy                           0.54        70
   macro avg       0.52      0.52      0.52        70
weighted avg       0.55      0.54      0.55        70

Emotion Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.44      0.35      0.39        23
           2       0.20      0.29      0.24         7
           3       0.48      0.47      0.48        34
           5       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         2

    accuracy                           0.37        70
   macro avg       0.19      0.18      0.18        70
weighted avg       0.40      0.37      0.38        70



In [155]:
# Save the trained SVM models using joblib or pickle
import joblib
joblib.dump(svm_classifier_polarity, 'svm_polarity_model.joblib')
joblib.dump(svm_classifier_emotion, 'svm_emotion_model.joblib')

['svm_emotion_model.joblib']

In [156]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Make predictions on the test set for polarity
y_pred_polarity = lstm_model_polarity.predict(X_test_polarity)
# Convert predictions from one-hot encoded to label encoded for evaluation
y_pred_polarity = np.argmax(y_pred_polarity, axis=1)
# Convert ground truth from one-hot encoded to label encoded for evaluation
y_true_polarity = np.argmax(y_test_polarity, axis=1)

# Calculate precision, recall, and F-measure for polarity
precision_polarity = precision_score(y_true_polarity, y_pred_polarity, average='weighted')
recall_polarity = recall_score(y_true_polarity, y_pred_polarity, average='weighted')
f1_score_polarity = f1_score(y_true_polarity, y_pred_polarity, average='weighted')

print(f'Polarity Precision: {precision_polarity:.4f}')
print(f'Polarity Recall: {recall_polarity:.4f}')
print(f'Polarity F1 Score: {f1_score_polarity:.4f}')

Polarity Precision: 0.3951
Polarity Recall: 0.6286
Polarity F1 Score: 0.4852


  _warn_prf(average, modifier, msg_start, len(result))


In [157]:
# Make predictions on the test set for emotion
y_pred_emotion = lstm_model_emotion.predict(X_test_emotion)
# Convert predictions from one-hot encoded to label encoded for evaluation
y_pred_emotion = np.argmax(y_pred_emotion, axis=1)
# Convert ground truth from one-hot encoded to label encoded for evaluation
y_true_emotion = np.argmax(y_test_emotion, axis=1)

# Calculate precision, recall, and F-measure for emotion
precision_emotion = precision_score(y_true_emotion, y_pred_emotion, average='weighted')
recall_emotion = recall_score(y_true_emotion, y_pred_emotion, average='weighted')
f1_score_emotion = f1_score(y_true_emotion, y_pred_emotion, average='weighted')

print(f'Emotion Precision: {precision_emotion:.4f}')
print(f'Emotion Recall: {recall_emotion:.4f}')
print(f'Emotion F1 Score: {f1_score_emotion:.4f}')

Emotion Precision: 0.2359
Emotion Recall: 0.4857
Emotion F1 Score: 0.3176


  _warn_prf(average, modifier, msg_start, len(result))


In [158]:
# Assuming `polarity_labels` is your list of original labels for the training data
# Example list of polarity labels used in your training dataset
polarity_labels = ['positive', 'negative', 'neutral']  # This should be replaced by the actual labels you have

# Initialize the LabelEncoder
encoder_polarity = LabelEncoder()

# Fit the LabelEncoder with your actual labels
encoder_polarity.fit(polarity_labels)

# Assuming encoder is a pre-defined LabelEncoder object for decoding the emotion labels

# This function should extract LSTM features
def extract_features(model, sequence):
    intermediate_layer_model = Model(inputs=model.input, outputs=model.layers[-2].output)
    return intermediate_layer_model.predict(sequence)

# Function to classify the intensity
def classify_intensity(emoticons_count, text):
    question_marks = text.count('?')
    periods = text.count('.')
    exclamation_marks = text.count('!')

    if exclamation_marks > 1 or question_marks > 1 or emoticons_count > 1:
        return 'High'
    elif periods == 1 or question_marks == 1 or emoticons_count == 1 or exclamation_marks ==1 :
        return 'Medium'
    elif question_marks == 0 and emoticons_count == 0:
        return 'Low'
    else:
        return 'Undetermined'

def tokenize_text(text):
    tokenizer = RegexpTokenizer(r'\w+|[^\w\s]')
    return ' '.join(tokenizer.tokenize(text))

#Function to perform real-time prediction and intensity classification
def real_time_prediction(text, tokenizer, lstm_model_emotion, lstm_model_polarity, svm_classifier_emotion, svm_classifier_polarity, encoder_emotion, encoder_polarity):
    # Preprocessing steps (assuming these functions are defined elsewhere in your code)


    cleaned_text = cleaning_numbers(text)
    cleaned_tweet = clean_tweet(cleaned_text)
    corrected_text = spell_correction(cleaned_tweet)
    emoticon_converted_text, emoticons_count = convert_emoticons_to_words(corrected_text)  # Ensure this function returns emoticons_count
    cleaned_stopwords = cleaning_stopwords(emoticon_converted_text)
    cleaned_repeating_words = cleaning_repeating_words(cleaned_stopwords)

    # Now tokenize the text after cleaning repeating words
    tokenized_text = tokenize_text(cleaned_repeating_words)

    # Continue with any additional preprocessing steps that work on the tokenized text
    stemmed_text = stemming_on_text(tokenized_text)
    lemmatized_text = lemmatizer_on_text(stemmed_text)

    assert isinstance(lemmatized_text, str), "Processed text must be a string"

    # Convert the processed text to a sequence
    sequence = tokenizer.texts_to_sequences([lemmatized_text])
    padded_sequence = pad_sequences(sequence, maxlen=50)


    # Predict emotion and polarity using the LSTM model
    lstm_prediction_emotion = lstm_model_emotion.predict(padded_sequence)
    lstm_features_emotion = extract_features(lstm_model_emotion, padded_sequence)
    svm_prediction_emotion = svm_classifier_emotion.predict_proba(lstm_features_emotion)

    lstm_prediction_polarity = lstm_model_polarity.predict(padded_sequence)
    lstm_features_polarity = extract_features(lstm_model_polarity, padded_sequence)
    svm_prediction_polarity = svm_classifier_polarity.predict_proba(lstm_features_polarity)

    # Decode the predicted labels
    emotion_label = encoder_emotion.inverse_transform(np.argmax(lstm_prediction_emotion, axis=1))
    polarity_label = encoder_polarity.inverse_transform(np.argmax(svm_prediction_polarity, axis=1))

    # Get probabilities for the predicted labels
    emotion_probability = np.max(lstm_prediction_emotion, axis=1)
    polarity_probability = np.max(svm_prediction_polarity, axis=1)

    # Classify the intensity
    intensity = classify_intensity(emoticons_count, text)  # Ensure `emoticons_count` is defined

    return polarity_label, emotion_label, polarity_probability, emotion_probability, intensity

#This is the real time tweets
tweet = "I'm angry"    
# Call the real-time prediction function
polarity_label, emotion_label, polarity_probability, emotion_probability, intensity = real_time_prediction(tweet, tokenizer, lstm_model_emotion, lstm_model_polarity, svm_classifier_emotion, svm_classifier_polarity, encoder_emotion, encoder_polarity)

# Print the results
print(f"Tweet: {tweet}")
print(f"Polarity Label: {polarity_label[0]}")
print(f"Emotion Label: {emotion_label[0]}")
print(f"Intensity Level: {intensity}")

Tweet: I'm angry
Polarity Label: neutral
Emotion Label: happy
Intensity Level: Low
