In [30]:
#utilities
import re
import numpy as np
import pandas as pd

#nltk
from nltk.stem import WordNetLemmatizer

#SpellCorrection
from spellchecker import SpellChecker

import string
import emoji

In [31]:
import chardet
DATASET_COLUMNS = ['date', 'username', 'text', 'polarity', 'emotion']

#Detect file encoding using chardet
with open('Emcrypt-dataset.csv', 'rb') as f:
    result = chardet.detect(f.read())

# Print the detected encoding
print("Detected encoding:", result['encoding'])

# Read the file using the detected encoding
df = pd.read_csv('Emcrypt-dataset.csv', encoding=result['encoding'], names=DATASET_COLUMNS)
df.sample(10)

Detected encoding: UTF-8-SIG


Unnamed: 0,date,username,text,polarity,emotion
530,"2:39 PM ¬∑ Oct 25, 2023",@rezalovenft,My trust in the crypto market is completely sh...,0,sad
167,"6:34 AM ¬∑ Oct 24, 2023",@Jay_Pee_JP,$KAS Big Move will Happen Soon üîúthe entire #Cr...,1,anticipation
421,20/28/2023,@minepicoins,I made a bad investment and lost a lot of mone...,0,sad
387,20/28/2023,@SanjoyC36294055,I'm so angry at the whales who manipulate the...,0,angry
380,20/29/2023,@CryptoAim82,I'm so scared of getting hacked. ü•∂ü•∂#crypto #c...,0,fear
121,"22:38 PM ¬∑ Oct 24, 2023",@XYOPepe,Don‚Äôt worry $btc #crypto fam this is normal du...,1,anticipation
242,20/29/2023,@CryptoBullGod,I pride myself on being able to communicate th...,1,surprise
40,"4:43 AM ¬∑ Oct 24, 2023",@Eldorado_krypto,üíº Marinade remains the largest DeFi protocol o...,1,happy
472,"20:05 AM ¬∑ Oct 25, 2023",@cryptounitrade,Wish I never got into crypto. The stress is to...,0,fear
77,"8:05 PM ¬∑ Oct 23, 2023",@Eldorado_krypto,üíº A broader issue emerges: the challenges of h...,1,happy


In [32]:
#Data preprocessing
data=df[['text','polarity', 'emotion']]

In [33]:
data['polarity'].unique()

array([0, 1])

In [34]:
data_pos = data[data['polarity'] == 1]
data_neg = data[data['polarity'] == 0]

In [35]:
dataset = pd.concat([data_pos, data_neg])

In [36]:
def cleaning_numbers(data):
    return re.sub('[0-9]+', '', data)
dataset['text'] = dataset['text'].apply(lambda x: cleaning_numbers(x))
dataset['text'].head()

4     How could AI foresee this? #BTC\n\nüìäüìàInside th...
5     Bitcoin breaks $K for the first time in ! üöÄ\nT...
8     üëÄ Total #cryptocurrency market cap testing thi...
9     Breaking: $pndc breaks  and now less than x aw...
10    Amazing news! The future of cryptocurrency is ...
Name: text, dtype: object

In [38]:
emoticons_to_keep = [
    'üåà', 'üåô', 'üåö', 'üåû', 'üåü', 'üå∑', 'üå∏', 'üåπ', 'üå∫', 'üçÄ', 'üçï', 'üçª', 'üéÄ',
    'üéà', 'üéâ', 'üé§', 'üé•', 'üéß', 'üéµ', 'üé∂', 'üëÖ', 'üëá', 'üëà', 'üëâ', 'üëã', 'üëå',
    'üëç', 'üëè', 'üëë', 'üíÄ', 'üíÅ', 'üíÉ', 'üíã', 'üíê', 'üíì', 'üíï', 'üíñ', 'üíó', 'üíò',
    'üíô', 'üíö', 'üíõ', 'üíú', 'üíû', 'üí§', 'üí•', 'üí¶', 'üí™', 'üí´', 'üíØ', 'üì∑', 'üî•',
    'üòÄ', 'üòÅ', 'üòÉ', 'üòÑ', 'üòÖ', 'üòÜ', 'üòá', 'üòà', 'üòâ', 'üòä', 'üòã', 'üòå', 'üòç',
    'üòé', 'üòè', 'üò∫', 'üòª', 'üòΩ', 'üôè', '‚òÄ', '‚ò∫', '‚ô•', '‚úÖ', '‚úà', '‚úä', '‚úã',
    '‚úå', '‚úî', '‚ú®', '‚ùÑ', '‚ù§', '‚≠ê', 'üò¢', 'üòû', 'üòü', 'üò†', 'üò°', 'üòî', 'üòï',
    'üòñ', 'üò®', 'üò©', 'üò™', 'üò´', 'üò∞', 'üò±', 'üò≥', 'üò∂', 'üò∑', 'üëä', 'üëé', '‚ùå',
    'üò≤', 'üòØ', 'üòÆ', 'üòµ', 'üôä', 'üôâ', 'üôà', 'üí≠', '‚ùó', '‚ö°', 'üéä', 'üôÅ', 'üíî',
    'üò§', 'üî™', 'üåï', 'üöÄ', 'üìâ', 'ü§£', 'üí∏'
]

def clean_tweet(text):
    # Remove URLs
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Remove hashtags and mentions
    text = re.sub(r'@\w+|#\w+', '', text)

    # Remove special characters except for emoticons
    text = re.sub(r'[^\w\s.!?{}]+'.format(''.join(emoticons_to_keep)), '', text)

    # Remove extra whitespace
    text = ' '.join(text.split())

    return text

# Apply the modified cleaning function to the 'text' column in your dataset
dataset['text'] = dataset['text'].apply(clean_tweet)

# Display the 'text' column in the entire dataset
print(dataset['text'])

4      How could AI foresee this? Inside the Brain of...
5      Bitcoin breaks K for the first time in ! üöÄ Thi...
8      Total market cap testing this long term resist...
9      Breaking pndc breaks and now less than x away ...
10     Amazing news! The future of cryptocurrency is ...
                             ...                        
595    Angry and frustrated with the crypto markets e...
596    The constant dread of losing more in crypto is...
597    Cryptos crash has left me in a state of deep s...
598    Every crypto plummet leaves me more furious th...
599    The instability of crypto markets is a source ...
Name: text, Length: 600, dtype: object


In [39]:
from spellchecker import SpellChecker

# Initialize SpellChecker only once to avoid re-creation for each call
spell = SpellChecker()

# List of emoticons to keep
emoticons_to_keep = [
    'üåà', 'üåô', 'üåö', 'üåû', 'üåü', 'üå∑', 'üå∏', 'üåπ', 'üå∫', 'üçÄ', 'üçï', 'üçª', 'üéÄ',
    'üéà', 'üéâ', 'üé§', 'üé•', 'üéß', 'üéµ', 'üé∂', 'üëÖ', 'üëá', 'üëà', 'üëâ', 'üëã', 'üëå',
    'üëç', 'üëè', 'üëë', 'üíÄ', 'üíÅ', 'üíÉ', 'üíã', 'üíê', 'üíì', 'üíï', 'üíñ', 'üíó', 'üíò',
    'üíô', 'üíö', 'üíõ', 'üíú', 'üíû', 'üí§', 'üí•', 'üí¶', 'üí™', 'üí´', 'üíØ', 'üì∑', 'üî•',
    'üòÄ', 'üòÅ', 'üòÉ', 'üòÑ', 'üòÖ', 'üòÜ', 'üòá', 'üòà', 'üòâ', 'üòä', 'üòã', 'üòå', 'üòç',
    'üòé', 'üòè', 'üò∫', 'üòª', 'üòΩ', 'üôè', '‚òÄ', '‚ò∫', '‚ô•', '‚úÖ', '‚úà', '‚úä', '‚úã',
    '‚úå', '‚úî', '‚ú®', '‚ùÑ', '‚ù§', '‚≠ê', 'üò¢', 'üòû', 'üòü', 'üò†', 'üò°', 'üòî', 'üòï',
    'üòñ', 'üò®', 'üò©', 'üò™', 'üò´', 'üò∞', 'üò±', 'üò≥', 'üò∂', 'üò∑', 'üëä', 'üëé', '‚ùå',
    'üò≤', 'üòØ', 'üòÆ', 'üòµ', 'üôä', 'üôâ', 'üôà', 'üí≠', '‚ùó', '‚ö°', 'üéä', 'üôÅ', 'üíî',
    'üò§', 'üî™', 'üåï', 'üöÄ', 'üìâ', 'ü§£', 'üí∏'
]

# Function for spell correction
def spell_correction(text):
    words = text.split()
    corrected_words = []
    for word in words:
        # Check if the word is an emoticon, if so, skip spell checking
        if word not in emoticons_to_keep:
            if word in spell.unknown([word]):
                corrected_word = spell.correction(word)
                corrected_words.append(corrected_word if corrected_word else word)
            else:
                corrected_words.append(word)
        else:
            corrected_words.append(word)
    return ' '.join(corrected_words)

# Apply spell correction to the entire 'text' column
dataset['text'] = dataset['text'].apply(spell_correction)

# Display the entire dataset
print(dataset)

                                                  text  polarity   emotion
4    How could AI foresee this Inside the Brain of ...         1  surprise
5    Bitcoin breaks K for the first time in ! üöÄ Thi...         1  surprise
8    Total market cap testing this long term resist...         1  surprise
9    Breaking and breaks and now less than i away f...         1  surprise
10   Amazing news The future of cryptocurrency is l...         1  surprise
..                                                 ...       ...       ...
595  Angry and frustrated with the crypto markets e...         0     angry
596  The constant dread of losing more in crypto is...         0      fear
597  Cryptos crash has left me in a state of deep s...         0       sad
598  Every crypto plummet leaves me more furious th...         0     angry
599  The instability of crypto markets is a source ...         0      fear

[600 rows x 3 columns]


In [None]:
#Define the emoticon dictionary outside the function for a wider scope
emoticon_dict = {
    "üåà": "Rainbow",
    "üåô": "Crescent Moon",
    "üåö": "New Moon Face",
    "üåû": "Sun with Face",
    "üåü": "Glowing Star",
    "üå∑": "Tulip",
    "üå∏": "Cherry Blossom",
    "üåπ": "Rose",
    "üå∫": "Hibiscus",
    "üçÄ": "Four Leaf Clover",
    "üçï": "Pizza",
    "üçª": "Clinking Beer Mugs",
    "üéÄ": "Ribbon",
    "üéà": "Balloon",
    "üéâ": "Party Popper",
    "üé§": "Microphone",
    "üé•": "Movie Camera",
    "üéß": "Headphone",
    "üéµ": "Musical Note",
    "üé∂": "Musical Notes",
    "üëÄ": "Eyes",
    "üëÖ": "Tongue",
    "üëá": "Backhand Index Pointing Down",
    "üëà": "Backhand Index Pointing Left",
    "üëâ": "Backhand Index Pointing Right",
    "üëã": "Waving Hand",
    "üëå": "OK Hand",
    "üëç": "Thumbs Up",
    "üëè": "Clapping Hands",
    "üëë": "Crown",
    "üíÄ": "Skull",
    "üíÅ": "Person Tipping Hand",
    "üíÉ": "Woman Dancing",
    "üíã": "Kiss Mark",
    "üíé": "Gem Stone",
    "üíê": "Bouquet",
    "üíì": "Beating Heart",
    "üíï": "Two Hearts",
    "üíñ": "Sparkling Heart",
    "üíó": "Growing Heart",
    "üíò": "Heart with Arrow",
    "üíô": "Blue Heart",
    "üíö": "Green Heart",
    "üíõ": "Yellow Heart",
    "üíú": "Purple Heart",
    "üíû": "Revolving Hearts",
    "üí§": "Zzz",
    "üí•": "Collision",
    "üí¶": "Sweat Droplets",
    "üí™": "Flexed Biceps",
    "üí´": "Dizzy",
    "üíØ": "Hundred Points",
    "üí∞": "Money Bag",
    "üì∑": "Camera",
    "üî•": "Fire",
    "üòÄ": "Grinning Face",
    "üòÅ": "Beaming Face with Smiling Eyes",
    "üòÇ": "Face with Tears of Joy",
    "üòÉ": "Grinning Face with Big Eyes",
    "üòÑ": "Grinning Face with Smiling Eyes",
    "üòÖ": "Grinning Face with Sweat",
    "üòÜ": "Grinning Squinting Face",
    "üòá": "Smiling Face with Halo",
    "üòà": "Smiling Face with Horns",
    "üòâ": "Winking Face",
    "üòä": "Smiling Face with Smiling Eyes",
    "üòã": "Face Savoring Food",
    "üòå": "Relieved Face",
    "üòç": "Smiling Face with Heart-Eyes",
    "üòé": "Smiling Face with Sunglasses",
    "üòè": "Smirking Face",
    "üò∫": "Smiling Cat with Smiling Eyes",
    "üòª": "Smiling Cat with Heart-Eyes",
    "üòΩ": "Kissing Cat with Closed Eyes",
    "üôÄ": "Weary Cat",
    "üôè": "Folded Hands",
    "‚òÄ": "Sun",
    "‚ò∫": "Smiling Face",
    "‚ô•": "Heart Suit",
    "‚úÖ": "Check Mark Button",
    "‚úà": "Airplane",
    "‚úä": "Raised Fist",
    "‚úã": "Raised Hand",
    "‚úå": "Victory Hand",
    "‚úî": "Check Mark",
    "‚ú®": "Sparkles",
    "‚ùÑ": "Snowflake",
    "‚ù§": "Red Heart",
    "‚≠ê": "Star",
    "üò¢": "Crying Face",
    "üò≠": "Loudly Crying Face",
    "üòû": "Disappointed Face",
    "üòü": "Worried Face",
    "üò†": "Angry Face",
    "üò°": "Pouting Face",
    "üòî": "Pensive Face",
    "üòï": "Confused Face",
    "üòñ": "Confounded Face",
    "üò®": "Fearful Face",
    "üò©": "Weary Face",
    "üò™": "Sleepy Face",
    "üò´": "Tired Face",
    "üò∞": "Anxious Face with Sweat",
    "üò±": "Face Screaming in Fear",
    "üò≥": "Flushed Face",
    "üò∂": "Face Without Mouth",
    "üò∑": "Face with Medical Mask",
    "üëä": "Oncoming Fist",
    "üëé": "Thumbs Down",
    "‚ùå": "Cross Mark",
    "üò≤": "Astonished Face",
    "üòØ": "Hushed Face",
    "üòÆ": "Face with Open Mouth",
    "üòµ": "Dizzy Face",
    "üôä": "Speak-No-Evil Monkey",
    "üôâ": "Hear-No-Evil Monkey",
    "üôà": "See-No-Evil Monkey",
    "üí≠": "Thought Balloon",
    "‚ùó": "Exclamation Mark",
    "‚ö°": "High Voltage",
    "üéä": "Confetti Ball",
    "üôÅ": "Slightly frowning face",
    "üíî": "Broken Heart",
    "üò§": "Face with Steam from Nose",
    "üî™": "Hocho",
    "üåï": "Full Moon",
    "üöÄ": "Rocket",
    "üìâ": "Down Trend",
    "ü§£": "Rolling on the Floor Laughing",
    "üí∏": "Money with Wings"
}

# Emoticon to word conversion function
def convert_emoticons_to_words(text):
    changed_emoticons = 0  # Variable to count the number of changed emoticons
    for emoticon, word in emoticon_dict.items():
        while emoticon in text:
            text = text.replace(emoticon, word + " ", 1)
            changed_emoticons += 1
    return text, changed_emoticons

# Apply the function and count emoticons for each row
def apply_conversion(text):
    converted_text, count = convert_emoticons_to_words(text)
    return pd.Series([converted_text, count], index=['converted_text', 'emoticons_count'])

conversion_results = dataset['text'].apply(apply_conversion)
dataset['converted_text'] = conversion_results['converted_text']
dataset['emoticons_count'] = conversion_results['emoticons_count']
print("Emoticons converted to words in 'converted_text' column.")
print(dataset[['converted_text', 'emoticons_count']].head())

In [40]:
stopwordlist = ['a', 'about', 'above', 'after', 'again', 'ain', 'all', 'am', 'an',
             'and','any','are', 'as', 'at', 'be', 'because', 'been', 'before',
             'being', 'below', 'between','both', 'by', 'can', 'd', 'did', 'do',
             'does', 'doing', 'down', 'during', 'each','few', 'for', 'from',
             'further', 'had', 'has', 'have', 'having', 'he', 'her', 'here',
             'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if', 'in',
             'into','is', 'it', 'its', 'itself', 'just', 'll', 'm', 'ma',
             'me', 'more', 'most','my', 'myself', 'now', 'o', 'of', 'on', 'once',
             'only', 'or', 'other', 'our', 'ours','ourselves', 'out', 'own', 're','s', 'same', 'she', "shes", 'should', "shouldve",'so', 'some', 'such',
             't', 'than', 'that', "thatll", 'the', 'their', 'theirs', 'them',
             'themselves', 'then', 'there', 'these', 'they', 'this', 'those',
             'through', 'to', 'too','under', 'until', 'up', 've', 'very', 'was',
             'we', 'were', 'what', 'when', 'where','which','while', 'who', 'whom',
             'why', 'will', 'with', 'won', 'y', 'you', "youd","youll", "youre",
             "youve", 'your', 'yours', 'yourself', 'yourselves']

In [42]:
# Stopwords removal applied separately after the option has been chosen and processed
STOPWORDS = set(stopwordlist)
def cleaning_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

# Apply the stopwords cleaning after the loop, once the 'text' column has been updated accordingly
dataset['text'] = dataset['text'].apply(cleaning_stopwords)
print("Stopwords removed from 'text' column.")
print(dataset['text'].head())


Stopwords removed from 'text' column.
4     How could AI foresee Inside Brain CallBotüí™üöÄ Ca...
5     Bitcoin breaks K first time ! üöÄ This bullish s...
8     Total market cap testing long term resistance ...
9     Breaking breaks less away time high Big news !...
10    Amazing news The future cryptocurrency looking...
Name: text, dtype: object


In [44]:
# Function to clean repeating words
def cleaning_repeating_words(text):
    # This regex pattern targets whole words that are repeated
    return re.sub(r'\b(\w+)( \1\b)+', r'\1', text)

# Assuming 'dataset' is a pandas DataFrame and 'text' is a column in it
# Apply the cleaning function for repeating words to each row in the 'text' column
dataset['text'] = dataset['text'].apply(cleaning_repeating_words)
print("Repeating words cleaned from 'text' column.")
print(dataset['text'].head())

Repeating words cleaned from 'text' column.
4     How could AI foresee Inside Brain CallBotüí™üöÄ Ca...
5     Bitcoin breaks K first time ! üöÄ This bullish s...
8     Total market cap testing long term resistance ...
9     Breaking breaks less away time high Big news !...
10    Amazing news The future cryptocurrency looking...
Name: text, dtype: object


In [45]:
dataset['text']=dataset['text'].str.lower()
dataset['text'].head()

4     how could ai foresee inside brain callbotüí™üöÄ ca...
5     bitcoin breaks k first time ! üöÄ this bullish s...
8     total market cap testing long term resistance ...
9     breaking breaks less away time high big news !...
10    amazing news the future cryptocurrency looking...
Name: text, dtype: object

In [46]:
import pandas as pd

# Assuming 'dataset' is your DataFrame

# Replace 'output_file.xlsx' with the desired file name
output_file = 'Feature1_file.xlsx'

# Save the dataset to an Excel file
dataset.to_excel(output_file, index=False)

print(f'Dataset saved to {output_file}')

Dataset saved to Feature1_file.xlsx


In [25]:
from nltk.tokenize import RegexpTokenizer

# The pattern matches word characters (\w) and punctuation marks ([^\w\s])
tokenizer = RegexpTokenizer(r'\w+|[^\w\s]')

# Applying the modified tokenizer to the dataset
dataset['text'] = dataset['text'].apply(lambda x: ' '.join(x) if isinstance(x, list) else x)
dataset['text'] = dataset['text'].apply(tokenizer.tokenize)
dataset['text'].head()

4     [how, could, ai, foresee, inside, brain, callb...
5     [bitcoin, breaks, k, first, time, !, rocket, t...
8     [total, market, cap, testing, long, term, resi...
9     [breaking, breaks, less, away, time, high, big...
10    [amazing, news, the, future, cryptocurrency, l...
Name: text, dtype: object

In [26]:
import nltk
st = nltk.PorterStemmer()
def stemming_on_text(data):
    text = [st.stem(word) for word in data]
    return data
dataset['text']= dataset['text'].apply(lambda x: stemming_on_text(x))
dataset['text'].head()

4     [how, could, ai, foresee, inside, brain, callb...
5     [bitcoin, breaks, k, first, time, !, rocket, t...
8     [total, market, cap, testing, long, term, resi...
9     [breaking, breaks, less, away, time, high, big...
10    [amazing, news, the, future, cryptocurrency, l...
Name: text, dtype: object

In [27]:
lm = nltk.WordNetLemmatizer()
def lemmatizer_on_text(data):
    text = [lm.lemmatize(word) for word in data]
    return data
dataset['text'] = dataset['text'].apply(lambda x: lemmatizer_on_text(x))
dataset['text'].head()

4     [how, could, ai, foresee, inside, brain, callb...
5     [bitcoin, breaks, k, first, time, !, rocket, t...
8     [total, market, cap, testing, long, term, resi...
9     [breaking, breaks, less, away, time, high, big...
10    [amazing, news, the, future, cryptocurrency, l...
Name: text, dtype: object

In [29]:
import pandas as pd

# Assuming 'dataset' is your DataFrame

# Replace 'output_file.xlsx' with the desired file name
output_file = 'Feature2_file.xlsx'

# Save the dataset to an Excel file
dataset.to_excel(output_file, index=False)

print(f'Dataset saved to {output_file}')

Dataset saved to Feature2_file.xlsx


In [19]:
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import joblib

# Assuming `data` is your dataset with 'text', 'polarity', and 'emotion' columns
texts = data['text']
polarity_labels = data['polarity']
emotion_labels = data['emotion']

# Splitting the dataset into training, testing, and evaluation sets
X_train, X_temp, y_polarity_train, y_polarity_temp = train_test_split(
    texts, polarity_labels, test_size=0.4, random_state=42)
_, _, y_emotion_train, y_emotion_temp = train_test_split(
    texts, emotion_labels, test_size=0.4, random_state=42)

# Further splitting the temporary sets into test and evaluation sets
X_test, X_eval, y_polarity_test, y_polarity_eval = train_test_split(
    X_temp, y_polarity_temp, test_size=0.25, random_state=42)  # 0.25 * 0.4 = 0.1
_, _, y_emotion_test, y_emotion_eval = train_test_split(
    X_temp, y_emotion_temp, test_size=0.25, random_state=42)



# Tokenize words
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_eval_seq = tokenizer.texts_to_sequences(X_eval)

# Pad sequences to ensure uniform input size
max_seq_length = max([len(x) for x in X_train_seq])  # Get the length of the longest sequence
X_train_pad = pad_sequences(X_train_seq, maxlen=max_seq_length)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_seq_length)
X_eval_pad = pad_sequences(X_eval_seq, maxlen=max_seq_length)

# LSTM model for polarity classification
def create_lstm_model():
    model = Sequential()
    model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_seq_length))
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(32))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification for polarity

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

lstm_model_polarity = KerasClassifier(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=1)

# SVM model for emotion classification
svm_model_emotion = SVC(probability=True, kernel='linear', verbose=1)

# Train the models
lstm_model_polarity.fit(X_train_pad, y_polarity_train)
svm_model_emotion.fit(X_train_pad, y_emotion_train)

# Save the models
joblib.dump(lstm_model_polarity, "lstm_model_polarity.pkl")
joblib.dump(svm_model_emotion, "svm_model_emotion.pkl")


2023-11-15 03:11:45.006952: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  X, y = self._initialize(X, y)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[LibSVM]*
optimization finished, #iter = 16
obj = -0.000000, rho = -1.971739
nSV = 6, nBSV = 0
Total nSV = 6
*
optimization finished, #iter = 17
obj = -0.000000, rho = -2.029935
nSV = 7, nBSV = 0
Total nSV = 7
*
optimization finished, #iter = 18
obj = -0.000000, rho = -1.923816
nSV = 6, nBSV = 0
Total nSV = 6
*
optimization finished, #iter = 18
obj = -0.000000, rho = -2.027702
nSV = 6, nBSV = 0
Total nSV = 6
*
optimization finished, #iter = 17
obj = -0.000000, rho = 2.029902
nSV = 7, nBSV = 0
*
optimization finished, #iter = 48
obj = -0.000000, rho = -1.482000
nSV = 13, nBSV = 0
Total nSV = 13
*
optimization finished, #iter = 38
obj = -0.000000, rho = -1.500514
nSV = 11, nBSV = 0
Total nSV = 11
*
optimization finished, #iter = 50
obj = -0.000000, rho = -1.537198
nSV = 14, nBSV = 0
Total nSV = 14
*
optimization finished, #iter = 27
obj = -0.000000, rho = -1.529415
nSV = 11, nBS

In [None]:
# Evaluate the models
y_pred_polarity = lstm_model_polarity.predict(X_test_pad)
y_pred_emotion = svm_model_emotion.predict(X_test_pad)

print("Polarity Classification Report:")
print(classification_report(y_polarity_test, y_pred_polarity))

print("Emotion Classification Report:")
print(classification_report(y_emotion_test, y_pred_emotion))

# Evaluation on the evaluation set
y_pred_eval_polarity = lstm_model_polarity.predict(X_eval_pad)
y_pred_eval_emotion = svm_model_emotion.predict(X_eval_pad)

print("Polarity Evaluation Set Classification Report:")
print(classification_report(y_polarity_eval, y_pred_eval_polarity))

print("Emotion Evaluation Set Classification Report:")
print(classification_report(y_emotion_eval, y_pred_eval_emotion))

NameError: name 'lstm_model_polarity' is not defined