In [3]:
import pandas as pd

# Define the mapping of emotions to each sentiment with intensity values
emotion_intensity_map = {
    "Positive": [("Hope", 0), ("Joy", 1), ("Love", 2)],
    "Neutral": [("Calm", 0), ("Acceptance", 1), ("Peace", 2)],
    "Negative": [("Sadness", 0), ("Grief", 1), ("Anger", 2)]
}

# Function to assign intensity based on sentiment and emotion
def assign_intensity(row):
    sentiment = row['Sentiment']
    emotion_list = emotion_intensity_map.get(sentiment, [])
    for emotion, intensity in emotion_list:
        if row['Emotion'] == emotion:
            return intensity
    return None

# Load datasets
english_df = pd.read_csv('english.csv')
gujrati_df = pd.read_csv('gujrati.csv')
hindi_df = pd.read_csv('hindi.csv')
marathi_df = pd.read_csv('marathi.csv')

# Apply the function to add an Intensity column to each dataset
english_df['Intensity'] = english_df.apply(assign_intensity, axis=1)
gujrati_df['Intensity'] = gujrati_df.apply(assign_intensity, axis=1)
hindi_df['Intensity'] = hindi_df.apply(assign_intensity, axis=1)
marathi_df['Intensity'] = marathi_df.apply(assign_intensity, axis=1)

# Save the datasets with intensity
english_df.to_csv('english_with_intensity.csv', index=False)
gujrati_df.to_csv('gujrati_with_intensity.csv', index=False)
hindi_df.to_csv('hindi_with_intensity.csv', index=False)
marathi_df.to_csv('marathi_with_intensity.csv', index=False)


In [4]:
import pandas as pd
import re
import string
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

# Custom stopwords for Hindi
hindi_stopwords = set(['है', 'में', 'की', 'से', 'यह', 'था', 'और'])  # Add more as needed

# Load the Hindi dataset
hindi_df = pd.read_csv('hindi_with_intensity.csv')

# Function to normalize text
def normalize_text(text):
    text = text.lower()  # Lowercase the text
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.strip()  # Remove extra whitespace
    return text

# Function to remove stopwords for Hindi
def remove_stopwords(text):
    return ' '.join([word for word in text.split() if word not in hindi_stopwords])

# Apply text normalization and stopword removal for Hindi dataset
def preprocess_hindi(df):
    df['Sentence'] = df['Sentence'].apply(normalize_text)
    df['Sentence'] = df['Sentence'].apply(remove_stopwords)
    return df

# Preprocess Hindi dataset
hindi_df = preprocess_hindi(hindi_df)

# Function to filter sentences based on length (between 3 and 50 words)
def filter_sentence_length(df, column_name, min_len=3, max_len=50):
    df = df[df[column_name].apply(lambda x: min_len <= len(x.split()) <= max_len)]
    return df

# Apply sentence length filtering for Hindi dataset
hindi_df = filter_sentence_length(hindi_df, 'Sentence')

# Save the preprocessed Hindi dataset
hindi_df.to_csv('hindi_preprocessed.csv', index=False)

print(f'Preprocessed Hindi dataset saved with {len(hindi_df)} rows.')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Preprocessed Hindi dataset saved with 198971 rows.


In [5]:
import pandas as pd
import re
import string
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

# Load the English stopwords
english_stopwords = set(['the'])

# Load the English dataset
english_df = pd.read_csv('english_with_intensity.csv')

# Function to normalize text
def normalize_text(text):
    text = text.lower()  # Lowercase the text
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.strip()  # Remove extra whitespace
    return text

# Function to remove stopwords for English
def remove_stopwords(text):
    return ' '.join([word for word in text.split() if word not in english_stopwords])

# Apply text normalization and stopword removal for English dataset
def preprocess_english(df):
    df['Sentence'] = df['Sentence'].apply(normalize_text)
    df['Sentence'] = df['Sentence'].apply(remove_stopwords)
    return df

# Preprocess English dataset
english_df = preprocess_english(english_df)

# Function to filter sentences based on length (between 3 and 50 words)
def filter_sentence_length(df, column_name, min_len=3, max_len=50):
    df = df[df[column_name].apply(lambda x: min_len <= len(x.split()) <= max_len)]
    return df

# Apply sentence length filtering for English dataset
english_df = filter_sentence_length(english_df, 'Sentence')

# Save the preprocessed English dataset
english_df.to_csv('english_preprocessed.csv', index=False)

print(f'Preprocessed English dataset saved with {len(english_df)} rows.')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Preprocessed English dataset saved with 196185 rows.


In [6]:
import pandas as pd
import re
import string
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

# Custom stopwords for Marathi
marathi_stopwords = set(['आहे', 'मध्ये', 'की', 'तो', 'आणि'])  # Add more as needed

# Load the Marathi dataset
marathi_df = pd.read_csv('marathi_with_intensity.csv')

# Function to normalize text
def normalize_text(text):
    text = text.lower()  # Lowercase the text
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.strip()  # Remove extra whitespace
    return text

# Function to remove stopwords for Marathi
def remove_stopwords(text):
    return ' '.join([word for word in text.split() if word not in marathi_stopwords])

# Apply text normalization and stopword removal for Marathi dataset
def preprocess_marathi(df):
    df['Sentence'] = df['Sentence'].apply(normalize_text)
    df['Sentence'] = df['Sentence'].apply(remove_stopwords)
    return df

# Preprocess Marathi dataset
marathi_df = preprocess_marathi(marathi_df)

# Function to filter sentences based on length (between 3 and 50 words)
def filter_sentence_length(df, column_name, min_len=3, max_len=50):
    df = df[df[column_name].apply(lambda x: min_len <= len(x.split()) <= max_len)]
    return df

# Apply sentence length filtering for Marathi dataset
marathi_df = filter_sentence_length(marathi_df, 'Sentence')

# Save the preprocessed Marathi dataset
marathi_df.to_csv('marathi_preprocessed.csv', index=False)

print(f'Preprocessed Marathi dataset saved with {len(marathi_df)} rows.')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Preprocessed Marathi dataset saved with 199973 rows.


In [7]:
import pandas as pd
import re
import string
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

# Custom stopwords for Gujarati
gujarati_stopwords = set(['છે', 'કે', 'હું', 'છો', 'આ', 'તે'])  # Add more as needed

# Load the Gujarati dataset
gujrati_df = pd.read_csv('gujrati_with_intensity.csv')

# Function to normalize text
def normalize_text(text):
    text = text.lower()  # Lowercase the text
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.strip()  # Remove extra whitespace
    return text

# Function to remove stopwords for Gujarati
def remove_stopwords(text):
    return ' '.join([word for word in text.split() if word not in gujarati_stopwords])

# Apply text normalization and stopword removal for Gujarati dataset
def preprocess_gujarati(df):
    df['Sentence'] = df['Sentence'].apply(normalize_text)
    df['Sentence'] = df['Sentence'].apply(remove_stopwords)
    return df

# Preprocess Gujarati dataset
gujrati_df = preprocess_gujarati(gujrati_df)

# Function to filter sentences based on length (between 3 and 50 words)
def filter_sentence_length(df, column_name, min_len=3, max_len=50):
    df = df[df[column_name].apply(lambda x: min_len <= len(x.split()) <= max_len)]
    return df

# Apply sentence length filtering for Gujarati dataset
gujrati_df = filter_sentence_length(gujrati_df, 'Sentence')

# Save the preprocessed Gujarati dataset
gujrati_df.to_csv('gujarati_preprocessed.csv', index=False)

print(f'Preprocessed Gujarati dataset saved with {len(gujrati_df)} rows.')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Preprocessed Gujarati dataset saved with 199998 rows.


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Load and Sample 50,000 Rows from Each Preprocessed Dataset

# Load the preprocessed datasets
hindi_df = pd.read_csv('hindi_preprocessed.csv')
english_df = pd.read_csv('english_preprocessed.csv')
marathi_df = pd.read_csv('marathi_preprocessed.csv')
gujrati_df = pd.read_csv('gujarati_preprocessed.csv')

# Function to sample 50,000 rows from each dataset with equal sentiment distribution
def sample_data(df, n=50000):
    # Ensure equal proportion of sentiments
    sentiment_counts = df['Sentiment'].value_counts().min()  # Get the smallest class size
    samples_per_class = min(n // df['Sentiment'].nunique(), sentiment_counts)

    df_sampled = df.groupby('Sentiment', group_keys=False).apply(lambda x: x.sample(samples_per_class, random_state=42))
    return df_sampled.sample(frac=1).reset_index(drop=True)

# Sample 50,000 rows from each dataset
hindi_sampled = sample_data(hindi_df)
english_sampled = sample_data(english_df)
marathi_sampled = sample_data(marathi_df)
gujrati_sampled = sample_data(gujrati_df)

# Combine the sampled datasets
combined_df = pd.concat([hindi_sampled, english_sampled, marathi_sampled, gujrati_sampled])
combined_df = combined_df.sample(frac=1).reset_index(drop=True)  # Shuffle the data

print(f'Combined dataset has {len(combined_df)} rows.')


Combined dataset has 199992 rows.


In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Step 2: Apply TF-IDF Vectorization on the 'Sentence' column (limit features for memory efficiency)
tfidf_vectorizer = TfidfVectorizer(max_features=3000)  # Limiting features to reduce memory usage
X_tfidf = tfidf_vectorizer.fit_transform(combined_df['Sentence'])  # Keep the matrix in sparse format

print(f'TF-IDF matrix shape: {X_tfidf.shape}')

# Step 3: Encode Sentiment and Emotion Labels

# Check for missing values and drop them
combined_df = combined_df.dropna(subset=['Sentiment', 'Emotion'])

# Convert Sentiment and Emotion columns to string, if not already
combined_df['Sentiment'] = combined_df['Sentiment'].astype(str)
combined_df['Emotion'] = combined_df['Emotion'].astype(str)

# Apply LabelEncoder to Sentiment and Emotion columns
sentiment_encoder = LabelEncoder()
emotion_encoder = LabelEncoder()

combined_df['Sentiment'] = sentiment_encoder.fit_transform(combined_df['Sentiment'])
combined_df['Emotion'] = emotion_encoder.fit_transform(combined_df['Emotion'])

# Prepare the labels
y_sentiment = combined_df['Sentiment']
y_emotion = combined_df['Emotion']


TF-IDF matrix shape: (199992, 3000)


In [10]:
# Step 4: Fuzzify the Intensity Column

# Fuzzify the Intensity column
def fuzzify_intensity(intensity):
    if intensity == 0:
        return np.array([1, 0, 0])  # Low intensity
    elif intensity == 1:
        return np.array([0, 1, 0])  # Medium intensity
    elif intensity == 2:
        return np.array([0, 0, 1])  # High intensity

fuzzified_intensities = np.array([fuzzify_intensity(i) for i in combined_df['Intensity']])

# Step 5: Split Data into Training, Validation, and Test Sets

# Split fuzzified intensity values into training, validation, and test sets
fuzzy_train, fuzzy_temp = train_test_split(fuzzified_intensities, test_size=0.3, random_state=42)
fuzzy_val, fuzzy_test = train_test_split(fuzzy_temp, test_size=0.5, random_state=42)

# Split data into training, validation, and test sets for the text TF-IDF features and labels
X_train_text, X_temp_text, y_train_sentiment, y_temp_sentiment, y_train_emotion, y_temp_emotion = train_test_split(
    X_tfidf, y_sentiment, y_emotion, test_size=0.3, random_state=42)

X_val_text, X_test_text, y_val_sentiment, y_test_sentiment, y_val_emotion, y_test_emotion = train_test_split(
    X_temp_text, y_temp_sentiment, y_temp_emotion, test_size=0.5, random_state=42)


In [11]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate

# Input layers for text (TF-IDF features) and fuzzified intensity (fuzzy logic)
input_text = Input(shape=(X_train_text.shape[1],), sparse=True, name='text_input')  # Use sparse input
input_fuzzy = Input(shape=(3,), name='fuzzy_input')  # Fuzzy input: Low, Medium, High intensity

# Dense layers for feature extraction from text
dense_layer = Dense(128, activation='relu')(input_text)
dense_layer = Dense(64, activation='relu')(dense_layer)

# Combine Dense output with fuzzy logic input (fuzzified intensity values)
concat_layer = Concatenate()([dense_layer, input_fuzzy])

# Fully connected layers for sentiment and emotion classification
dense_sentiment = Dense(64, activation='relu')(concat_layer)
output_sentiment = Dense(3, activation='softmax', name='sentiment_output')(dense_sentiment)

dense_emotion = Dense(64, activation='relu')(concat_layer)
output_emotion = Dense(9, activation='softmax', name='emotion_output')(dense_emotion)

# Build the final model
model = Model(inputs=[input_text, input_fuzzy], outputs=[output_sentiment, output_emotion])

# Compile the model
model.compile(optimizer='adam',
              loss={'sentiment_output': 'sparse_categorical_crossentropy', 'emotion_output': 'sparse_categorical_crossentropy'},
              metrics={'sentiment_output': 'accuracy', 'emotion_output': 'accuracy'})

# Show model summary
model.summary()


In [12]:
# Convert the sparse matrices (X_train_text, X_val_text, X_test_text) to dense format during training
X_train_text_dense = X_train_text.toarray()  # Convert sparse to dense
X_val_text_dense = X_val_text.toarray()      # Convert sparse to dense
X_test_text_dense = X_test_text.toarray()    # Convert sparse to dense


In [13]:
# Step 7: Train the Model

# Convert sparse matrices to dense
X_train_text_dense = X_train_text.toarray()  # Convert sparse to dense for training
X_val_text_dense = X_val_text.toarray()      # Convert sparse to dense for validation

history = model.fit([X_train_text_dense, fuzzy_train],  # Pass dense matrix now
                    {'sentiment_output': y_train_sentiment, 'emotion_output': y_train_emotion},
                    epochs=50, batch_size=128, validation_data=([X_val_text_dense, fuzzy_val],
                    {'sentiment_output': y_val_sentiment, 'emotion_output': y_val_emotion}),
                    verbose=1)

# Step 8: Save the trained model
model.save('dense_fuzzy_model_with_dense_input.h5')

# Step 9: Evaluate the Model

# Convert test sparse matrix to dense
X_test_text_dense = X_test_text.toarray()  # Convert sparse to dense for test evaluation

test_results = model.evaluate([X_test_text_dense, fuzzy_test],
                              {'sentiment_output': y_test_sentiment, 'emotion_output': y_test_emotion})

print(f'Test loss: {test_results[0]}')
print(f'Sentiment accuracy: {test_results[1]}')
print(f'Emotion accuracy: {test_results[2]}')


Epoch 1/50
[1m1094/1094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - emotion_output_accuracy: 0.7937 - loss: 1.2088 - sentiment_output_accuracy: 0.7995

KeyboardInterrupt: 

In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Step 7: Train the Model

# Convert sparse matrices to dense
X_train_text_dense = X_train_text.toarray()  # Convert sparse to dense for training
X_val_text_dense = X_val_text.toarray()      # Convert sparse to dense for validation

history = model.fit([X_train_text_dense, fuzzy_train],  # Pass dense matrix now
                    {'sentiment_output': y_train_sentiment, 'emotion_output': y_train_emotion},
                    epochs=1, batch_size=128, validation_data=([X_val_text_dense, fuzzy_val],
                    {'sentiment_output': y_val_sentiment, 'emotion_output': y_val_emotion}),
                    verbose=1)

# Step 8: Save the trained model
model.save('dense_fuzzy_model_with_dense_input.h5')

# Step 9: Evaluate the Model

# Convert test sparse matrix to dense
X_test_text_dense = X_test_text.toarray()  # Convert sparse to dense for test evaluation

test_results = model.evaluate([X_test_text_dense, fuzzy_test],
                              {'sentiment_output': y_test_sentiment, 'emotion_output': y_test_emotion})

print(f'Test loss: {test_results[0]}')
print(f'Sentiment accuracy: {test_results[1]}')
print(f'Emotion accuracy: {test_results[2]}')

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Step 10: Predictions and Metrics

# Get predictions for the test data
y_pred_sentiment_prob, y_pred_emotion_prob = model.predict([X_test_text_dense, fuzzy_test])

# Convert predicted probabilities to class labels
y_pred_sentiment = np.argmax(y_pred_sentiment_prob, axis=1)
y_pred_emotion = np.argmax(y_pred_emotion_prob, axis=1)

# If y_test_sentiment and y_test_emotion are already class labels (not one-hot encoded)
y_true_sentiment = y_test_sentiment if y_test_sentiment.ndim == 1 else np.argmax(y_test_sentiment, axis=1)
y_true_emotion = y_test_emotion if y_test_emotion.ndim == 1 else np.argmax(y_test_emotion, axis=1)

# Sentiment Metrics
accuracy_sentiment = accuracy_score(y_true_sentiment, y_pred_sentiment)
precision_sentiment = precision_score(y_true_sentiment, y_pred_sentiment, average='macro')
recall_sentiment = recall_score(y_true_sentiment, y_pred_sentiment, average='macro')
f1_sentiment = f1_score(y_true_sentiment, y_pred_sentiment, average='macro')
conf_matrix_sentiment = confusion_matrix(y_true_sentiment, y_pred_sentiment)

# Emotion Metrics
accuracy_emotion = accuracy_score(y_true_emotion, y_pred_emotion)
precision_emotion = precision_score(y_true_emotion, y_pred_emotion, average='macro')
recall_emotion = recall_score(y_true_emotion, y_pred_emotion, average='macro')
f1_emotion = f1_score(y_true_emotion, y_pred_emotion, average='macro')
conf_matrix_emotion = confusion_matrix(y_true_emotion, y_pred_emotion)

# Output the evaluation in the desired format

# Sentiment Model Evaluation
print("Sentiment Model Evaluation:")
print(f"Accuracy: {accuracy_sentiment}")
print(f"Precision: {precision_sentiment}")
print(f"Recall: {recall_sentiment}")
print(f"F1 Score: {f1_sentiment}")
print("Confusion Matrix:")
print(conf_matrix_sentiment)

# Emotion Model Evaluation
print("\nEmotion Model Evaluation:")
print(f"Accuracy: {accuracy_emotion}")
print(f"Precision: {precision_emotion}")
print(f"Recall: {recall_emotion}")
print(f"F1 Score: {f1_emotion}")
print("Confusion Matrix:")
print(conf_matrix_emotion)


[1m1094/1094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - emotion_output_accuracy: 0.9874 - loss: 0.0642 - sentiment_output_accuracy: 0.9872 - val_emotion_output_accuracy: 0.9519 - val_loss: 0.3385 - val_sentiment_output_accuracy: 0.9518




[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - emotion_output_accuracy: 0.9507 - loss: 0.3456 - sentiment_output_accuracy: 0.9507
Test loss: 0.33865252137184143
Sentiment accuracy: 0.9504650235176086
Emotion accuracy: 0.9503650069236755
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Sentiment Model Evaluation:
Accuracy: 0.9503650121670723
Precision: 0.9504801103489265
Recall: 0.9503683421129004
F1 Score: 0.9504101208525849
Confusion Matrix:
[[9595  190  207]
 [ 154 9401  390]
 [ 135  413 9514]]

Emotion Model Evaluation:
Accuracy: 0.9504650155005167
Precision: 0.9508110229640104
Recall: 0.950295850819044
F1 Score: 0.9504778326091997
Confusion Matrix:
[[3225    0    0   31    1   91    1    0    0]
 [   0 3103    0    0    0    0   94   49    1]
 [   0    0 3160    0  154    0    0    0   60]
 [  38    0    1 3237    0   68    0    0    0]
 [   0    0  225    0 2980    0    0    0   34]
 [  75    0    0   51    0 3303    0    0  

In [None]:
# Save the trained model
model.save('dense_fuzzy_model_with_dense_input.h5')  # Save the model for future use

print("Model saved as 'dense_fuzzy_model_with_dense_input.h5'.")


In [None]:
import tensorflow as tf

# Load the previously saved model
model = tf.keras.models.load_model('dense_fuzzy_model_with_dense_input.h5')


In [None]:
import pandas as pd
import numpy as np

# Assuming combined_df contains the dataset with 'Sentence', 'Sentiment', 'Emotion'
# Randomly sample 50 sentences from the dataset
random_samples = combined_df.sample(50, random_state=42)

# Prepare the features (text) and labels (sentiment, emotion) for these samples
X_random_text = tfidf_vectorizer.transform(random_samples['Sentence'])  # TF-IDF vectorization (sparse)
y_true_sentiment = random_samples['Sentiment'].values
y_true_emotion = random_samples['Emotion'].values

# Convert the sparse matrix to dense (since model expects dense)
X_random_text_dense = X_random_text.toarray()


In [None]:
# Apply fuzzification on each intensity value
fuzzy_intensities_random = np.array([fuzzify_intensity(i) for i in random_samples['Intensity']])

# Predict using the loaded model
predictions = model.predict([X_random_text_dense, fuzzy_intensities_random])

# Extract sentiment and emotion predictions
y_pred_sentiment = np.argmax(predictions[0], axis=1)
y_pred_emotion = np.argmax(predictions[1], axis=1)


In [None]:
import matplotlib.pyplot as plt

# Plot line graph for Sentiment
plt.figure(figsize=(12, 6))
plt.plot(range(50), y_true_sentiment, label='True Sentiment', marker='o')
plt.plot(range(50), y_pred_sentiment, label='Predicted Sentiment', marker='x')
plt.title('True vs Predicted Sentiment')
plt.xlabel('Sample Index')
plt.ylabel('Sentiment Label')
plt.legend()
plt.show()

# Plot line graph for Emotion
plt.figure(figsize=(12, 6))
plt.plot(range(50), y_true_emotion, label='True Emotion', marker='o')
plt.plot(range(50), y_pred_emotion, label='Predicted Emotion', marker='x')
plt.title('True vs Predicted Emotion')
plt.xlabel('Sample Index')
plt.ylabel('Emotion Label')
plt.legend()
plt.show()


In [None]:
import numpy as np

# Double Bar Plot for Sentiment
plt.figure(figsize=(12, 6))
indices = np.arange(50)  # Indices for 50 samples
width = 0.35  # Width of the bars

plt.bar(indices, y_true_sentiment, width=width, label='True Sentiment', color='b')
plt.bar(indices + width, y_pred_sentiment, width=width, label='Predicted Sentiment', color='r')

plt.xlabel('Sample Index')
plt.ylabel('Sentiment Label')
plt.title('True vs Predicted Sentiment (Double Bar Plot)')
plt.legend()
plt.show()

# Double Bar Plot for Emotion
plt.figure(figsize=(12, 6))
plt.bar(indices, y_true_emotion, width=width, label='True Emotion', color='b')
plt.bar(indices + width, y_pred_emotion, width=width, label='Predicted Emotion', color='r')

plt.xlabel('Sample Index')
plt.ylabel('Emotion Label')
plt.title('True vs Predicted Emotion (Double Bar Plot)')
plt.legend()
plt.show()
