<a href="https://colab.research.google.com/github/Shehab-Mechanical/codes/blob/main/CNN_%26_NLP_2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Make the user's Google Drive reachable from the Colab runtime; the dataset
# folders referenced below all live underneath this mount point.
drive.mount('/content/drive')
print("Google Drive mounted successfully.")

# Locations of the two datasets on Drive: facial-expression images and the
# GoEmotions text corpus.
facial_expression_path = "/content/drive/My Drive/FacialExpressionData"
goemotions_path = "/content/drive/My Drive/GoEmotionsData"

# Create each folder if it is missing (exist_ok=True makes this a no-op when
# the folder is already there). Note this only guarantees the folders exist,
# not that they contain any data.
for dataset_dir in (facial_expression_path, goemotions_path):
    os.makedirs(dataset_dir, exist_ok=True)

print("Facial Expression Data Path:", facial_expression_path)
print("GoEmotions Data Path:", goemotions_path)
print("Dataset folders verified successfully.")

# Text-based emotion data: read goemotions.csv from the GoEmotions folder
# into a DataFrame that the later text-processing steps work on.
goemotions_csv_path = os.path.join(goemotions_path, "goemotions.csv")
goemotions_df = pd.read_csv(filepath_or_buffer=goemotions_csv_path)
print("GoEmotions Dataset loaded successfully.")

# Print the first rows as a quick sanity check of the loaded columns.
print("GoEmotions Dataset Preview:", goemotions_df.head(), sep="\n")

# Summary statistics of the GoEmotions table, followed by a bar chart of how
# many rows carry each value of the 'emotion' column.
print("Dataset Statistics:", goemotions_df.describe(), sep="\n")

plt.figure(figsize=(10, 5))
# countplot draws on (and returns) the current axes, so the title can be set
# directly on its return value.
sns.countplot(data=goemotions_df, x='emotion').set_title(
    "Distribution of Emotions in GoEmotions Dataset"
)
plt.xticks(rotation=45)  # tilt the category labels so they do not overlap
plt.show()
print("Text dataset preprocessing completed successfully.")

# Facial-expression images are read from the "images" sub-folder of the
# facial-expression dataset directory.
image_dir = os.path.join(facial_expression_path, "images")

# A single generator configuration serves both splits: pixel values are
# multiplied by 1/255 and 20% of the images are set aside for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Build the training iterator first and the validation iterator second; both
# resize images to 48x48, use batches of 32 and categorical labels, and differ
# only in the subset they draw from.
train_generator, val_generator = (
    datagen.flow_from_directory(
        image_dir,
        target_size=(48, 48),
        batch_size=32,
        class_mode='categorical',
        subset=split_name,
    )
    for split_name in ('training', 'validation')
)
print("Image dataset loaded and preprocessed successfully.")

# Display class distribution
# class_indices maps class name -> integer label. Order the names by that
# label so class_labels[i] is the name of label i.
class_labels = sorted(
    train_generator.class_indices,
    key=train_generator.class_indices.get,
)

# train_generator.classes holds one integer label per training image, so
# counting its values gives the number of images per class. (The previous
# version plotted class_indices.values(), i.e. the label ids 0..K-1 themselves,
# which is not a distribution.) reindex keeps the counts aligned with
# class_labels and reports 0 for any class that has no training images.
class_counts = (
    pd.Series(train_generator.classes)
    .value_counts()
    .reindex(range(len(class_labels)), fill_value=0)
)

plt.figure(figsize=(10, 5))
sns.barplot(x=class_labels, y=class_counts.to_numpy())
plt.title("Class Distribution in Facial Expression Dataset")
plt.ylabel("Number of training images")
plt.xticks(rotation=45)
plt.show()
print("Image dataset visualization completed successfully.")

# CNN Model for Image-Based Emotion Recognition
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

# Image classifier: two Conv2D -> BatchNormalization -> MaxPooling2D stages,
# then a dense head ending in a softmax over the image classes.
cnn_model = Sequential()

# Stage 1: 32 filters of 3x3 on 48x48 three-channel inputs.
cnn_model.add(Conv2D(32, (3,3), activation='relu', input_shape=(48, 48, 3)))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPooling2D((2,2)))

# Stage 2: 64 filters of 3x3.
cnn_model.add(Conv2D(64, (3,3), activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPooling2D((2,2)))

# Classifier head; the output width follows the number of classes the
# training generator reports.
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(train_generator.num_classes, activation='softmax'))

cnn_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
cnn_model.summary()
print("CNN Model defined successfully.")

# LSTM Model for Text-Based Emotion Recognition
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, SpatialDropout1D, GlobalMaxPooling1D, Bidirectional

# Tokenization & Padding
max_words = 5000  # vocabulary cap handed to the tokenizer (num_words)
max_len = 100     # length every token sequence is padded/truncated to

# Missing text cells are read from CSV as NaN (a float), which the tokenizer
# cannot lower-case or split. Replace them with empty strings and make sure
# every entry is a str before tokenizing.
text_corpus = goemotions_df['text'].fillna("").astype(str)

# Words outside the vocabulary cap are mapped to the "<OOV>" token.
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(text_corpus)
X_text = tokenizer.texts_to_sequences(text_corpus)
X_text = pad_sequences(X_text, maxlen=max_len)

# One-hot encode the emotion labels. The dtype is requested explicitly because
# get_dummies' default differs between pandas versions (bool on pandas >= 2.0),
# and the softmax/categorical_crossentropy model expects numeric targets.
y_text = pd.get_dummies(goemotions_df['emotion'], dtype='float32').values
print("Text tokenization and padding completed successfully.")

# Bar chart of the 20 words with the highest counts recorded by the fitted
# tokenizer (tokenizer.word_counts maps word -> count).
word_counts = (
    pd.DataFrame.from_dict(tokenizer.word_counts, orient='index', columns=['count'])
    .sort_values(by='count', ascending=False)
    .head(20)
)

plt.figure(figsize=(10, 5))
# barplot draws on (and returns) the current axes, so the title is set on its
# return value.
sns.barplot(x=word_counts.index, y=word_counts['count']).set_title(
    "Top 20 Most Frequent Words in GoEmotions Dataset"
)
plt.xticks(rotation=45)  # tilt the word labels so they stay readable
plt.show()
print("Tokenized text visualization completed successfully.")

# Define LSTM model
# Text classifier: token embedding -> bidirectional LSTM over the sequence ->
# max-pool across time steps -> dense head with a softmax over the emotion
# classes (one output per column of y_text).
lstm_model = Sequential([
    # `input_length` is intentionally not passed: the argument is deprecated
    # in Keras 3 (it only triggers a warning there). The input length is
    # supplied through build() below instead.
    Embedding(max_words, 128),
    SpatialDropout1D(0.2),
    # return_sequences=True keeps the per-timestep outputs so that the
    # pooling layer can reduce over the time axis.
    Bidirectional(LSTM(64, return_sequences=True)),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(y_text.shape[1], activation='softmax')
])

# Build explicitly for inputs of max_len token ids per sample (batch size left
# open) so that summary() can report output shapes and parameter counts.
lstm_model.build(input_shape=(None, max_len))

lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
lstm_model.summary()
print("LSTM Model defined successfully.")
