# Exploratory Data Analysis

This notebook performs exploratory data analysis (EDA) on the dataset used to train the emotion detection model. The goal is to visualize and understand the data before model training begins.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" style for the plots in this notebook
sns.set(style="whitegrid")

# Location of the processed dataset CSV -- adjust the path as necessary
dataset_path = "../data/processed/dataset.csv"
data = pd.read_csv(dataset_path)

# Preview the first few rows of the loaded table
data.head()

In [None]:
# Count the null entries in every column
missing_values = data.isna().sum()
# Show only the columns that contain at least one null entry
missing_values.loc[missing_values.gt(0)]

In [None]:
# Bar chart of how many rows carry each value of the 'emotion' column
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='emotion', ax=ax)
ax.set_title('Distribution of Emotions')
ax.set_xlabel('Emotion')
ax.set_ylabel('Count')
# Tilt the category labels by 45 degrees
plt.xticks(rotation=45)
plt.show()

In [None]:
# Visualize sample images for each emotion
import os
import cv2

# Distinct emotion labels. NaN is dropped because `data['emotion'] == NaN`
# never matches any row, so sampling for it would raise on an empty selection.
emotion_labels = data['emotion'].dropna().unique()

# Size the subplot grid from the number of labels instead of assuming exactly
# six: plt.subplot(2, 3, 7) raises ValueError as soon as a seventh label exists.
n_cols = 3
n_rows = max(1, -(-len(emotion_labels) // n_cols))  # ceiling division
plt.figure(figsize=(12, 4 * n_rows))  # 4 inches per row keeps the 2-row case at (12, 8)

for i, emotion in enumerate(emotion_labels):
    plt.subplot(n_rows, n_cols, i + 1)
    # Draw one random row for this emotion and read its image from disk
    sample_image = data[data['emotion'] == emotion].sample(1)
    image_path = sample_image['image_path'].values[0]
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # rather than raising; show a placeholder instead of crashing in cvtColor.
        plt.text(0.5, 0.5, f'Could not read:\n{image_path}',
                 ha='center', va='center', wrap=True)
    else:
        # OpenCV loads images as BGR; matplotlib expects RGB
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(emotion)
    plt.axis('off')
plt.tight_layout()
plt.show()