In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that files stored
# there can be read through the normal filesystem API.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
"""
Kinematic Emotion Recognition Dataset - Data Setup & Feature Extraction
Complete data processing pipeline for BVH motion capture files
"""

import numpy as np
import pandas as pd
import os
from pathlib import Path
import re
from typing import List, Dict, Tuple
import warnings
warnings.filterwarnings('ignore')


class BVHParser:
    """Parse BVH (Biovision Hierarchy) motion capture files.

    After :meth:`parse` has run, the instance exposes:

    * ``joints`` - joint names in file order (End Sites are not joints)
    * ``hierarchy`` - per joint ``{'channels', 'offset', 'children'}``
    * ``joint_parents`` - child name -> parent name (the root has no entry)
    * ``channels_list`` - ``(joint, channel)`` pairs in motion-column order
    * ``motion_data`` - 2-D float array with one row per frame
    * ``num_frames`` / ``frame_time`` - values read from the MOTION header
    """

    def __init__(self, filepath):
        self.filepath = filepath
        self._reset()

    def _reset(self):
        """Clear all parsed state so that parse() can be called repeatedly."""
        self.joints = []
        self.hierarchy = {}
        self.motion_data = None
        self.frame_time = 0.0
        self.num_frames = 0
        self.channels_list = []
        self.joint_parents = {}

    def parse(self):
        """Read the file and populate hierarchy and motion attributes.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if the file has no MOTION section or no numeric
                motion rows.
        """
        # Start from a clean slate; otherwise a second call would append
        # every joint and channel a second time.
        self._reset()

        with open(self.filepath, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()

        # The section keyword stands alone on its line. Matching the first
        # token (rather than a substring) keeps joint names that merely
        # contain "motion" from being mistaken for the section start.
        motion_idx = None
        for i, line in enumerate(lines):
            tokens = line.split()
            if tokens and tokens[0].upper() == 'MOTION':
                motion_idx = i
                break

        if motion_idx is None:
            raise ValueError(f"No MOTION section found in {self.filepath}")

        self._parse_hierarchy(lines[:motion_idx])
        self._parse_motion(lines[motion_idx:])

        return self

    def _parse_hierarchy(self, lines):
        """Extract joint names, offsets, channels and parent/child links."""
        # Stack of currently open "{ ... }" blocks. Entries are joint names,
        # or None for an End Site, so that each closing brace pops exactly
        # the block it closes.
        open_blocks = []
        current_joint = None

        for line in lines:
            stripped = line.strip()

            if stripped.startswith(('ROOT', 'JOINT')):
                parts = stripped.split()
                if len(parts) >= 2:
                    current_joint = parts[1]
                    self.joints.append(current_joint)
                    self.hierarchy[current_joint] = {
                        'channels': [],
                        'offset': [0, 0, 0],
                        'children': []
                    }

                    parent = open_blocks[-1] if open_blocks else None
                    if parent is not None:
                        self.joint_parents[current_joint] = parent
                        self.hierarchy[parent]['children'].append(current_joint)

                    open_blocks.append(current_joint)

            elif stripped.lower().startswith('end site'):
                # An End Site opens its own brace block but is not a joint.
                # Track it with a placeholder so its OFFSET is not written
                # onto the enclosing joint and its "}" does not close that
                # joint prematurely.
                open_blocks.append(None)
                current_joint = None

            elif stripped.startswith('OFFSET') and current_joint:
                parts = stripped.split()
                if len(parts) >= 4:
                    try:
                        offset = [float(parts[1]), float(parts[2]), float(parts[3])]
                        self.hierarchy[current_joint]['offset'] = offset
                    except ValueError:
                        # Malformed number: keep the default [0, 0, 0].
                        pass

            elif stripped.startswith('CHANNELS') and current_joint:
                parts = stripped.split()
                if len(parts) >= 2:
                    try:
                        num_channels = int(parts[1])
                    except ValueError:
                        # Malformed count: leave this joint without channels.
                        continue
                    channels = parts[2:2 + num_channels]
                    self.hierarchy[current_joint]['channels'] = channels
                    self.channels_list.extend((current_joint, ch) for ch in channels)

            elif '}' in stripped and open_blocks:
                open_blocks.pop()
                current_joint = open_blocks[-1] if open_blocks else None

    def _parse_motion(self, lines):
        """Extract frame count, frame time and the per-frame value matrix.

        Raises:
            ValueError: if no line consists solely of numeric values.
        """
        # The header fields are looked for in the first few lines only.
        for line in lines[:10]:
            if 'Frames:' in line:
                self.num_frames = int(line.split(':')[1].strip())
            elif 'Frame Time:' in line:
                self.frame_time = float(line.split(':')[1].strip())

        # A data row is any line made up entirely of numbers. Header lines
        # and other text fail float() and are skipped.
        data = []
        for line in lines:
            tokens = line.split()
            if not tokens:
                continue
            try:
                data.append([float(tok) for tok in tokens])
            except ValueError:
                continue

        if not data:
            raise ValueError("No motion data found")

        self.motion_data = np.array(data)
        if self.num_frames == 0:
            # Header missing or zero: fall back to the number of rows read.
            self.num_frames = len(data)

    def get_joint_channels(self, joint_name):
        """Return the motion columns of one joint across all frames.

        Returns:
            A ``(frames, n_channels)`` slice of ``motion_data``, or ``None``
            if the joint is unknown, has no channels, or nothing was parsed.
        """
        if joint_name not in self.hierarchy or self.motion_data is None:
            return None

        # Columns are laid out joint by joint in declaration order, so the
        # start column is the running total of the preceding joints' channels.
        col_idx = 0
        for joint in self.joints:
            num_channels = len(self.hierarchy[joint]['channels'])
            if joint == joint_name:
                if num_channels == 0:
                    return None
                return self.motion_data[:, col_idx:col_idx + num_channels]
            col_idx += num_channels

        return None

    def get_joint_position_indices(self, joint_name):
        """Return the indices (within the joint's own channels) of position channels."""
        if joint_name not in self.hierarchy:
            return []

        channels = self.hierarchy[joint_name]['channels']
        return [i for i, ch in enumerate(channels) if 'position' in ch.lower()]

# ============================================================================
# FEATURE EXTRACTION
# ============================================================================

class KinematicFeatureExtractor:
    """Turn a parsed BVH recording into a flat dict of scalar motion features.

    The parser object must provide ``joints``, ``get_joint_channels(name)``
    and ``get_joint_position_indices(name)``.
    """

    def __init__(self, bvh_parser):
        self.features = {}
        self.parser = bvh_parser

    @staticmethod
    def _magnitude(deltas):
        """Per-row magnitude: Euclidean norm, or absolute value for one column."""
        if deltas.shape[1] > 1:
            return np.linalg.norm(deltas, axis=1)
        return np.abs(deltas).flatten()

    def extract_all_features(self):
        """Compute every feature and return the resulting ``name -> float`` dict.

        Position channels are used where a joint has any. If no joint has
        position channels at all, every joint's full channel block is used
        instead.
        """
        self.features = {}
        parser = self.parser

        # Collect the position columns of each joint that has some.
        position_data = {}
        for joint in parser.joints:
            joint_channels = parser.get_joint_channels(joint)
            if joint_channels is None:
                continue
            pos_indices = parser.get_joint_position_indices(joint)
            if len(pos_indices) > 0:
                position_data[joint] = joint_channels[:, pos_indices]

        # Fallback: no positional channels anywhere, so use all channels.
        if not position_data:
            per_joint = ((joint, parser.get_joint_channels(joint)) for joint in parser.joints)
            position_data = {
                joint: block
                for joint, block in per_joint
                if block is not None and len(block) > 0
            }

        self._extract_movement_features(position_data)
        self._extract_statistical_features(position_data)

        return self.features

    def _extract_movement_features(self, position_data):
        """Add per-joint speed (mean/std/max) and mean acceleration magnitude.

        Differences are taken between consecutive frames; joints with fewer
        than two frames are skipped.
        """
        for joint_name, samples in position_data.items():
            if len(samples) < 2:
                continue

            # First difference between consecutive frames.
            velocity = np.diff(samples, axis=0)
            speed = self._magnitude(velocity)
            if len(speed) > 0:
                self.features.update({
                    f'{joint_name}_speed_mean': float(np.mean(speed)),
                    f'{joint_name}_speed_std': float(np.std(speed)),
                    f'{joint_name}_speed_max': float(np.max(speed)),
                })

            # Second difference needs at least three frames.
            if len(samples) < 3:
                continue
            acc_mag = self._magnitude(np.diff(velocity, axis=0))
            if len(acc_mag) > 0:
                self.features[f'{joint_name}_acc_mean'] = float(np.mean(acc_mag))

    def _extract_statistical_features(self, position_data):
        """Add per-joint range of motion and speed statistics pooled over joints."""
        pooled_speeds = []

        for joint_name, samples in position_data.items():
            if len(samples) < 2:
                continue

            # Mean over columns of the peak-to-peak extent.
            extent = np.ptp(samples, axis=0)
            self.features[f'{joint_name}_range'] = float(np.mean(extent))

            pooled_speeds.extend(self._magnitude(np.diff(samples, axis=0)))

        if not pooled_speeds:
            return

        self.features['overall_speed_mean'] = float(np.mean(pooled_speeds))
        self.features['overall_speed_std'] = float(np.std(pooled_speeds))
        self.features['overall_speed_max'] = float(np.max(pooled_speeds))
        # 90th percentile of the pooled speeds.
        self.features['movement_intensity'] = float(np.percentile(pooled_speeds, 90))

# ============================================================================
# DATASET PROCESSOR
# ============================================================================

class EmotionDatasetProcessor:
    """Extract features from every BVH file listed in a file-info CSV.

    The CSV must have a ``filename`` column (file stem without ``.bvh``) and
    the ``actor_ID``, ``emotion`` and ``actor_gender`` columns that are copied
    into the output next to the extracted features.
    """

    # Columns process_dataset adds to each row in addition to the features.
    METADATA_COLUMNS = ('filename', 'actor_ID', 'emotion', 'gender', 'num_frames', 'duration')

    def __init__(self, data_dir, fileinfo_csv):
        self.data_dir = Path(data_dir)
        self.fileinfo = pd.read_csv(fileinfo_csv)
        self.features_df = None
        # Lazily built map of "<name>.bvh" -> path for files under data_dir.
        self._bvh_index = None

    def find_bvh_file(self, filename):
        """Locate ``<filename>.bvh`` under ``data_dir``.

        The common direct locations are checked first because they cost a
        single stat call each. Only if none of them exists is the directory
        tree scanned, and that scan is done once and cached, so looking up
        many files does not re-walk the tree for each one. Files added to
        nested directories after the first scan are therefore not seen.

        Returns:
            The matching ``Path``, or ``None`` if the file cannot be found.
        """
        target = f"{filename}.bvh"
        direct_candidates = (
            self.data_dir / target,
            self.data_dir / "bvh" / target,
            self.data_dir / "data" / target,
            self.data_dir / str(filename) / target,
        )
        for path in direct_candidates:
            if path.exists():
                return path

        # getattr: tolerate instances that were created without __init__.
        index = getattr(self, '_bvh_index', None)
        if index is None:
            index = {}
            for path in self.data_dir.rglob("*.bvh"):
                # Keep the first hit when the same name occurs more than once.
                index.setdefault(path.name, path)
            self._bvh_index = index

        return index.get(target)

    def process_dataset(self, sample_size=None, save_path='features_dataset.csv'):
        """Parse BVH files, extract features and write them to ``save_path``.

        Args:
            sample_size: if truthy, process a reproducible random sample of at
                most this many rows; otherwise process every row.
            save_path: CSV path for the resulting feature table.

        Returns:
            The feature ``DataFrame``, or ``None`` when no file could be
            processed. Failures are reported on stdout in both cases.
        """
        print(f"Processing dataset with {len(self.fileinfo)} entries...")

        if sample_size:
            print(f"Sampling {sample_size} files")
            fileinfo_sample = self.fileinfo.sample(n=min(sample_size, len(self.fileinfo)), random_state=42)
        else:
            fileinfo_sample = self.fileinfo

        all_features = []
        failed_files = []
        error_details = {}  # filename -> reason for the failure

        for _, row in fileinfo_sample.iterrows():
            filename = row['filename']

            bvh_path = self.find_bvh_file(filename)
            if bvh_path is None:
                failed_files.append(filename)
                error_details[filename] = "File not found"
                continue

            try:
                parser = BVHParser(str(bvh_path))
                parser.parse()

                extractor = KinematicFeatureExtractor(parser)
                features = extractor.extract_all_features()

                if not features:
                    failed_files.append(filename)
                    error_details[filename] = "No features extracted"
                    continue

                # Add metadata
                features['filename'] = filename
                features['actor_ID'] = row['actor_ID']
                features['emotion'] = row['emotion']
                features['gender'] = row['actor_gender']
                features['num_frames'] = parser.num_frames
                features['duration'] = parser.num_frames * parser.frame_time

                all_features.append(features)

                if len(all_features) % 20 == 0:
                    print(f"Processed {len(all_features)} files successfully...")

            except Exception as e:
                # One unreadable file must not abort the whole run; record
                # the reason so it shows up in the failure report below.
                failed_files.append(filename)
                error_details[filename] = str(e)

        if all_features:
            self.features_df = pd.DataFrame(all_features)
            self.features_df.to_csv(save_path, index=False)

            n_feature_cols = len([c for c in self.features_df.columns if c not in self.METADATA_COLUMNS])
            print(f"Successfully processed: {len(all_features)} files")
            print(f"Features saved to: {save_path}")
            print(f"Total features per sample: {n_feature_cols}")
        else:
            print("No files were processed successfully; nothing was saved.")

        # Report failures whether or not anything succeeded: a run in which
        # every file fails is exactly when the reasons matter most.
        if failed_files:
            print(f"\n Failed files: {len(failed_files)}")
            print("\n First 5 errors:")
            for filename, reason in list(error_details.items())[:5]:
                print(f"  - {filename}: {reason}")

        return self.features_df if all_features else None

# ============================================================================
# MAIN
# ============================================================================

# Script entry point: build the feature CSV from the BVH files on the mounted
# Drive and print a short summary of what was extracted.
if __name__ == "__main__":

    print("KINEMATIC EMOTION RECOGNITION - DIAGNOSTIC & SETUP")

    # Configuration
    data_dir = "/content/drive/MyDrive/kinematic_dataset_final/BVH/"
    fileinfo_csv = "/content/drive/MyDrive/kinematic_dataset_final/file-info.csv"

    # Upper bound on the number of rows to process; process_dataset caps it
    # at the number of rows in the file-info CSV.
    sample_size = 1402

    processor = EmotionDatasetProcessor(data_dir, fileinfo_csv)
    features_df = processor.process_dataset(sample_size=sample_size)

    # process_dataset returns None when no file could be processed.
    if features_df is not None:
        print("DATASET SUMMARY")
        print(f"\nTotal samples: {len(features_df)}")
        print(f"\nEmotion distribution:")
        print(features_df['emotion'].value_counts())
        # 6 = metadata columns added by process_dataset (filename, actor_ID,
        # emotion, gender, num_frames, duration).
        print(f"\nFeature count: {len(features_df.columns) - 6}")

KINEMATIC EMOTION RECOGNITION - DIAGNOSTIC & SETUP
Processing dataset with 1402 entries...
Sampling 1402 files
Processed 20 files successfully...
Processed 40 files successfully...
Processed 60 files successfully...
Processed 80 files successfully...
Processed 100 files successfully...
Processed 120 files successfully...
Processed 140 files successfully...
Processed 160 files successfully...
Processed 180 files successfully...
Processed 200 files successfully...
Processed 220 files successfully...
Processed 240 files successfully...
Processed 260 files successfully...
Processed 280 files successfully...
Processed 300 files successfully...
Processed 320 files successfully...
Processed 340 files successfully...
Processed 360 files successfully...
Processed 380 files successfully...
Processed 400 files successfully...
Processed 420 files successfully...
Processed 440 files successfully...
Processed 460 files successfully...
Processed 480 files successfully...
Processed 500 files successfully...

In [None]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    classification_report, confusion_matrix,
    f1_score, precision_score, recall_score, accuracy_score,
    precision_recall_fscore_support
)
from sklearn.impute import SimpleImputer
import warnings
warnings.filterwarnings('ignore')

# Set style for better-looking plots. These calls change global
# matplotlib/seaborn state, so they affect every figure created afterwards.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need a fairly
# recent matplotlib - confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")


class EmotionDataLoader:
    """Load the extracted-features CSV and turn it into ``X`` / ``y`` arrays.

    Typical use: ``loader.load_data().prepare_features()``, after which
    ``X``, ``y`` (integer-encoded emotions), ``feature_names`` and
    ``label_encoder`` are populated.
    """

    # Columns that describe the sample rather than its motion; never features.
    METADATA_COLS = ('filename', 'actor_ID', 'emotion', 'gender',
                     'scenario_ID', 'version', 'num_frames', 'duration')

    def __init__(self, features_csv):
        self.features_csv = features_csv
        self.df = None
        self.X = None
        self.y = None
        self.feature_names = None
        self.label_encoder = LabelEncoder()
        self.scaler = StandardScaler()

    def load_data(self):
        """Read ``features_csv`` into ``self.df`` and return self."""
        self.df = pd.read_csv(self.features_csv)
        print(f"\n Loaded {len(self.df)} samples")
        print(f"Total columns: {len(self.df.columns)}")

        return self

    def _feature_columns(self):
        """Return the numeric, non-metadata column names in DataFrame order.

        Non-numeric columns are left out because the feature matrix is later
        standardised and fed to numeric estimators; a stray text column would
        otherwise make that step fail.
        """
        candidates = [col for col in self.df.columns if col not in self.METADATA_COLS]
        numeric = set(self.df[candidates].select_dtypes(include=['number', 'bool']).columns)
        return [col for col in candidates if col in numeric]

    def prepare_features(self):
        """Build the feature matrix ``X`` and the encoded label vector ``y``.

        Loads the CSV first if ``load_data`` has not been called yet.

        Returns:
            self, so calls can be chained.
        """
        if self.df is None:
            self.load_data()

        feature_cols = self._feature_columns()
        self.feature_names = feature_cols

        skipped = [col for col in self.df.columns
                   if col not in self.METADATA_COLS and col not in feature_cols]
        if skipped:
            print(f"\n Ignoring non-numeric columns: {skipped}")

        print(f"\n Feature columns: {len(feature_cols)}")

        # Extract features and labels
        X = self.df[feature_cols].to_numpy(dtype=float)
        y = self.df['emotion'].values

        # Encode labels as integers 0..n_classes-1
        print("\n Encoding emotion labels...")
        y_encoded = self.label_encoder.fit_transform(y)

        print("\n Emotion classes:")
        for i, emotion in enumerate(self.label_encoder.classes_):
            count = np.sum(y == emotion)
            print(f"  {i}: {emotion} ({count} samples)")

        self.X = X
        self.y = y_encoded

        return self

    def normalize_features(self, X_train, X_test):
        """Standardise both sets using statistics fitted on ``X_train`` only."""
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        return X_train_scaled, X_test_scaled

    def get_emotion_name(self, encoded_label):
        """Convert one encoded label back to its emotion name."""
        return self.label_encoder.inverse_transform([encoded_label])[0]

class EmotionClassifierTrainer:
    """Train several classifiers on one train/test split and score them.

    After training, ``models[name]`` holds each fitted estimator and
    ``results[name]`` a dict with keys ``model``, ``y_pred``, ``accuracy``,
    ``precision``, ``recall`` and ``f1`` (the last three weighted by class
    support), all measured on the held-out test split.
    """

    def __init__(self, X, y, feature_names, label_encoder):
        self.X = X
        self.y = y
        self.feature_names = feature_names
        self.label_encoder = label_encoder
        self.models = {}
        self.results = {}
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Fitted in split_data; kept so new samples can be scaled the same way.
        self.scaler = None

    def split_data(self, test_size=0.2, random_state=42):
        """Make a stratified train/test split and standardise the features.

        The scaler is fitted on the training part only and then applied to
        both parts, so no test-set statistics leak into training.

        Returns:
            self, so calls can be chained.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=random_state, stratify=self.y
        )

        print(f"\n Training samples: {len(self.X_train)}")
        print(f" Test samples: {len(self.X_test)}")
        print(f" Test size: {test_size*100:.0f}%")

        # Normalize features
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

        return self

    def _fit_and_score(self, name, model, banner):
        """Fit ``model``, predict the test split and record it under ``name``.

        Raises:
            RuntimeError: if ``split_data`` has not been called yet.
        """
        if self.X_train is None:
            raise RuntimeError("Call split_data() before training a model")

        print(banner)
        model.fit(self.X_train, self.y_train)
        y_pred = model.predict(self.X_test)

        # zero_division=0 scores a class that is never predicted as 0,
        # matching the per-class metrics computed by the evaluator.
        self.models[name] = model
        self.results[name] = {
            'model': model,
            'y_pred': y_pred,
            'accuracy': accuracy_score(self.y_test, y_pred),
            'precision': precision_score(self.y_test, y_pred, average='weighted', zero_division=0),
            'recall': recall_score(self.y_test, y_pred, average='weighted', zero_division=0),
            'f1': f1_score(self.y_test, y_pred, average='weighted', zero_division=0)
        }
        print(f" Results of {name} Classifier")
        print(f"  Accuracy: {self.results[name]['accuracy']:.4f}")

        return model

    def train_random_forest(self, n_estimators=100, max_depth=None):
        """Train a Random Forest classifier and return the fitted model."""
        rf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42,
            n_jobs=-1
        )
        return self._fit_and_score(
            'Random Forest', rf, f"\n→ Training with {n_estimators} trees..."
        )

    def train_svm(self, kernel='rbf', C=1.0):
        """Train an SVM classifier and return the fitted model."""
        svm = SVC(
            kernel=kernel,
            C=C,
            random_state=42,
            probability=True
        )
        return self._fit_and_score(
            'SVM', svm, f"\n→ Training with {kernel} kernel..."
        )

    def train_gradient_boosting(self, n_estimators=100, learning_rate=0.1):
        """Train a Gradient Boosting classifier and return the fitted model."""
        gb = GradientBoostingClassifier(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=42
        )
        return self._fit_and_score(
            'Gradient Boosting', gb, f"\n→ Training with {n_estimators} estimators..."
        )

    def train_all_models(self):
        """Train all three classifiers with their default settings; return self."""
        self.train_random_forest(n_estimators=100)
        self.train_svm(kernel='rbf', C=1.0)
        self.train_gradient_boosting(n_estimators=100)
        return self


class ModelEvaluator:
    """Evaluate and visualize model performance.

    Reads ``trainer.results`` (one entry per trained model with ``model``,
    ``y_pred``, ``accuracy``, ``precision``, ``recall`` and ``f1``),
    ``trainer.y_test``, ``trainer.feature_names`` and ``trainer.label_encoder``.
    """

    def __init__(self, trainer):
        self.trainer = trainer
        self.results = trainer.results
        self.label_encoder = trainer.label_encoder

    @staticmethod
    def _bar_layout(n_models):
        """Return ``(width, offsets)`` for ``n_models`` side-by-side bars.

        The bars of one group are centred on the group's tick. The width is
        0.25 for up to three models and shrinks beyond that so that a whole
        group never spans more than 0.8 of the tick spacing.
        """
        n_models = max(int(n_models), 1)
        width = min(0.25, 0.8 / n_models)
        centre = (n_models - 1) / 2
        return width, [(i - centre) * width for i in range(n_models)]

    def _class_labels(self):
        """Encoded labels 0..n_classes-1, in the order of ``classes_``.

        Passing these explicitly to the metric functions keeps their output
        aligned with the class names even when a class is absent from the
        test split. Assumes ``y_test`` was encoded by ``self.label_encoder``.
        """
        return np.arange(len(self.label_encoder.classes_))

    def print_comparison(self):
        """Print a table of overall metrics per model and name the best one.

        Returns:
            The comparison ``DataFrame`` (empty if no model was trained).
        """
        comparison_df = pd.DataFrame({
            'Model': list(self.results.keys()),
            'Accuracy': [r['accuracy'] for r in self.results.values()],
            'Precision': [r['precision'] for r in self.results.values()],
            'Recall': [r['recall'] for r in self.results.values()],
            'F1-Score': [r['f1'] for r in self.results.values()]
        })

        if comparison_df.empty:
            # idxmax() below would raise on an empty frame.
            print("\n No trained models to compare")
            return comparison_df

        print("\n" + comparison_df.to_string(index=False))

        # Find best model (highest weighted F1)
        best_row = comparison_df.loc[comparison_df['F1-Score'].idxmax()]
        best_model = best_row['Model']
        best_f1 = best_row['F1-Score']

        print(f"\n Best Model: {best_model} (F1-Score: {best_f1:.4f})")

        return comparison_df

    def plot_metric_comparison(self, save_path='metric_comparison.png'):
        """Save a 2x2 grid of bar charts, one overall metric per panel."""
        if not self.results:
            print(" No trained models to plot")
            return

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Model Performance Comparison', fontsize=16, fontweight='bold')

        models = list(self.results.keys())
        positions = np.arange(len(models))
        metrics = ['accuracy', 'precision', 'recall', 'f1']
        metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
        colors = ['#3498db', '#e74c3c', '#2ecc71']

        for idx, (metric, metric_name) in enumerate(zip(metrics, metric_names)):
            ax = axes[idx // 2, idx % 2]

            values = [self.results[model][metric] for model in models]
            bars = ax.bar(positions, values, color=colors, alpha=0.8, edgecolor='black')

            # Add value labels on bars
            for bar in bars:
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height,
                       f'{height:.3f}',
                       ha='center', va='bottom', fontweight='bold')

            ax.set_ylabel(metric_name, fontsize=12, fontweight='bold')
            ax.set_ylim([0, 1])
            ax.grid(axis='y', alpha=0.3)
            # Fix the tick positions before labelling them so that every
            # label is tied to its bar.
            ax.set_xticks(positions)
            ax.set_xticklabels(models, rotation=15, ha='right')

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f" Saved metric comparison to {save_path}")
        plt.close()

    def plot_per_class_metrics(self, save_path='per_class_metrics.png'):
        """Save grouped bar charts of per-class precision, recall and F1."""
        if not self.results:
            print(" No trained models to plot")
            return

        emotion_classes = self.label_encoder.classes_
        labels = self._class_labels()
        width, offsets = self._bar_layout(len(self.results))
        scorers = {
            'Precision': precision_score,
            'Recall': recall_score,
            'F1-Score': f1_score,
        }

        fig, axes = plt.subplots(1, 3, figsize=(18, 6))
        fig.suptitle('Per-Class Performance Metrics', fontsize=16, fontweight='bold')

        x = np.arange(len(emotion_classes))
        y_true = self.trainer.y_test

        for ax, (metric_name, scorer) in zip(axes, scorers.items()):
            for offset, (model_name, result) in zip(offsets, self.results.items()):
                # One score per class, in the same order as emotion_classes.
                scores = scorer(y_true, result['y_pred'], labels=labels,
                                average=None, zero_division=0)
                ax.bar(x + offset, scores, width, label=model_name, alpha=0.8)

            ax.set_xlabel('Emotion Class', fontsize=12, fontweight='bold')
            ax.set_ylabel(metric_name, fontsize=12, fontweight='bold')
            ax.set_title(f'{metric_name} by Emotion Class', fontsize=13, fontweight='bold')
            ax.set_xticks(x)
            ax.set_xticklabels(emotion_classes, rotation=45, ha='right')
            ax.legend()
            ax.grid(axis='y', alpha=0.3)
            ax.set_ylim([0, 1])

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved per-class metrics to {save_path}")
        plt.close()

    def plot_confusion_matrices(self, save_path='confusion_matrices.png'):
        """Save row-normalised confusion matrices, one panel per model."""
        n_models = len(self.results)
        if n_models == 0:
            print(" No trained models to plot")
            return

        fig, axes = plt.subplots(1, n_models, figsize=(6*n_models, 5))

        if n_models == 1:
            axes = [axes]

        emotion_classes = self.label_encoder.classes_
        labels = self._class_labels()

        for ax, (model_name, result) in zip(axes, self.results.items()):
            cm = confusion_matrix(self.trainer.y_test, result['y_pred'], labels=labels)

            # Normalize each row to proportions. A class with no test samples
            # has an all-zero row, which stays zero instead of becoming NaN.
            row_totals = cm.sum(axis=1, keepdims=True)
            cm_normalized = np.divide(
                cm, row_totals,
                out=np.zeros(cm.shape, dtype=float),
                where=row_totals != 0,
            )

            sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
                       xticklabels=emotion_classes, yticklabels=emotion_classes,
                       ax=ax, cbar_kws={'label': 'Proportion'})

            ax.set_title(f'{model_name}\nAccuracy: {result["accuracy"]:.3f}',
                        fontweight='bold', fontsize=12)
            ax.set_xlabel('Predicted', fontweight='bold')
            ax.set_ylabel('Actual', fontweight='bold')

        plt.suptitle('Confusion Matrices (Normalized)', fontsize=16, fontweight='bold', y=1.02)
        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved confusion matrices to {save_path}")
        plt.close()

    def plot_classification_reports(self):
        """Print a detailed per-class classification report for every model."""
        emotion_classes = self.label_encoder.classes_
        labels = self._class_labels()

        for model_name, result in self.results.items():
            print(f"\n{'='*60}")
            print(f"{model_name}")
            print(f"{'='*60}")

            report = classification_report(
                self.trainer.y_test, result['y_pred'],
                labels=labels,
                target_names=emotion_classes,
                digits=3,
                zero_division=0
            )
            print(report)

    def plot_feature_importance(self, save_path='feature_importance.png', top_n=50):
        """Save horizontal bar charts of the most important features.

        Only the tree-based models ('Random Forest', 'Gradient Boosting') are
        plotted. At most ``top_n`` features are shown, fewer if the model has
        fewer features.
        """
        tree_models = ['Random Forest', 'Gradient Boosting']
        available_models = [m for m in tree_models if m in self.results]

        if not available_models:
            print("\n No tree-based models available for feature importance")
            return

        fig, axes = plt.subplots(1, len(available_models), figsize=(10*len(available_models), 8))

        if len(available_models) == 1:
            axes = [axes]

        all_names = np.array(self.trainer.feature_names)

        for ax, model_name in zip(axes, available_models):
            model = self.results[model_name]['model']
            importances = model.feature_importances_

            # Never ask for more bars than there are features.
            n_shown = max(0, min(top_n, len(importances)))
            indices = np.argsort(importances)[len(importances) - n_shown:]
            feature_names = all_names[indices]

            ax.barh(range(n_shown), importances[indices], color='steelblue', alpha=0.8)
            ax.set_yticks(range(n_shown))
            ax.set_yticklabels(feature_names, fontsize=9)
            ax.set_xlabel('Feature Importance', fontweight='bold')
            ax.set_title(f'{model_name}\nTop {n_shown} Features', fontweight='bold')
            ax.grid(axis='x', alpha=0.3)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"\n Saved feature importance to {save_path}")
        plt.close()

    def generate_all_plots(self):
        """Generate all evaluation plots with their default file names."""
        self.plot_metric_comparison()
        self.plot_per_class_metrics()
        self.plot_confusion_matrices()
        self.plot_feature_importance()

# ============================================================================
# MAIN PIPELINE
# ============================================================================

def main():
    """Run the training pipeline end to end.

    Loads ``features_dataset.csv``, trains all classifiers on an 80/20
    split, prints the comparison and per-class reports, and writes the
    evaluation plots.

    Returns:
        ``(trainer, evaluator)`` for further interactive inspection.
    """
    # Configuration
    test_size = 0.2
    features_csv = 'features_dataset.csv'

    # Data: read the CSV, then derive the feature matrix and encoded labels.
    loader = EmotionDataLoader(features_csv)
    loader.load_data()
    loader.prepare_features()

    # Models: split first, then fit every classifier on that same split.
    trainer = EmotionClassifierTrainer(
        X=loader.X,
        y=loader.y,
        feature_names=loader.feature_names,
        label_encoder=loader.label_encoder,
    )
    trainer.split_data(test_size=test_size)
    trainer.train_all_models()

    # Reporting: summary table, per-class reports, then the saved figures.
    evaluator = ModelEvaluator(trainer)
    evaluator.print_comparison()
    evaluator.plot_classification_reports()
    evaluator.generate_all_plots()

    return trainer, evaluator

# Run the pipeline when executed as a script or notebook cell; the trainer
# and evaluator are kept as module-level names for later inspection.
if __name__ == "__main__":
    trainer, evaluator = main()


 Loaded 1401 samples
Total columns: 305

 Feature columns: 299

 Encoding emotion labels...

 Emotion classes:
  0: Angry (200 samples)
  1: Disgust (210 samples)
  2: Fearful (216 samples)
  3: Happy (216 samples)
  4: Neutral (145 samples)
  5: Sad (202 samples)
  6: Surprise (212 samples)

 Training samples: 1120
 Test samples: 281
 Test size: 20%

→ Training with 100 trees...
 Results of Random Forest Classifier
  Accuracy: 0.5338

→ Training with rbf kernel...
 Results of SVM Classifier
  Accuracy: 0.5231

→ Training with 100 estimators...
 Results of Gradient Boosting Classifier
  Accuracy: 0.5196

            Model  Accuracy  Precision   Recall  F1-Score
    Random Forest  0.533808   0.534474 0.533808  0.532366
              SVM  0.523132   0.539735 0.523132  0.525416
Gradient Boosting  0.519573   0.527034 0.519573  0.520455

 Best Model: Random Forest (F1-Score: 0.5324)

Random Forest
              precision    recall  f1-score   support

       Angry      0.463     0.475     