In [1]:
!unzip asl_split_dataset.zip

Archive:  asl_split_dataset.zip
   creating: asl_split_dataset/
   creating: asl_split_dataset/test/
   creating: asl_split_dataset/test/0/
  inflating: asl_split_dataset/test/0/hand1_0_bot_seg_3_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand1_0_bot_seg_4_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand1_0_dif_seg_1_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand1_0_left_seg_4_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand1_0_left_seg_5_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand1_0_right_seg_3_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_bot_seg_3_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_bot_seg_4_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_dif_seg_2_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_dif_seg_5_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_left_seg_1_cropped.jpeg  
  inflating: asl_split_dataset/test/0/hand2_0_left_seg_3_cropped.jpeg  
  

In [2]:
!pip install mediapipe
!pip install optuna

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1
Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.

In [3]:

import tensorflow as tf
import mediapipe as mp
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
from scipy.spatial.distance import pdist, squareform

class EnhancedHandLandmarkExtractor:
    """Extract MediaPipe hand landmarks from images and derive geometric features.

    Each detected hand yields an 86-dimensional feature vector:
    63 raw landmark coordinates (21 landmarks x (x, y, z)), 5 finger bend
    angles, 5 palm-normalized finger lengths, 3 palm descriptors and
    10 pairwise fingertip distances. Images without a detected hand yield an
    all-zero vector.
    """

    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.3,
            min_tracking_confidence=0.3
        )
        self.mp_drawing = mp.solutions.drawing_utils

        self.finger_tips = [4, 8, 12, 16, 20]  # Thumb to pinky tips
        self.finger_bases = [2, 5, 9, 13, 17]  # Thumb to pinky bases
        self.palm_landmarks = [0, 1, 5, 9, 13, 17]  # Wrist and finger bases

    def load_dataset(self, data_dir, batch_size=32):
        """Load a class-per-subdirectory image folder as a batched dataset.

        Images are resized to 224x224 RGB and scaled to [0, 1]; labels are
        one-hot encoded.

        Args:
            data_dir: Directory containing one sub-directory per class.
            batch_size: Number of images per batch.

        Returns:
            A dataset yielding ``(images, one_hot_labels)`` batches.
        """
        dataset = tf.keras.preprocessing.image_dataset_from_directory(
            data_dir,
            image_size=(224, 224),
            batch_size=batch_size,
            label_mode='categorical',
            color_mode='rgb',
            interpolation='bilinear'
        )

        def normalize_img(image, label):
            return tf.cast(image, tf.float32) / 255.0, label

        return dataset.map(normalize_img)

    @staticmethod
    def _angle_between(v1, v2):
        """Return the angle in radians between two vectors.

        Returns 0.0 when either vector has zero length, so coincident
        landmarks do not inject NaN into the feature vector.
        """
        denom = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denom == 0:
            return 0.0
        cosine = np.dot(v1, v2) / denom
        # Clip to guard against values slightly outside [-1, 1] from rounding.
        return np.arccos(np.clip(cosine, -1.0, 1.0))

    def calculate_finger_angles(self, landmarks_array):
        """Compute one bend angle per finger.

        Args:
            landmarks_array: Array of shape (21, 3) with landmark coordinates.

        Returns:
            Array of 5 angles in radians, ordered index, middle, ring, pinky,
            thumb. A degenerate (zero-length) segment yields an angle of 0.
        """
        angles = []
        # For each finger (except thumb): angle between the first two segments
        for finger_idx in range(1, 5):
            base = finger_idx * 4 + 1
            mid = finger_idx * 4 + 2
            tip = finger_idx * 4 + 3

            v1 = landmarks_array[mid] - landmarks_array[base]
            v2 = landmarks_array[tip] - landmarks_array[mid]
            angles.append(self._angle_between(v1, v2))

        # Special case for thumb: landmarks 1 -> 2 -> 4
        thumb_base = landmarks_array[1]
        thumb_mid = landmarks_array[2]
        thumb_tip = landmarks_array[4]
        angles.append(self._angle_between(thumb_mid - thumb_base,
                                          thumb_tip - thumb_mid))

        return np.array(angles)

    def calculate_finger_lengths(self, landmarks_array):
        """Compute base-to-tip finger lengths normalized by palm size.

        Palm size is the distance between landmark 0 (wrist) and landmark 5.

        Args:
            landmarks_array: Array of shape (21, 3) with landmark coordinates.

        Returns:
            Array of 5 normalized lengths (thumb first). All zeros when the
            palm size is zero, mirroring the zero fallback used for the palm
            width/height ratio.
        """
        lengths = []
        for finger_idx in range(5):  # For all fingers including thumb
            if finger_idx == 0:  # Thumb
                base = 1
                tip = 4
            else:
                base = finger_idx * 4 + 1
                tip = finger_idx * 4 + 4

            length = np.linalg.norm(landmarks_array[tip] - landmarks_array[base])
            lengths.append(length)

        lengths = np.array(lengths)

        # Normalize by palm size, guarding against a degenerate palm
        palm_size = np.linalg.norm(landmarks_array[0] - landmarks_array[5])
        if palm_size == 0:
            return np.zeros_like(lengths)

        return lengths / palm_size

    def calculate_palm_features(self, landmarks_array):
        """Compute palm area, orientation and width/height ratio.

        Args:
            landmarks_array: Array of shape (21, 3) with landmark coordinates.

        Returns:
            Array ``[palm_area, palm_angle, palm_ratio]``. The area is that of
            the 2-D convex hull of the palm landmarks, the angle is the
            direction of the wrist -> landmark 9 vector in the x/y plane, and
            the ratio is 0 when the palm height is zero.
        """
        # Calculate palm area using convex hull
        palm_points = landmarks_array[self.palm_landmarks]
        hull = cv2.convexHull(palm_points[:, :2].astype(np.float32))
        palm_area = cv2.contourArea(hull)

        # Calculate palm orientation
        wrist_to_middle = landmarks_array[9] - landmarks_array[0]
        palm_angle = np.arctan2(wrist_to_middle[1], wrist_to_middle[0])

        # Calculate palm width/height ratio
        palm_width = np.linalg.norm(landmarks_array[5] - landmarks_array[17])
        palm_height = np.linalg.norm(landmarks_array[0] - landmarks_array[9])
        palm_ratio = palm_width / palm_height if palm_height != 0 else 0

        return np.array([palm_area, palm_angle, palm_ratio])

    def calculate_finger_distance_matrix(self, landmarks_array):
        """Return the 10 condensed pairwise distances between the 5 fingertips."""
        fingertip_positions = landmarks_array[self.finger_tips]
        distances = pdist(fingertip_positions)
        return distances

    def extract_enhanced_features(self, landmarks):
        """Build the full 86-feature vector from 63 flat landmark coordinates.

        Args:
            landmarks: Flat array of 63 values (21 landmarks x (x, y, z)).

        Returns:
            1-D array: landmarks (63) + finger angles (5) + finger lengths (5)
            + palm features (3) + fingertip distances (10).
        """
        landmarks_array = landmarks.reshape(-1, 3)

        # Basic landmark positions (normalized)
        basic_features = landmarks.flatten()

        # Calculate additional features
        finger_angles = self.calculate_finger_angles(landmarks_array)
        finger_lengths = self.calculate_finger_lengths(landmarks_array)
        palm_features = self.calculate_palm_features(landmarks_array)
        fingertip_distances = self.calculate_finger_distance_matrix(landmarks_array)

        enhanced_features = np.concatenate([
            basic_features,          # Original landmark positions (63 features)
            finger_angles,           # Angles between finger segments (5 features)
            finger_lengths,          # Normalized finger lengths (5 features)
            palm_features,           # Palm characteristics (3 features)
            fingertip_distances      # Pairwise fingertip distances (10 features)
        ])

        return enhanced_features

    def extract_landmarks(self, image):
        """Detect a hand in one image and compute its feature vector.

        Args:
            image: RGB image with float values in [0, 1] (it is multiplied by
                255 and cast to uint8 before detection).

        Returns:
            Tuple ``(features, display_image, detected)``. ``features`` has 86
            entries and is all zeros when no hand is found; ``display_image``
            is the uint8 image, annotated with the landmarks when detected.
        """
        image_mp = tf.cast(image * 255, tf.uint8)
        if isinstance(image_mp, tf.Tensor):
            image_mp = image_mp.numpy()

        results = self.hands.process(image_mp)

        features = np.zeros(86)  # 63 original + 23 enhanced features

        if results.multi_hand_landmarks:
            landmarks = results.multi_hand_landmarks[0]
            # Extract basic landmarks
            basic_features = np.zeros(63)
            for idx, landmark in enumerate(landmarks.landmark):
                basic_features[idx*3:(idx*3)+3] = [landmark.x, landmark.y, landmark.z]

            # Calculate enhanced features
            features = self.extract_enhanced_features(basic_features)

            # visualization
            annotated_image = image_mp.copy()
            self.mp_drawing.draw_landmarks(
                annotated_image,
                landmarks,
                self.mp_hands.HAND_CONNECTIONS
            )
            return features, annotated_image, True

        return features, image_mp, False

    def augment_landmarks(self, features):
        """Create augmented variants of one feature vector.

        For a non-zero vector, returns the original plus 4 rotations,
        4 scalings and 3 noisy copies (12 vectors); derived features are
        recomputed from the transformed landmarks. An all-zero vector
        (no detection) is returned unchanged as a single-element list.
        """
        augmented_features = []

        if np.any(features):
            # Original features
            augmented_features.append(features)

            # More subtle rotation variations
            for angle in [-20, -10, 10, 20]:
                rotated = self._rotate_landmarks(features[:63].copy(), angle)
                enhanced_rotated = self.extract_enhanced_features(rotated)
                augmented_features.append(enhanced_rotated)

            # Scale variations
            for scale in [0.85, 0.95, 1.05, 1.15]:
                scaled = self._scale_landmarks(features[:63].copy(), scale)
                enhanced_scaled = self.extract_enhanced_features(scaled)
                augmented_features.append(enhanced_scaled)

            # Controlled noise addition
            for _ in range(3):
                noisy = self._add_noise(features[:63].copy(), 0.005)
                enhanced_noisy = self.extract_enhanced_features(noisy)
                augmented_features.append(enhanced_noisy)
        else:
            augmented_features.append(features)

        return augmented_features

    def _rotate_landmarks(self, landmarks, angle):
        """Rotate x/y coordinates by ``angle`` degrees about their centroid.

        The z coordinates are left untouched and the input array is not
        modified; a new flat array is returned.
        """
        # Copy so the caller's array is never mutated through the reshape view.
        landmarks_reshaped = np.asarray(landmarks, dtype=float).reshape(-1, 3).copy()
        center = np.mean(landmarks_reshaped[:, :2], axis=0)
        angle_rad = np.radians(angle)

        rotation_matrix = np.array([
            [np.cos(angle_rad), -np.sin(angle_rad)],
            [np.sin(angle_rad), np.cos(angle_rad)]
        ])

        centered = landmarks_reshaped[:, :2] - center
        rotated = np.dot(centered, rotation_matrix.T)
        landmarks_reshaped[:, :2] = rotated + center

        return landmarks_reshaped.flatten()

    def _scale_landmarks(self, landmarks, scale_factor):
        """Scale x/y coordinates by ``scale_factor`` about their centroid.

        The z coordinates are left untouched and the input array is not
        modified; a new flat array is returned.
        """
        # Copy so the caller's array is never mutated through the reshape view.
        landmarks_reshaped = np.asarray(landmarks, dtype=float).reshape(-1, 3).copy()
        center = np.mean(landmarks_reshaped[:, :2], axis=0)

        centered = landmarks_reshaped[:, :2] - center
        scaled = centered * scale_factor
        landmarks_reshaped[:, :2] = scaled + center

        return landmarks_reshaped.flatten()

    def _add_noise(self, landmarks, noise_factor=0.01):
        """Return ``landmarks`` plus zero-mean Gaussian noise (std ``noise_factor``)."""
        noise = np.random.normal(0, noise_factor, landmarks.shape)
        return landmarks + noise

    def save_features(self, features, labels, filename):
        """Write features and integer labels to a CSV file.

        Args:
            features: 2-D array, one row per sample.
            labels: 2-D one-hot label array; stored as its argmax index in a
                ``label`` column.
            filename: Destination CSV path.
        """
        df = pd.DataFrame(features)
        df['label'] = labels.argmax(axis=1)  # Convert one-hot to label index
        df.to_csv(filename, index=False)
        print(f"Saved {len(df)} samples to {filename}")

    def process_dataset(self, dataset, augment=True, visualize=False,
                        max_visualizations=3, skip_undetected=False):
        """Extract (and optionally augment) features for every image in a dataset.

        Args:
            dataset: Iterable of ``(images, labels)`` batches.
            augment: When True, each sample is expanded via
                ``augment_landmarks``.
            visualize: When True, show the original and annotated image for
                the first ``max_visualizations`` images.
            max_visualizations: Maximum number of images to display.
            skip_undetected: When True, images without a detected hand are
                dropped instead of contributing an all-zero feature row.

        Returns:
            Tuple ``(features, labels)`` as arrays with one row per sample.
        """
        features_list = []
        labels_list = []
        # Count displayed images separately: len(features_list) grows by up to
        # 12 per image when augmenting, so it cannot serve as an image counter.
        shown = 0

        for images, labels in tqdm(dataset, desc="Processing dataset"):
            for img, label in zip(images, labels):
                # Extract enhanced features
                features, annotated_image, detected = self.extract_landmarks(img)

                if detected or not skip_undetected:
                    if augment:
                        augmented_features = self.augment_landmarks(features)
                        features_list.extend(augmented_features)
                        labels_list.extend([label] * len(augmented_features))
                    else:
                        features_list.append(features)
                        labels_list.append(label)

                if visualize and shown < max_visualizations:
                    plt.figure(figsize=(10, 5))
                    plt.subplot(1, 2, 1)
                    plt.imshow(tf.cast(img * 255, tf.uint8))
                    plt.title('Original Image')
                    plt.axis('off')

                    plt.subplot(1, 2, 2)
                    plt.imshow(annotated_image)
                    plt.title('Detected Landmarks' if detected else 'No Landmarks Detected')
                    plt.axis('off')
                    plt.show()
                    shown += 1

        return np.array(features_list), np.array(labels_list)


In [4]:

    # Build the landmark extractor (creates the MediaPipe Hands detector).
    extractor = EnhancedHandLandmarkExtractor()

    # Load datasets (one sub-directory per class in each split)
    print("Loading datasets...")
    train_dataset = extractor.load_dataset('asl_split_dataset/train')
    val_dataset = extractor.load_dataset('asl_split_dataset/val')
    test_dataset = extractor.load_dataset('asl_split_dataset/test')

    # Process datasets
    print("Processing training dataset...")
    train_features, train_labels = extractor.process_dataset(train_dataset, augment=True, visualize=True)

    # NOTE(review): the validation split is augmented as well, so validation
    # accuracy is measured on synthetic rotated/scaled/noisy samples rather
    # than on real images only. Confirm this is intended; the test split below
    # is left un-augmented.
    print("Processing validation dataset...")
    val_features, val_labels = extractor.process_dataset(val_dataset, augment=True, visualize=False)

    print("Processing test dataset...")
    test_features, test_labels = extractor.process_dataset(test_dataset, augment=False, visualize=False)

    # Save features (one CSV per split; read back by the training cell)
    extractor.save_features(train_features, train_labels, 'train_features.csv')
    extractor.save_features(val_features, val_labels, 'val_features.csv')
    extractor.save_features(test_features, test_labels, 'test_features.csv')

Loading datasets...
Found 1581 files belonging to 36 classes.
Found 395 files belonging to 36 classes.
Found 539 files belonging to 36 classes.
Processing training dataset...


Processing dataset: 100%|██████████| 50/50 [01:10<00:00,  1.40s/it]


Processing validation dataset...


Processing dataset: 100%|██████████| 13/13 [00:13<00:00,  1.07s/it]


Processing test dataset...


Processing dataset: 100%|██████████| 17/17 [00:17<00:00,  1.04s/it]


Saved 15364 samples to train_features.csv
Saved 3893 samples to val_features.csv
Saved 539 samples to test_features.csv


In [9]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm

def visualize_dataset_samples(extractor, dataset, output_dir='visualization_samples',
                            num_samples_per_class=3):
    """Save original/landmark image pairs for a few samples of every class.

    Iterates over the dataset until ``num_samples_per_class`` images with a
    detected hand have been saved for every class, then writes an overview
    grid via ``create_overview_visualization``.

    Args:
        extractor: Object exposing ``extract_landmarks(image)`` returning
            ``(features, annotated_image, detected)``.
        dataset: Iterable of ``(images, one_hot_labels)`` batches with image
            values in [0, 1].
        output_dir: Directory that receives the ``original/`` and
            ``landmarks/`` sub-directories and the overview image.
        num_samples_per_class: Number of detected samples to keep per class.

    Returns:
        List of dicts with keys ``label``, ``original``, ``landmark`` and
        ``features``, sorted by label.
    """
    # Create output directories
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(f"{output_dir}/original", exist_ok=True)
    os.makedirs(f"{output_dir}/landmarks", exist_ok=True)

    # Dictionary to keep track of samples per class
    class_counts = {}
    visualization_data = []
    all_classes_done = False

    for images, labels in tqdm(dataset, desc="Generating visualizations"):
        for img, label in zip(images, labels):
            label_vec = np.asarray(label)
            num_classes = label_vec.shape[-1]

            # Class indices 0-9 are the digits, 10 and above the letters A, B, ...
            # (the offset must subtract 10, otherwise index 10 maps to 'K').
            class_idx = int(np.argmax(label_vec))
            class_name = str(class_idx) if class_idx < 10 else chr(ord('A') + class_idx - 10)

            # Skip if we have enough samples for this class
            if class_counts.get(class_name, 0) >= num_samples_per_class:
                continue

            # Register the class even if this particular image has no detection
            class_counts.setdefault(class_name, 0)

            # Extract landmarks and get visualization
            features, annotated_image, detected = extractor.extract_landmarks(img)

            if detected:
                original_path = f"{output_dir}/original/{class_name}_{class_counts[class_name]}.png"
                landmark_path = f"{output_dir}/landmarks/{class_name}_{class_counts[class_name]}.png"

                # Save original image
                plt.imsave(original_path, tf.cast(img * 255, tf.uint8).numpy().astype(np.uint8))

                # Save landmark visualization
                plt.imsave(landmark_path, annotated_image)

                # Add to visualization data
                visualization_data.append({
                    'label': class_name,
                    'original': original_path,
                    'landmark': landmark_path,
                    'features': features
                })

                class_counts[class_name] += 1

            # Stop once every class has enough samples
            if len(class_counts) == num_classes and all(
                    count >= num_samples_per_class for count in class_counts.values()):
                all_classes_done = True
                break

        # The inner break only leaves the current batch; stop iterating batches too.
        if all_classes_done:
            break

    # Sort visualization data by label
    visualization_data.sort(key=lambda x: x['label'])

    # Create overview visualization
    create_overview_visualization(visualization_data, output_dir)

    return visualization_data

def create_overview_visualization(visualization_data, output_dir):
    """Create an overview grid of all samples and save it as ``overview.png``.

    Each class occupies one row; every sample contributes two columns
    (original image, landmark image).

    Args:
        visualization_data: List of dicts with keys ``label``, ``original``
            (image path) and ``landmark`` (image path).
        output_dir: Directory in which ``overview.png`` is written.

    Returns:
        None. Nothing is written when ``visualization_data`` is empty.
    """
    # Nothing to draw: avoid failing on an empty grid.
    if not visualization_data:
        return

    # Group data by class
    class_groups = {}
    for item in visualization_data:
        class_groups.setdefault(item['label'], []).append(item)

    # Calculate grid layout; size the columns for the largest class so rows
    # with fewer samples cannot overflow into the next row.
    num_classes = len(class_groups)
    samples_per_class = max(len(samples) for samples in class_groups.values())
    num_cols = samples_per_class * 2

    # Create figure with enough space for all samples
    fig = plt.figure(figsize=(samples_per_class * 6, num_classes * 3))

    # Plot each class
    for class_idx, (class_label, samples) in enumerate(sorted(class_groups.items())):
        for sample_idx, sample in enumerate(samples):
            first_cell = class_idx * num_cols + sample_idx * 2 + 1

            # Plot original image. The class is put in the title because axis
            # labels are hidden once the axis is switched off.
            ax_original = fig.add_subplot(num_classes, num_cols, first_cell)
            ax_original.imshow(plt.imread(sample['original']))
            ax_original.set_title(f"Class {class_label}: Original")
            ax_original.axis('off')

            # Plot landmark image
            ax_landmark = fig.add_subplot(num_classes, num_cols, first_cell + 1)
            ax_landmark.imshow(plt.imread(sample['landmark']))
            ax_landmark.set_title("Landmarks")
            ax_landmark.axis('off')

    fig.tight_layout()
    fig.savefig(f"{output_dir}/overview.png", dpi=300, bbox_inches='tight')
    plt.close(fig)


# Build a fresh extractor and reload the training split for visualization.
extractor = EnhancedHandLandmarkExtractor()
dataset = extractor.load_dataset('asl_split_dataset/train')

# Save up to 3 detected samples per class plus an overview grid.
visualization_data = visualize_dataset_samples(
        extractor,
        dataset,
        output_dir='asl_visualization',
        num_samples_per_class=3
    )

# Report how many sample pairs were written.
print(f"Generated visualizations for {len(visualization_data)} samples")
print("Results saved in 'asl_visualization' directory")


Found 1581 files belonging to 36 classes.


Generating visualizations: 100%|██████████| 50/50 [00:20<00:00,  2.44it/s]


Generated visualizations for 108 samples
Results saved in 'asl_visualization' directory


In [6]:

import os
import datetime
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Dense, Dropout, BatchNormalization, Input,
    Multiply, Add, LayerNormalization, Activation,
    Lambda, Concatenate
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (
    EarlyStopping, ReduceLROnPlateau, ModelCheckpoint,
    TensorBoard
)
import optuna
import pickle
import shap
from collections import defaultdict

def configure_gpu():
    """Enable GPU memory growth and mixed precision when a GPU is present.

    Any error raised while configuring is caught and reported; the function
    never raises and always returns None.
    """
    try:
        gpus = tf.config.list_physical_devices('GPU')
        if not gpus:
            print("No GPU found. Using CPU for training.")
            return

        # Memory growth has to be set per device.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Found {len(gpus)} GPU(s). Memory growth enabled.")

        tf.keras.mixed_precision.set_global_policy('mixed_float16')
        print("Mixed precision training enabled")
    except Exception as e:
        print(f"GPU configuration error: {str(e)}")
        print("Falling back to CPU training")

class GestureAnalyzer:
    """Train, tune and analyze a dense classifier on hand-landmark features.

    The workflow (``train_and_evaluate``) loads the feature CSVs, tunes
    hyperparameters with Optuna, trains a final model with the best
    parameters, and writes a confusion matrix and a text report.
    """

    def __init__(self, num_classes=36, input_dim=86):
        """Store the problem dimensions and create the output directories.

        Args:
            num_classes: Number of gesture classes.
            input_dim: Number of features per sample (63 landmark values
                followed by the derived geometric features).
        """
        self.num_classes = num_classes
        self.input_dim = input_dim
        self.scaler = StandardScaler()
        self.model = None
        self.history = None
        # Label order: digits 0-9 followed by letters A-Z.
        self.class_names = [str(i) for i in range(10)] + [chr(i) for i in range(65, 65 + 26)]

        # Create directories for saving results
        os.makedirs('models', exist_ok=True)
        os.makedirs('analysis', exist_ok=True)
        os.makedirs('plots', exist_ok=True)

    def attention_block(self, x, units):
        """Gate ``x`` element-wise with a learned sigmoid mask of width ``units``."""
        attention = Dense(units, activation='tanh')(x)
        attention = Dense(units, activation='sigmoid')(attention)
        return Multiply()([x, attention])

    def residual_block(self, x, units, dropout_rate):
        """Residual block with pre-activation.

        Two (LayerNorm -> ReLU -> Dense -> Dropout) sub-blocks whose output is
        added to the input; the input is projected with a Dense layer when its
        width differs from ``units``.
        """
        # Store input for residual connection
        input_tensor = x

        # First sub-block
        x = LayerNormalization()(x)
        x = Activation('relu')(x)
        x = Dense(units)(x)
        x = Dropout(dropout_rate)(x)

        # Second sub-block
        x = LayerNormalization()(x)
        x = Activation('relu')(x)
        x = Dense(units)(x)
        x = Dropout(dropout_rate)(x)

        # Adjust input dimensions if needed
        if input_tensor.shape[-1] != units:
            input_tensor = Dense(units)(input_tensor)

        # Add residual connection
        x = Add()([x, input_tensor])
        return x

    def load_and_preprocess_data(self):
        """Load the three feature CSVs, standardize them and one-hot the labels.

        The scaler is fitted on the training split only and then applied to
        the validation and test splits; it is pickled to
        ``models/feature_scaler.pkl``.

        Returns:
            ``(X_train, y_train), (X_val, y_val), (X_test, y_test)`` with
            float32 features and one-hot labels.

        Raises:
            ValueError: If the number of feature columns in the training CSV
                differs from ``input_dim``.
        """
        print("Loading and preprocessing data...")

        # Load datasets
        train_df = pd.read_csv('train_features.csv')
        val_df = pd.read_csv('val_features.csv')
        test_df = pd.read_csv('test_features.csv')

        # Create feature names for analysis
        self.feature_names = [f'landmark_{i}' for i in range(63)]
        self.feature_names.extend([f'geometric_{i}' for i in range(self.input_dim - 63)])

        # Separate features and labels
        X_train = train_df.drop('label', axis=1).values
        y_train = train_df['label'].values

        X_val = val_df.drop('label', axis=1).values
        y_val = val_df['label'].values

        X_test = test_df.drop('label', axis=1).values
        y_test = test_df['label'].values

        # Fail early with a clear message instead of a shape error inside Keras.
        if X_train.shape[1] != self.input_dim:
            raise ValueError(
                f"Expected {self.input_dim} feature columns, "
                f"found {X_train.shape[1]} in train_features.csv"
            )

        # Scale features
        X_train = self.scaler.fit_transform(X_train)
        X_val = self.scaler.transform(X_val)
        X_test = self.scaler.transform(X_test)

        # Save scaler
        with open('models/feature_scaler.pkl', 'wb') as f:
            pickle.dump(self.scaler, f)
        print("Scaler saved to models/feature_scaler.pkl")

        # Convert to float32 and categorical labels
        X_train = X_train.astype(np.float32)
        X_val = X_val.astype(np.float32)
        X_test = X_test.astype(np.float32)

        y_train = tf.keras.utils.to_categorical(y_train, self.num_classes)
        y_val = tf.keras.utils.to_categorical(y_val, self.num_classes)
        y_test = tf.keras.utils.to_categorical(y_test, self.num_classes)

        return (X_train, y_train), (X_val, y_val), (X_test, y_test)

    def create_model(self, params):
        """Build and compile the two-branch classifier.

        The first 63 inputs (landmarks) and the remaining inputs (geometric
        features) are processed by separate attention + residual branches,
        concatenated and classified with a softmax layer.

        Args:
            params: Dict with keys ``units_1``, ``units_2``, ``units_3``,
                ``dropout_1``, ``dropout_2``, ``dropout_3`` and
                ``learning_rate``. Extra keys are ignored.

        Returns:
            The compiled Keras model.
        """
        inputs = Input(shape=(self.input_dim,))

        # Split features
        # NOTE(review): Lambda layers wrapping Python lambdas may require
        # unsafe deserialization to reload the saved .keras file - confirm
        # against the Keras version in use.
        landmarks = Lambda(lambda x: x[:, :63])(inputs)
        enhanced = Lambda(lambda x: x[:, 63:])(inputs)

        # Process landmark features with attention
        x1 = Dense(params['units_1'])(landmarks)
        x1 = self.attention_block(x1, params['units_1'])
        x1 = self.residual_block(x1, params['units_1'], params['dropout_1'])

        # Process enhanced features
        x2 = Dense(params['units_2'])(enhanced)
        x2 = self.attention_block(x2, params['units_2'])
        x2 = self.residual_block(x2, params['units_2'], params['dropout_2'])

        # Combine features
        x = Concatenate()([x1, x2])
        x = Dense(params['units_3'])(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(params['dropout_3'])(x)

        # Keep the softmax in float32 so it stays numerically stable when the
        # mixed_float16 policy is active (no effect under the float32 policy).
        outputs = Dense(self.num_classes, activation='softmax', dtype='float32')(x)

        model = Model(inputs=inputs, outputs=outputs)

        optimizer = Adam(learning_rate=params['learning_rate'])
        if tf.keras.mixed_precision.global_policy().name == 'mixed_float16':
            optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

        model.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

    def train_and_evaluate(self):
        """Tune hyperparameters, train the final model and analyze it on the test set."""
        # Load data
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = self.load_and_preprocess_data()

        # Optimize hyperparameters
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: self.objective(trial, X_train, y_train, X_val, y_val),
                      n_trials=10)
        best_params = study.best_params

        # Train final model
        self.model = self.create_model(best_params)

        # Set up callbacks
        callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=5),
            ModelCheckpoint('models/best_model.keras', monitor='val_accuracy', save_best_only=True),
            TensorBoard(log_dir=f"logs/fit/{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
        ]

        # Train model with the batch size that was tuned together with the
        # other hyperparameters rather than a fixed value.
        self.history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=best_params['batch_size'],
            callbacks=callbacks,
            verbose=1
        )

        # Plot the learning curves of the final model (not of a tuning trial)
        plot_final_metrics(self.history)

        # Evaluate on test set
        self.perform_analysis(X_test, y_test)

    def objective(self, trial, X_train, y_train, X_val, y_val):
        """Optuna objective function for hyperparameter optimization.

        Trains a candidate model for up to 50 epochs with early stopping.

        Returns:
            The best validation accuracy reached during the trial.
        """
        params = {
            'units_1': trial.suggest_int('units_1', 128, 512),
            'units_2': trial.suggest_int('units_2', 64, 256),
            'units_3': trial.suggest_int('units_3', 32, 128),
            'dropout_1': trial.suggest_float('dropout_1', 0.2, 0.5),
            'dropout_2': trial.suggest_float('dropout_2', 0.2, 0.5),
            'dropout_3': trial.suggest_float('dropout_3', 0.2, 0.5),
            # suggest_loguniform is deprecated; log=True is the replacement.
            'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True),
            'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128])
        }

        model = self.create_model(params)

        early_stopping = EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True
        )

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=50,
            batch_size=params['batch_size'],
            callbacks=[early_stopping],
            verbose=0
        )

        return max(history.history['val_accuracy'])

    def perform_analysis(self, X_test, y_test):
        """Predict on the test set and write the confusion matrix and report.

        Args:
            X_test: Scaled test features.
            y_test: One-hot test labels.
        """
        # Get predictions
        y_pred = self.model.predict(X_test)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true_classes = np.argmax(y_test, axis=1)

        # Generate confusion matrix; fix the label set so the matrix always
        # has one row/column per class even if a class is absent from the data.
        all_labels = list(range(self.num_classes))
        conf_matrix = confusion_matrix(y_true_classes, y_pred_classes, labels=all_labels)

        # Plot confusion matrix
        plt.figure(figsize=(15, 15))
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                   xticklabels=self.class_names,
                   yticklabels=self.class_names)
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig('plots/confusion_matrix.png')
        plt.close()

        # Analyze misclassifications
        misclass_analysis = self.analyze_misclassifications(y_true_classes, y_pred_classes)

        # Feature importance analysis using SHAP
        # explainer = shap.DeepExplainer(self.model, X_test[:100])
        # shap_values = explainer.shap_values(X_test[:100])

        # # Plot SHAP summary
        # plt.figure(figsize=(12, 8))
        # shap.summary_plot(shap_values, X_test[:100], feature_names=self.feature_names,
        #                  show=False)
        # NOTE(review): with the SHAP code above disabled, this saves a blank
        # placeholder figure. Re-enable the analysis or drop the file.
        plt.savefig('plots/shap_summary.png')
        plt.close()

        # Generate and save analysis report
        self.generate_analysis_report(conf_matrix, misclass_analysis, y_true_classes, y_pred_classes)

    def analyze_misclassifications(self, y_true, y_pred):
        """Summarize errors per true class.

        Args:
            y_true: Sequence of true class indices.
            y_pred: Sequence of predicted class indices.

        Returns:
            Dict mapping each misclassified true class name to
            ``{'total_errors': int, 'common_confusions': {pred_name: count}}``
            with at most the three most frequent confusions.
        """
        misclassifications = defaultdict(list)
        for true, pred in zip(y_true, y_pred):
            if true != pred:
                misclassifications[self.class_names[true]].append(self.class_names[pred])

        analysis = {}
        for true_class, pred_classes in misclassifications.items():
            common_confusions = pd.Series(pred_classes).value_counts().head(3)
            analysis[true_class] = {
                'total_errors': len(pred_classes),
                'common_confusions': common_confusions.to_dict()
            }

        return analysis

    def generate_analysis_report(self, conf_matrix, misclass_analysis, y_true, y_pred):
        """Write the classification report, error analysis and model summary.

        The report is saved to ``analysis/model_analysis.txt``.
        """
        # Pass the full label set so target_names always lines up, and report
        # 0 instead of warning for classes that were never predicted.
        report = classification_report(
            y_true, y_pred,
            labels=list(range(self.num_classes)),
            target_names=self.class_names,
            zero_division=0
        )

        # UTF-8 so non-ASCII characters in the model summary can be written
        # regardless of the platform's default encoding.
        with open('analysis/model_analysis.txt', 'w', encoding='utf-8') as f:
            f.write("Hand Gesture Recognition Model Analysis\n")
            f.write("=====================================\n\n")

            f.write("Classification Report:\n")
            f.write(report)
            f.write("\n\n")

            f.write("Misclassification Analysis:\n")
            f.write("-------------------------\n")
            for class_name, analysis in misclass_analysis.items():
                f.write(f"\nClass {class_name}:\n")
                f.write(f"Total errors: {analysis['total_errors']}\n")
                f.write("Most common confusions:\n")
                for confused_with, count in analysis['common_confusions'].items():
                    f.write(f"  - Confused with {confused_with}: {count} times\n")

            f.write("\nModel Architecture:\n")
            f.write("------------------\n")
            self.model.summary(print_fn=lambda x: f.write(x + '\n'))

def plot_final_metrics(history):
    """Plot training/validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` dict holds the per-epoch series
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.

    The figure is written to ``plots/training_metrics.png`` and then closed.
    """
    fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # One entry per panel: axes, title, y-label, legend position, curves.
    panels = (
        (accuracy_ax, 'Model Accuracy', 'Accuracy', 'lower right', (
            ('accuracy', 'Training Accuracy', '#2563eb'),
            ('val_accuracy', 'Validation Accuracy', '#16a34a'),
        )),
        (loss_ax, 'Model Loss', 'Loss', 'upper right', (
            ('loss', 'Training Loss', '#dc2626'),
            ('val_loss', 'Validation Loss', '#ea580c'),
        )),
    )

    for ax, title, y_label, legend_loc, curves in panels:
        for metric_key, curve_label, curve_color in curves:
            ax.plot(history.history[metric_key], label=curve_label,
                    color=curve_color, linewidth=2)
        ax.set_title(title, fontsize=12, pad=10)
        ax.set_xlabel('Epoch', fontsize=10)
        ax.set_ylabel(y_label, fontsize=10)
        ax.legend(loc=legend_loc)
        ax.grid(True, linestyle='--', alpha=0.7)

    fig.tight_layout()
    fig.savefig('plots/training_metrics.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

def main():
    """Configure the GPU, run the full training/analysis and list the outputs."""
    configure_gpu()

    print("Starting comprehensive gesture recognition analysis...")
    GestureAnalyzer().train_and_evaluate()

    # Summary of the artifacts written by the run.
    summary_lines = (
        "\nAnalysis completed. Results saved in:",
        "- models/best_model.keras",
        "- models/feature_scaler.pkl",
        "- plots/confusion_matrix.png",
        "- plots/shap_summary.png",
        "- analysis/model_analysis.txt",
    )
    for line in summary_lines:
        print(line)


main()


GPU configuration error: Physical devices cannot be modified after being initialized
Falling back to CPU training
Starting comprehensive gesture recognition analysis...
Loading and preprocessing data...


[I 2024-12-12 14:07:51,897] A new study created in memory with name: no-name-d8f7b587-d513-4f95-b929-4d93aff00517


Scaler saved to models/feature_scaler.pkl


  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),
[I 2024-12-12 14:08:42,267] Trial 0 finished with value: 0.9167736768722534 and parameters: {'units_1': 174, 'units_2': 153, 'units_3': 59, 'dropout_1': 0.4497069903640959, 'dropout_2': 0.3700507102030728, 'dropout_3': 0.3444633884528753, 'learning_rate': 0.00011641613627571961, 'batch_size': 64}. Best is trial 0 with value: 0.9167736768722534.
[I 2024-12-12 14:09:59,339] Trial 1 finished with value: 0.9337272047996521 and parameters: {'units_1': 435, 'units_2': 228, 'units_3': 71, 'dropout_1': 0.2201455098789232, 'dropout_2': 0.21567594520644612, 'dropout_3': 0.3546124040979851, 'learning_rate': 0.00019746134230359764, 'batch_size': 32}. Best is trial 1 with value: 0.9337272047996521.
[I 2024-12-12 14:10:50,541] Trial 2 finished with value: 0.9278191328048706 and parameters: {'units_1': 182, 'units_2': 110, 'units_3': 114, 'dropout_1': 0.2072776698883728, 'dropout_2': 0.20392595401328192, 'dropout_3': 0.37749136

Epoch 1/100
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 22ms/step - accuracy: 0.5658 - loss: 1.8672 - val_accuracy: 0.8664 - val_loss: 0.4931 - learning_rate: 1.9746e-04
Epoch 2/100
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.8956 - loss: 0.5681 - val_accuracy: 0.8942 - val_loss: 0.3458 - learning_rate: 1.9746e-04
Epoch 3/100
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9385 - loss: 0.3207 - val_accuracy: 0.9119 - val_loss: 0.2966 - learning_rate: 1.9746e-04
Epoch 4/100
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9488 - loss: 0.2534 - val_accuracy: 0.9163 - val_loss: 0.3011 - learning_rate: 1.9746e-04
Epoch 5/100
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9642 - loss: 0.1765 - val_accuracy: 0.9181 - val_loss: 0.3011 - learning_rate: 1.9746e-04
Epoch 6/100
[1m481/481[0m [32m━━━━━━━━━━


Analysis completed. Results saved in:
- models/best_model.keras
- models/feature_scaler.pkl
- plots/confusion_matrix.png
- plots/shap_summary.png
- analysis/model_analysis.txt
