In [None]:
# 🎉 VERIFY THAT THE DATASET WAS DOWNLOADED
# Counts the WAV files in audio/fold1 .. audio/fold10, sets the DATASET_READY
# flag that later cells read, and smoke-tests loading plus MFCC extraction
# on one sample file.
import glob
import os  # imported here because this cell runs before the main import cell

print("🔍 VERIFIKASI DATASET URBANSOUND8K")
print("=" * 50)

# Check audio folder structure
if os.path.exists('audio'):
    print("✅ Folder audio ditemukan!")

    total_files = 0
    for fold in range(1, 11):
        fold_path = f'audio/fold{fold}'
        if os.path.exists(fold_path):
            wav_files = len(glob.glob(os.path.join(fold_path, "*.wav")))
            total_files += wav_files
            print(f"   fold{fold}: {wav_files:4d} files")
        else:
            print(f"   fold{fold}: MISSING")

    print(f"\n📊 TOTAL: {total_files}/8732 file audio")

    if total_files == 8732:
        print("🎉 DATASET LENGKAP!")
        print("✅ Siap untuk ekstraksi MFCC dari audio asli")
        DATASET_READY = True
    elif total_files > 8000:
        # A nearly complete download is still accepted as usable.
        print("✅ Dataset hampir lengkap (>8000 files)")
        print("✅ Cukup untuk penelitian")
        DATASET_READY = True
    else:
        print("⚠️  Dataset tidak lengkap")
        DATASET_READY = False
else:
    print("❌ Folder audio tidak ditemukan")
    DATASET_READY = False

# Test sample audio file
if DATASET_READY:
    fold1_files = glob.glob("audio/fold1/*.wav")
    if not fold1_files:
        # The ">8000 files" branch can pass even when fold1 itself is empty,
        # so guard the lookup instead of indexing blindly.
        print("\n⚠️  Tidak ada file WAV di audio/fold1, uji sampel dilewati")
    else:
        sample_file = fold1_files[0]
        print(f"\n🎵 Testing sample audio: {os.path.basename(sample_file)}")

        # Best-effort smoke test: any failure (including a missing librosa
        # install) is reported but does not change DATASET_READY.
        try:
            import librosa
            audio, sr = librosa.load(sample_file, sr=16000)
            print(f"✅ Audio loaded: {len(audio)} samples, {sr}Hz")
            print(f"   Duration: {len(audio)/sr:.2f} seconds")

            # Quick MFCC test
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
            print(f"✅ MFCC extraction: {mfccs.shape}")

        except Exception as e:
            print(f"❌ Error testing audio: {e}")

print(f"\n🎯 STATUS: {'READY FOR REAL DATA' if DATASET_READY else 'USING SIMULATION'}")

## 1. Import Libraries dan Setup

In [None]:
# Import library yang diperlukan
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import os
import warnings
warnings.filterwarnings('ignore')

# Deep Learning libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print("Library berhasil diimport!")
print(f"TensorFlow version: {tf.__version__}")

## 3. Ekstraksi Fitur MFCC
### Sesuai Tabel 3: Parameter Ekstraksi Fitur MFCC

In [None]:
# MFCC parameters as listed in Table 3.
# Durations are given in milliseconds; the extraction functions convert them
# to sample counts with int(sr * value / 1000).
MFCC_PARAMS = {
    'sampling_rate': 16000,  # 16 kHz target sampling rate
    'frame_size': 25,        # 25 ms analysis window (400 samples at 16 kHz)
    'hop_length': 10,        # 10 ms hop (160 samples at 16 kHz)
    'n_mfcc': 20,           # 20 MFCC coefficients
    'window': 'hamming',     # Hamming window
    'n_filter_mel': 26,      # 26 mel filters
    'n_fft': 512            # 512-point FFT
}

# MFCC extraction function (time-averaged coefficients)
def extract_mfcc_features(file_path, max_len=87):
    """
    Extract a fixed-length MFCC feature vector from one audio file.

    The file is loaded at MFCC_PARAMS['sampling_rate'], MFCCs are computed
    with the frame, hop, window, FFT and mel-filter settings from MFCC_PARAMS,
    and every coefficient is averaged over time. The resulting vector (one
    value per coefficient) is then truncated or zero-padded to ``max_len``.

    Args:
        file_path: Path of the audio file to load.
        max_len: Length of the returned vector.

    Returns:
        1-D numpy array of length ``max_len``. If anything fails, the error
        is printed and an all-zero vector of that length is returned.
    """
    try:
        # Load audio file, resampled to the configured rate
        audio, sr = librosa.load(file_path, sr=MFCC_PARAMS['sampling_rate'])

        # MFCC extraction. n_mels is passed explicitly so the configured
        # number of mel filters is actually used, matching
        # extract_mfcc_features_enhanced.
        mfccs = librosa.feature.mfcc(
            y=audio,
            sr=sr,
            n_mfcc=MFCC_PARAMS['n_mfcc'],
            n_fft=MFCC_PARAMS['n_fft'],
            hop_length=int(sr * MFCC_PARAMS['hop_length'] / 1000),
            win_length=int(sr * MFCC_PARAMS['frame_size'] / 1000),
            window=MFCC_PARAMS['window'],
            n_mels=MFCC_PARAMS.get('n_filter_mel', 26)
        )

        # Average each coefficient over all frames (this is a temporal mean,
        # not a normalisation): shape (n_mfcc, frames) -> (n_mfcc,)
        mfccs = np.mean(mfccs, axis=1)

        # Truncate or zero-pad to the fixed output length
        if len(mfccs) > max_len:
            mfccs = mfccs[:max_len]
        else:
            mfccs = np.pad(mfccs, (0, max_len - len(mfccs)), mode='constant')

        return mfccs
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not stop a batch.
        print(f"Error processing {file_path}: {e}")
        return np.zeros(max_len)

print("Fungsi ekstraksi MFCC telah didefinisikan!")
print(f"Parameter MFCC: {MFCC_PARAMS}")

## 4. Preprocessing Data

In [None]:
# 🎯 EKSTRAKSI FITUR MFCC DARI AUDIO ASLI
import os
import glob
from tqdm import tqdm

print("🎵 EKSTRAKSI FITUR MFCC DARI DATA AUDIO ASLI")
print("=" * 60)

def extract_real_features_from_urbansound8k():
    """
    Extract MFCC-based features for every row of the metadata frame ``df``.

    Reads the module-level names ``DATASET_READY`` and ``df``. For each row
    the audio path is built as audio/fold<fold>/<slice_file_name> and passed
    to extract_mfcc_features_enhanced. Missing files, failed extractions and
    unexpected errors are collected and summarised instead of aborting.

    Returns:
        (features, labels, file_info):
        - dataset not ready: simulated features, ``df['classID'].values``
          and ``None`` as file_info;
        - at least one file extracted: a feature array, a label array and a
          list of per-file dicts (filename, fold, class, classID);
        - nothing extracted: ``(None, None, None)``.
    """

    if not DATASET_READY:
        print("❌ Dataset tidak siap, menggunakan simulasi...")
        return simulate_mfcc_features(df), df['classID'].values, None

    total_files = len(df)
    features = []
    labels = []
    file_info = []
    failed_files = []

    print(f"📊 Total file yang akan diproses: {total_files}")
    print("🔄 Memulai ekstraksi MFCC...")

    # Loop through each audio file in metadata
    for idx, row in tqdm(df.iterrows(), total=total_files, desc="Ekstraksi MFCC"):
        # Construct full file path
        fold_num = row['fold']
        filename = row['slice_file_name']
        audio_file_path = os.path.join('audio', f"fold{fold_num}", filename)

        try:
            if not os.path.exists(audio_file_path):
                failed_files.append(f"{filename} (not found)")
                continue

            # Extract MFCC features (None signals a rejected or failed file)
            mfcc_features = extract_mfcc_features_enhanced(audio_file_path, target_length=87)

            if mfcc_features is not None:
                features.append(mfcc_features)
                labels.append(row['classID'])
                file_info.append({
                    'filename': filename,
                    'fold': fold_num,
                    'class': row['class'],
                    'classID': row['classID']
                })
            else:
                failed_files.append(f"{filename} (extraction failed)")

        except Exception as e:
            # Keep going; only the first 50 characters of the error are kept.
            failed_files.append(f"{filename} (error: {str(e)[:50]})")

    if not features:
        print("❌ Tidak ada file yang berhasil diekstrak!")
        return None, None, None

    # Convert to numpy arrays
    features = np.array(features)
    labels = np.array(labels)

    print(f"\n✅ EKSTRAKSI BERHASIL!")
    # Report against the number of metadata rows actually processed rather
    # than a hard-coded dataset size, so it agrees with the success rate.
    print(f"📊 File berhasil: {len(features)}/{total_files}")
    print(f"📊 File gagal: {len(failed_files)}")
    print(f"📊 Shape fitur: {features.shape}")
    print(f"📊 Shape label: {labels.shape}")
    print(f"📊 Unique classes: {len(np.unique(labels))}")

    if failed_files:
        print(f"\n⚠️  Beberapa file gagal diproses:")
        for fail in failed_files[:5]:  # Show first 5
            print(f"   - {fail}")
        if len(failed_files) > 5:
            print(f"   ... dan {len(failed_files)-5} file lainnya")

    # Calculate success rate
    success_rate = len(features) / total_files * 100
    print(f"\n📈 Success Rate: {success_rate:.1f}%")

    if success_rate > 95:
        print("🎉 EXCELLENT! Ekstraksi hampir sempurna")
    elif success_rate > 90:
        print("✅ GOOD! Ekstraksi berhasil dengan baik")
    else:
        print("⚠️  MODERATE! Beberapa file bermasalah")

    return features, labels, file_info

def extract_mfcc_features_enhanced(file_path, target_length=87):
    """
    Extract a fixed-length statistical MFCC descriptor from one audio file.

    MFCCs are computed with the settings in MFCC_PARAMS, then summarised per
    coefficient as [mean over time, standard deviation over time, mean of the
    delta]. The three parts are concatenated (3 * n_mfcc values) and
    truncated or zero-padded to ``target_length``.

    Args:
        file_path: Path of the audio file to load.
        target_length: Length of the returned vector.

    Returns:
        1-D numpy array of length ``target_length``, or ``None`` when the
        clip is shorter than 0.5 seconds or when loading/extraction fails
        (the failure reason is printed).
    """
    try:
        # Load audio file
        audio, sr = librosa.load(file_path, sr=MFCC_PARAMS['sampling_rate'])

        # Reject clips shorter than 0.5 seconds
        if len(audio) < sr * 0.5:
            return None

        # Convert the millisecond settings to sample counts
        hop_length_samples = int(sr * MFCC_PARAMS['hop_length'] / 1000)
        win_length_samples = int(sr * MFCC_PARAMS['frame_size'] / 1000)

        # Extract MFCC features
        mfccs = librosa.feature.mfcc(
            y=audio,
            sr=sr,
            n_mfcc=MFCC_PARAMS['n_mfcc'],
            n_fft=MFCC_PARAMS['n_fft'],
            hop_length=hop_length_samples,
            win_length=win_length_samples,
            window=MFCC_PARAMS['window'],
            n_mels=MFCC_PARAMS.get('n_filter_mel', 26)
        )

        # Per-coefficient statistics over the time axis
        mfcc_mean = np.mean(mfccs, axis=1)
        mfcc_std = np.std(mfccs, axis=1)
        mfcc_delta_mean = np.mean(librosa.feature.delta(mfccs), axis=1)

        # Combine statistical features
        combined_features = np.concatenate([mfcc_mean, mfcc_std, mfcc_delta_mean])

        # Truncate or zero-pad to the target length
        if len(combined_features) > target_length:
            combined_features = combined_features[:target_length]
        else:
            combined_features = np.pad(combined_features,
                                       (0, target_length - len(combined_features)),
                                       mode='constant', constant_values=0)

        return combined_features

    except Exception as e:
        # Still best-effort (callers treat None as "extraction failed"), but
        # report the reason so systematic problems such as a missing import
        # do not look like thousands of bad audio files.
        print(f"Error processing {file_path}: {e}")
        return None

# Fallback simulation function (used only when real audio is unavailable)
def simulate_mfcc_features(metadata_df, target_length=87):
    """
    Generate synthetic feature vectors, one per row of ``metadata_df``.

    The global numpy random generator is re-seeded with 42 on every call.
    Each row gets ``target_length`` standard-normal values shifted by
    0.5 * classID, so the classes differ only by a constant offset.

    Returns:
        numpy array built from the per-row vectors.
    """
    print("⚠️  Menggunakan simulasi data (hanya untuk testing)")

    np.random.seed(42)

    def _synthetic_row(class_id):
        # Draw the noise first, then add the class-specific shift.
        return np.random.normal(0, 1, target_length) + class_id * 0.5

    rows = [_synthetic_row(class_id) for class_id in metadata_df['classID']]
    return np.array(rows)

# RUN THE EXTRACTION
print("🚀 MEMULAI EKSTRAKSI FITUR...")

# Extract features from the real audio (or the simulated fallback)
X, y, audio_file_info = extract_real_features_from_urbansound8k()

if X is None or y is None:
    print("❌ Ekstraksi gagal total!")
else:
    # file_info is None when the simulation fallback produced the data
    mode_label = 'REAL AUDIO DATA' if audio_file_info else 'SIMULATION'

    print(f"\n📊 HASIL AKHIR EKSTRAKSI:")
    print(f"✅ Features shape: {X.shape}")
    print(f"✅ Labels shape: {y.shape}")
    print(f"✅ Classes: {len(np.unique(y))} unique classes")
    print(f"✅ Mode: {mode_label}")

    # Show the number of samples per class
    unique, counts = np.unique(y, return_counts=True)
    print(f"\n📈 Distribusi per kelas:")
    for class_id, count in zip(unique, counts):
        # Look up the class name from the first metadata row with this ID
        class_name = df[df['classID'] == class_id]['class'].iloc[0]
        print(f"   {class_id}: {class_name:20s} - {count:4d} samples")

status_label = 'READY FOR TRAINING' if X is not None else 'FAILED'
print(f"\n🎯 STATUS: {status_label}")

In [None]:
# 🔧 DATA PREPROCESSING FOR THE CNN
# Reshapes the flat feature matrix X into a 4-D CNN input, one-hot encodes
# the labels y and creates stratified 70/15/15 train/validation/test splits.
import math


def _select_cnn_input_shape(n_features):
    """Return ((height, width, channels), description) for a flat vector of n_features values."""
    if n_features == 87:
        # The padded 87-value vector is treated as a 1-D signal.
        return (n_features, 1, 1), "1D signal format"
    if n_features == 60:
        # Exactly 20 * 3 values.
        return (20, 3, 1), "2D MFCC format"

    side = int(math.sqrt(n_features))
    if side * side == n_features:
        return (side, side, 1), "square format"

    # Otherwise take the first factorisation h * w with h <= w <= 2h
    for h in range(1, n_features + 1):
        if n_features % h == 0:
            w = n_features // h
            if h <= w <= h * 2:
                return (h, w, 1), f"rectangular format {h}x{w}"

    # No suitable rectangle: fall back to a 1-D layout
    return (n_features, 1, 1), "fallback 1D format"


print("🔧 PREPROCESSING DATA UNTUK CNN")
print("=" * 50)

# The extraction cell leaves X / y as None when nothing could be extracted;
# fail with a clear message instead of an AttributeError on X.shape.
if X is None or y is None:
    raise RuntimeError(
        "Ekstraksi fitur gagal (X/y kosong); jalankan ulang sel ekstraksi sebelum preprocessing"
    )

print(f"📊 Input features shape: {X.shape}")
print(f"📊 Input labels shape: {y.shape}")

# Analyze feature dimensions
n_samples, n_features = X.shape
print(f"📊 Samples: {n_samples}, Features: {n_features}")

# Reshape to (samples, height, width, channels) for the CNN
input_shape, shape_label = _select_cnn_input_shape(n_features)
X_reshaped = X.reshape((n_samples,) + input_shape)
print(f"✅ Reshaped to: {X_reshaped.shape} ({shape_label})")

# Encode labels. to_categorical indexes by label value, so the width must be
# max(label) + 1; counting distinct labels breaks if a class ID is missing.
num_classes = int(np.max(y)) + 1
y_categorical = to_categorical(y, num_classes=num_classes)

print(f"📊 Number of classes: {num_classes}")
print(f"📊 Labels encoded to: {y_categorical.shape}")

# Split data with stratified sampling: 70% train, then the remaining 30%
# split evenly into validation and test.
print(f"\n🔄 Splitting data...")

X_train, X_temp, y_train, y_temp = train_test_split(
    X_reshaped, y_categorical,
    test_size=0.3,
    random_state=42,
    stratify=y
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp.argmax(axis=1)  # recover class indices from one-hot rows
)

print(f"✅ Training set: {X_train.shape}, {y_train.shape}")
print(f"✅ Validation set: {X_val.shape}, {y_val.shape}")
print(f"✅ Test set: {X_test.shape}, {y_test.shape}")

# Verify class distribution (column sums of the one-hot labels)
print(f"\n📈 Class distribution in splits:")
for split_name, y_split in [("Train", y_train), ("Val", y_val), ("Test", y_test)]:
    class_counts = np.sum(y_split, axis=0)
    print(f"   {split_name:5s}: {class_counts.astype(int)}")

print(f"\n🎯 Input shape for CNN: {input_shape}")
print(f"🎯 Ready for model training with REAL AUDIO DATA!")

## 5. Arsitektur Model CNN
### Sesuai Tabel 4: Arsitektur CNN

In [None]:
def create_cnn_model(input_shape, num_classes):
    """
    Build the (uncompiled) Sequential CNN classifier for the MFCC features.

    Args:
        input_shape: Per-sample input shape passed to the Input layer,
            e.g. (87, 1, 1) for the 87-value feature vector.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The Sequential model.
    """
    print(f"🏗️  Building CNN for input shape: {input_shape}")

    # Kernels and pooling act along the first axis only, so the same stack
    # also accepts inputs whose second axis has size 1.
    kernel = (3, 1)

    convolutional_stack = [
        tf.keras.layers.Input(shape=input_shape),

        # Conv block 1: 32 filters
        Conv2D(32, kernel, activation='relu', padding='same'),
        BatchNormalization(),

        # Conv block 2: 64 filters, then halve the first axis
        Conv2D(64, kernel, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 1)),

        # Conv block 3: 128 filters with light dropout
        Conv2D(128, kernel, activation='relu', padding='same'),
        BatchNormalization(),
        Dropout(0.25),

        # Collapse the spatial axes to one value per filter
        tf.keras.layers.GlobalAveragePooling2D(),
    ]

    classifier_head = [
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),

        Dense(128, activation='relu'),
        Dropout(0.3),

        # Output layer: one probability per class
        Dense(num_classes, activation='softmax'),
    ]

    return Sequential(convolutional_stack + classifier_head)

# Build the model with the input shape chosen during preprocessing
model = create_cnn_model(input_shape, num_classes)

# Show the model architecture
print("\n📋 ARSITEKTUR MODEL CNN:")
print("=" * 50)
model.summary()

# Count parameters
total_params = model.count_params()
trainable_params = sum(np.prod(var.shape) for var in model.trainable_variables)

# Size estimate assumes 4 bytes per parameter
approx_size_mb = total_params * 4 / (1024*1024)

print(f"\n📊 MODEL STATISTICS:")
print(f"   Total parameters: {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,}")
print(f"   Model size (approx): {approx_size_mb:.2f} MB")

## 6. Kompilasi dan Training Model
### Sesuai Tabel 5: Parameter Pelatihan Model

In [None]:
# Training parameters as listed in Table 5
TRAINING_PARAMS = {
    'optimizer': 'Adam',
    'learning_rate': 1e-4,
    'loss_function': 'categorical_crossentropy',
    'batch_size': 32,
    'epochs': 50,
    'early_stopping': True
}

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=TRAINING_PARAMS['learning_rate']),
    loss=TRAINING_PARAMS['loss_function'],
    metrics=['accuracy']
)

# Callbacks. Early stopping is only added when the configuration asks for
# it, so the 'early_stopping' flag above actually controls the behaviour.
callbacks = []
if TRAINING_PARAMS['early_stopping']:
    callbacks.append(
        EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True
        )
    )
# Always keep the weights of the best epoch (by validation accuracy) on disk
callbacks.append(
    ModelCheckpoint(
        'best_urban_sound_model.h5',
        monitor='val_accuracy',
        save_best_only=True
    )
)

print("Model berhasil dikompilasi!")
print(f"Parameter training: {TRAINING_PARAMS}")

In [None]:
# Train the model on the training split, validating on the validation split
print("Memulai training model...")

fit_options = {
    'batch_size': TRAINING_PARAMS['batch_size'],
    'epochs': TRAINING_PARAMS['epochs'],
    'validation_data': (X_val, y_val),
    'callbacks': callbacks,
    'verbose': 1,
}
history = model.fit(X_train, y_train, **fit_options)

print("Training selesai!")

## 7. Evaluasi Model

In [None]:
# Plot the training history: loss on the left, accuracy on the right
plt.figure(figsize=(12, 4))

# (history key, axis label) for each panel; validation curves use the same
# key with a 'val_' prefix.
history_panels = [('loss', 'Loss'), ('accuracy', 'Accuracy')]

for panel_index, (metric_key, metric_label) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[metric_key], label=f'Training {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'Model {metric_label}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Evaluate on the held-out test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Predictions: class index = argmax over the softmax / one-hot axis
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Class names ordered by classID. df['class'].unique() returns names in order
# of first appearance in the metadata, which is not guaranteed to match the
# numeric label order that the report and later plots rely on.
class_lookup = (
    df[['classID', 'class']]
    .drop_duplicates('classID')
    .sort_values('classID')
)
class_ids = class_lookup['classID'].to_numpy()
class_names = class_lookup['class'].to_numpy()

# Classification report; labels= pins each name to its own class ID
print("\nClassification Report:")
print(classification_report(
    y_true_classes, y_pred_classes,
    labels=class_ids, target_names=class_names, zero_division=0
))

In [None]:
# Confusion matrix of true vs. predicted class indices, drawn as a heatmap
cm = confusion_matrix(y_true_classes, y_pred_classes)

heatmap_style = {
    'annot': True,      # write the count inside every cell
    'fmt': 'd',         # counts are integers
    'cmap': 'Blues',
    'xticklabels': class_names,
    'yticklabels': class_names,
}

plt.figure(figsize=(10, 8))
sns.heatmap(cm, **heatmap_style)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

## 8. Implementasi Sistem Multimodal

In [None]:
class MultimodalFeedbackSystem:
    """
    Multimodal (visual + haptic) feedback system for deaf users.

    Wraps a trained classifier: a feature sample is classified into one of
    ten sound classes, and the predicted class is mapped to a visual cue
    (colour, icon, priority) and a haptic cue (pattern, intensity, duration,
    frequency).
    """

    def __init__(self, model):
        """
        Args:
            model: Object with ``predict(batch, verbose=0)`` returning one
                row of class scores per sample. If it exposes
                ``input_shape`` (as Keras models do), that shape is used to
                lay out samples before prediction.
        """
        self.model = model
        # Index in this list = class index predicted by the model
        self.class_names = [
            'air_conditioner', 'car_horn', 'children_playing', 'dog_bark',
            'drilling', 'engine_idling', 'gun_shot', 'jackhammer',
            'siren', 'street_music'
        ]

        # Mapping for visual feedback
        self.visual_mapping = {
            'air_conditioner': {'color': 'blue', 'icon': '❄️', 'priority': 'low'},
            'car_horn': {'color': 'red', 'icon': '🚗', 'priority': 'high'},
            'children_playing': {'color': 'yellow', 'icon': '👶', 'priority': 'medium'},
            'dog_bark': {'color': 'brown', 'icon': '🐕', 'priority': 'medium'},
            'drilling': {'color': 'orange', 'icon': '🔨', 'priority': 'high'},
            'engine_idling': {'color': 'gray', 'icon': '🚙', 'priority': 'medium'},
            'gun_shot': {'color': 'red', 'icon': '⚠️', 'priority': 'critical'},
            'jackhammer': {'color': 'orange', 'icon': '🔧', 'priority': 'high'},
            'siren': {'color': 'red', 'icon': '🚨', 'priority': 'critical'},
            'street_music': {'color': 'purple', 'icon': '🎵', 'priority': 'low'}
        }

        # Mapping for haptic feedback (intensity is scaled by the prediction
        # confidence in generate_haptic_feedback)
        self.haptic_mapping = {
            'air_conditioner': {'pattern': 'continuous_low', 'intensity': 30},
            'car_horn': {'pattern': 'sharp_burst', 'intensity': 90},
            'children_playing': {'pattern': 'gentle_wave', 'intensity': 50},
            'dog_bark': {'pattern': 'short_pulse', 'intensity': 70},
            'drilling': {'pattern': 'aggressive_vibration', 'intensity': 85},
            'engine_idling': {'pattern': 'steady_rumble', 'intensity': 40},
            'gun_shot': {'pattern': 'shock_burst', 'intensity': 100},
            'jackhammer': {'pattern': 'rapid_pulse', 'intensity': 95},
            'siren': {'pattern': 'alternating_high', 'intensity': 100},
            'street_music': {'pattern': 'rhythmic_pulse', 'intensity': 35}
        }

    def predict_sound(self, audio_features):
        """
        Classify one feature sample.

        Args:
            audio_features: Array holding the features of a single sample;
                it is reshaped into a batch of one before prediction.

        Returns:
            (predicted_class, confidence): the class name and the score of
            the winning class as a Python float.
        """
        features = np.asarray(audio_features)

        # Use the model's own per-sample input shape instead of a fixed
        # (20, 1, 1): the CNN is built from the preprocessing input_shape,
        # so a hard-coded reshape fails for any other feature length.
        model_input_shape = getattr(self.model, 'input_shape', None)
        sample_shape = tuple(model_input_shape[1:]) if model_input_shape else ()
        if sample_shape and all(dim is not None for dim in sample_shape):
            features_reshaped = features.reshape((1,) + sample_shape)
        else:
            # Shape unknown: fall back to a single column of features
            features_reshaped = features.reshape(1, -1, 1, 1)

        # Predict and pick the highest-scoring class
        prediction = self.model.predict(features_reshaped, verbose=0)
        predicted_class_idx = int(np.argmax(prediction[0]))
        confidence = float(prediction[0][predicted_class_idx])

        predicted_class = self.class_names[predicted_class_idx]

        return predicted_class, confidence

    def generate_visual_feedback(self, sound_class, confidence):
        """
        Build the visual feedback description for a detected sound.

        Returns:
            Dict with the class name, the confidence formatted to two
            decimals, and a 'visual_cue' dict (color, icon, priority,
            numeric alert_level).

        Raises:
            KeyError: if ``sound_class`` is not in the visual mapping.
        """
        visual_info = self.visual_mapping[sound_class]

        feedback = {
            'sound_detected': sound_class,
            'confidence': f"{confidence:.2f}",
            'visual_cue': {
                'color': visual_info['color'],
                'icon': visual_info['icon'],
                'priority': visual_info['priority'],
                'alert_level': self._get_alert_level(visual_info['priority'])
            }
        }

        return feedback

    def generate_haptic_feedback(self, sound_class, confidence):
        """
        Build the haptic feedback description for a detected sound.

        Returns:
            Dict with pattern, intensity (base intensity scaled by the
            confidence and truncated to int), duration and frequency.

        Raises:
            KeyError: if ``sound_class`` is not in the haptic mapping.
        """
        haptic_info = self.haptic_mapping[sound_class]

        # Adjust intensity based on confidence
        adjusted_intensity = int(haptic_info['intensity'] * confidence)

        feedback = {
            'pattern': haptic_info['pattern'],
            'intensity': adjusted_intensity,
            'duration': self._calculate_duration(sound_class),
            'frequency': self._calculate_frequency(sound_class)
        }

        return feedback

    def _get_alert_level(self, priority):
        """Map a priority name to a numeric level 1-4 (unknown names give 1)."""
        alert_levels = {
            'low': 1,
            'medium': 2,
            'high': 3,
            'critical': 4
        }
        return alert_levels.get(priority, 1)

    def _calculate_duration(self, sound_class):
        """Return the haptic duration in milliseconds (500 for unknown classes)."""
        durations = {
            'gun_shot': 100,
            'siren': 1000,
            'car_horn': 500,
            'jackhammer': 800,
            'drilling': 600,
            'dog_bark': 300,
            'children_playing': 400,
            'engine_idling': 1500,
            'air_conditioner': 2000,
            'street_music': 1200
        }
        return durations.get(sound_class, 500)

    def _calculate_frequency(self, sound_class):
        """Return the haptic frequency in Hz (100 for unknown classes)."""
        frequencies = {
            'gun_shot': 200,
            'siren': 150,
            'car_horn': 180,
            'jackhammer': 250,
            'drilling': 220,
            'dog_bark': 120,
            'children_playing': 80,
            'engine_idling': 60,
            'air_conditioner': 40,
            'street_music': 100
        }
        return frequencies.get(sound_class, 100)

    def process_real_time(self, audio_features):
        """
        Classify one sample and assemble both feedback channels.

        Returns:
            Dict with 'timestamp' (pd.Timestamp.now()), 'prediction'
            (class and confidence), 'visual_feedback' and 'haptic_feedback'.
        """
        # Predict the sound class
        sound_class, confidence = self.predict_sound(audio_features)

        # Generate feedback
        visual_feedback = self.generate_visual_feedback(sound_class, confidence)
        haptic_feedback = self.generate_haptic_feedback(sound_class, confidence)

        return {
            'timestamp': pd.Timestamp.now(),
            'prediction': {
                'class': sound_class,
                'confidence': confidence
            },
            'visual_feedback': visual_feedback,
            'haptic_feedback': haptic_feedback
        }

# Inisialisasi sistem multimodal
multimodal_system = MultimodalFeedbackSystem(model)
print("Sistem multimodal telah diinisialisasi!")

In [None]:
# Demo of the multimodal system on one test sample
sample_idx = 0
audio_sample = X_test[sample_idx]
true_label_idx = np.argmax(y_test[sample_idx])  # one-hot row -> class index

# Reuse the system's own class list so the demo cannot drift away from the
# names the predictions are decoded with.
class_names_list = list(multimodal_system.class_names)

print(f"True class: {class_names_list[true_label_idx]}")

result = multimodal_system.process_real_time(audio_sample)

visual_cue = result['visual_feedback']['visual_cue']
haptic_cue = result['haptic_feedback']

print(f"Predicted: {result['prediction']['class']}")
print(f"Confidence: {result['prediction']['confidence']:.3f}")
print(f"Visual: {visual_cue['icon']} {visual_cue['color']}")
print(f"Haptic: {haptic_cue['pattern']}, {haptic_cue['intensity']}%")

In [None]:
# Per-class performance analysis
from sklearn.metrics import precision_recall_fscore_support

# Compute the metrics for every class. labels= forces one entry per class
# even when a class is absent from the test split or never predicted, so the
# arrays always line up with class_names.
# NOTE(review): assumes class index i corresponds to class_names[i] — confirm
# that class_names is ordered by class ID.
metric_labels = np.arange(len(class_names))
precision, recall, f1, support = precision_recall_fscore_support(
    y_true_classes, y_pred_classes,
    labels=metric_labels, average=None, zero_division=0
)

# Build the results DataFrame
results_df = pd.DataFrame({
    'Class': class_names,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'Support': support
})

results_df = results_df.round(4)
print("Hasil Evaluasi per Kelas:")
print(results_df)

# Overall metrics: unweighted (macro) mean over the classes
overall_precision = np.mean(precision)
overall_recall = np.mean(recall)
overall_f1 = np.mean(f1)

print(f"\nMetrik Overall:")
print(f"Akurasi: {test_accuracy:.4f}")
print(f"Precision (macro): {overall_precision:.4f}")
print(f"Recall (macro): {overall_recall:.4f}")
print(f"F1-Score (macro): {overall_f1:.4f}")

In [None]:
# Visualise the per-class performance in three side-by-side panels
plt.figure(figsize=(15, 5))

# Panel 1: precision, recall and F1-score as grouped bars per class
plt.subplot(1, 3, 1)
x = np.arange(len(class_names))
width = 0.25

# One bar series per metric, shifted left / centre / right of the tick
bar_series = [
    (x - width, precision, 'Precision'),
    (x, recall, 'Recall'),
    (x + width, f1, 'F1-Score'),
]
for bar_positions, bar_heights, bar_label in bar_series:
    plt.bar(bar_positions, bar_heights, width, label=bar_label, alpha=0.8)

plt.xlabel('Kelas Suara')
plt.ylabel('Skor')
plt.title('Metrik Evaluasi per Kelas')
plt.xticks(x, class_names, rotation=45, ha='right')
plt.legend()
plt.grid(True, alpha=0.3)

# Panel 2: histogram of the winning-class score of every test prediction
plt.subplot(1, 3, 2)
confidence_scores = np.max(y_pred, axis=1)
plt.hist(confidence_scores, bins=20, alpha=0.7, color='skyblue', edgecolor='black')
plt.xlabel('Confidence Score')
plt.ylabel('Frekuensi')
plt.title('Distribusi Confidence Score')
plt.grid(True, alpha=0.3)

# Panel 3: accuracy restricted to predictions at or above each threshold
plt.subplot(1, 3, 3)
thresholds = np.arange(0.1, 1.0, 0.05)

# A threshold that keeps no prediction is plotted as accuracy 0
confident_masks = [confidence_scores >= threshold for threshold in thresholds]
accuracies = [
    accuracy_score(y_true_classes[mask], y_pred_classes[mask]) if mask.any() else 0
    for mask in confident_masks
]

plt.plot(thresholds, accuracies, marker='o', linewidth=2)
plt.xlabel('Confidence Threshold')
plt.ylabel('Accuracy')
plt.title('Accuracy vs Confidence Threshold')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Save the trained model, the per-class results and the research parameters
import json

output_files = [
    'urban_sound_multimodal_model.h5',
    'evaluation_results.csv',
    'research_parameters.json',
]
model_path, results_path, params_path = output_files

model.save(model_path)
results_df.to_csv(results_path, index=False)

# Collect the configuration and headline metrics; the metrics are cast to
# plain floats before being written as JSON.
model_performance = {
    'test_accuracy': float(test_accuracy),
    'test_loss': float(test_loss),
    'precision_macro': float(overall_precision),
    'recall_macro': float(overall_recall),
    'f1_score_macro': float(overall_f1)
}
research_params = {
    'mfcc_params': MFCC_PARAMS,
    'training_params': TRAINING_PARAMS,
    'model_performance': model_performance
}

with open(params_path, 'w') as f:
    json.dump(research_params, f, indent=2)

print("Model dan hasil penelitian telah disimpan!")
print("Files:")
for saved_file in output_files:
    print(f"- {saved_file}")