In [31]:
import os
import sys
import yaml
import json
import pickle
import numpy as np
import librosa
import mlflow
from tqdm import tqdm
from src.logger import logging
from src.exception import MyException
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
def load_feature_chunks(features_dir="artifacts/features"):
        """Load feature chunks from pickle files"""
        try:
            with open(os.path.join(features_dir, "features_chunks.pkl"), "rb") as f:
                features_chunks = pickle.load(f)
            with open(os.path.join(features_dir, "cats_chunks.pkl"), "rb") as f:
                cats_chunks = pickle.load(f)
            with open(os.path.join(features_dir, "subcats_chunks.pkl"), "rb") as f:
                subcats_chunks = pickle.load(f)
            
            logging.info(f"Successfully loaded {len(features_chunks)} feature chunks")
            return features_chunks, cats_chunks, subcats_chunks
            
        except Exception as e:
            logging.error(f"Failed to load feature chunks: {e}")
            raise MyException(e, sys) from e


In [3]:
features_chunks, cats_chunks, subcats_chunks =load_feature_chunks()

[ 2025-08-25 15:23:43,234 ] root - INFO - Successfully loaded 1 feature chunks


In [4]:
subcats_chunks[0]

['Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',
 'Women_Screaming',


In [6]:
cats_chunks[0]

['Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',

[{'mfccs': array([[-8.418389  , -7.3609896 , -6.0194254 , ..., -5.0742884 ,
          -5.0127244 , -5.1715326 ],
         [ 0.20323192,  0.48531485,  0.13414691, ...,  0.75484186,
           0.71932083,  0.7656631 ],
         [ 0.2030494 , -0.7411071 , -1.2514418 , ..., -0.8344193 ,
          -0.8147956 , -0.68963635],
         ...,
         [ 0.19444093,  0.12599468,  0.17711511, ...,  0.11091011,
           0.11495073,  0.18954253],
         [ 0.19435267,  0.06518317,  0.06353828, ...,  0.07250341,
           0.1479082 ,  0.204443  ],
         [ 0.19423059,  0.21830605,  0.2467118 , ...,  0.0671245 ,
           0.09788524,  0.155885  ]], shape=(40, 216), dtype=float32),
  'mel_spec': array([[-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -1.0189555 ],
         [-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -1.0189555 ],
         [-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -0.962693

In [None]:
def prepare_data(features, labels_category, labels_subcategory):
        """Prepare training data"""
        try:
            X_mfcc = np.array([f['mfccs'] for f in features])
            
            category_set = sorted(set(labels_category))
            subcategory_set = sorted(set(labels_subcategory))
            
            category_to_idx = {cat: idx for idx, cat in enumerate(category_set)}
            subcategory_to_idx = {subcat: idx for idx, subcat in enumerate(subcategory_set)}
            
            y_category = np.array([category_to_idx[cat] for cat in labels_category])
            y_subcategory = np.array([subcategory_to_idx[subcat] for subcat in labels_subcategory])
            
            y_category_onehot = tf.keras.utils.to_categorical(y_category, num_classes=len(category_set))
            y_subcategory_onehot = tf.keras.utils.to_categorical(y_subcategory, num_classes=len(subcategory_set))
            
            # Train-test split
            X_train, X_test, y_train_cat, y_test_cat, y_train_subcat, y_test_subcat = train_test_split(
                X_mfcc, y_category_onehot, y_subcategory_onehot,
                test_size=self.model_config['validation_split'],
                random_state=config['random_state'],
                stratify=y_category
            )
            
            # Add channel dimension
            X_train = X_train[..., np.newaxis]
            X_test = X_test[..., np.newaxis]
            
            return {
                'X_train': X_train, 'X_test': X_test,
                'y_train_cat': y_train_cat, 'y_test_cat': y_test_cat,
                'y_train_subcat': y_train_subcat, 'y_test_subcat': y_test_subcat,
                'category_mapping': {idx: cat for cat, idx in category_to_idx.items()},
                'subcategory_mapping': {idx: subcat for subcat, idx in subcategory_to_idx.items()}
            }
        except Exception as e:
            logging.error(f"Failed to prepare data: {e}")
            raise MyException(e, sys) from e

In [16]:
features_chunks[0]

[{'mfccs': array([[-8.418389  , -7.3609896 , -6.0194254 , ..., -5.0742884 ,
          -5.0127244 , -5.1715326 ],
         [ 0.20323192,  0.48531485,  0.13414691, ...,  0.75484186,
           0.71932083,  0.7656631 ],
         [ 0.2030494 , -0.7411071 , -1.2514418 , ..., -0.8344193 ,
          -0.8147956 , -0.68963635],
         ...,
         [ 0.19444093,  0.12599468,  0.17711511, ...,  0.11091011,
           0.11495073,  0.18954253],
         [ 0.19435267,  0.06518317,  0.06353828, ...,  0.07250341,
           0.1479082 ,  0.204443  ],
         [ 0.19423059,  0.21830605,  0.2467118 , ...,  0.0671245 ,
           0.09788524,  0.155885  ]], shape=(40, 216), dtype=float32),
  'mel_spec': array([[-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -1.0189555 ],
         [-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -1.0189555 ],
         [-1.0189555 , -1.0189555 , -1.0189555 , ..., -1.0189555 ,
          -1.0189555 , -0.962693

In [33]:
X_mfcc=np.array([f['mfccs'] for f in features_chunks[0]])

In [6]:
X_mffc

array([[[-8.41838932e+00, -7.36098957e+00, -6.01942539e+00, ...,
         -5.07428837e+00, -5.01272440e+00, -5.17153263e+00],
        [ 2.03231916e-01,  4.85314846e-01,  1.34146914e-01, ...,
          7.54841864e-01,  7.19320834e-01,  7.65663087e-01],
        [ 2.03049406e-01, -7.41107106e-01, -1.25144184e+00, ...,
         -8.34419310e-01, -8.14795613e-01, -6.89636350e-01],
        ...,
        [ 1.94440931e-01,  1.25994682e-01,  1.77115113e-01, ...,
          1.10910110e-01,  1.14950731e-01,  1.89542532e-01],
        [ 1.94352672e-01,  6.51831701e-02,  6.35382757e-02, ...,
          7.25034103e-02,  1.47908196e-01,  2.04442993e-01],
        [ 1.94230586e-01,  2.18306050e-01,  2.46711805e-01, ...,
          6.71245009e-02,  9.78852361e-02,  1.55884996e-01]],

       [[-5.85773993e+00, -5.47738314e+00, -5.57354355e+00, ...,
         -4.35873556e+00, -5.35404205e+00, -6.42968082e+00],
        [ 6.38370574e-01,  5.85139871e-01,  6.11572087e-01, ...,
          1.73825467e+00,  1.54899573e

In [7]:
category_set=sorted(set(cats_chunks[0]))

In [8]:
category_set

['Danger', 'Non-Danger']

In [9]:
subcategory_set=sorted(set(subcats_chunks[0]))  

In [10]:
subcategory_set

['Women_Screaming',
 'angry',
 'calm',
 'disgust',
 'fearful',
 'happy',
 'male_anger',
 'neutral',
 'sad',
 'surprised']

In [11]:
category_to_idx={cat:idx for idx,cat in enumerate(category_set)}

In [12]:
category_to_idx

{'Danger': 0, 'Non-Danger': 1}

In [13]:
subcategory_to_idx={subcat:idx for idx,subcat in enumerate(subcategory_set)}

In [14]:
subcategory_to_idx

{'Women_Screaming': 0,
 'angry': 1,
 'calm': 2,
 'disgust': 3,
 'fearful': 4,
 'happy': 5,
 'male_anger': 6,
 'neutral': 7,
 'sad': 8,
 'surprised': 9}

In [19]:
cats_chunks[0]

['Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',
 'Danger',

In [20]:
category_to_idx['Danger']

0

In [16]:
y_category=np.array([category_to_idx[cat] for cat in cats_chunks[0]])

In [17]:
y_category

array([0, 0, 0, ..., 1, 1, 1], shape=(1422,))

In [21]:
y_subcategory = np.array([subcategory_to_idx[subcat] for subcat in subcats_chunks[0]])  

In [22]:
y_subcategory

array([0, 0, 0, ..., 9, 9, 9], shape=(1422,))

In [23]:
y_category_onehot=tf.keras.utils.to_categorical(y_category,num_classes=len(category_set))

In [24]:
y_category_onehot

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], shape=(1422, 2))

In [25]:
y_subcategory_onehot=tf.keras.utils.to_categorical(y_category,num_classes=len(subcategory_set))

In [27]:
y_subcategory_onehot

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], shape=(1422, 10))

In [29]:
y_subcategory_onehot.shape

(1422, 10)

In [32]:
X_mfcc

NameError: name 'X_mfcc' is not defined

In [34]:
X_train, X_test, y_train_cat, y_test_cat, y_train_subcat, y_test_subcat = train_test_split(
                X_mfcc, y_category_onehot, y_subcategory_onehot,
                test_size=0.2,
                random_state=43,
                stratify=y_category
            )

In [36]:
X_train.shape

(1137, 40, 216)

In [37]:
X_train=X_train[...,np.newaxis]

In [39]:
X_train.shape

(1137, 40, 216, 1)

In [None]:
input_shape = prepared_data['X_train'].shape[1:]
num_categories = prepared_data['y_train_cat'].shape[1]
num_subcategories = prepared_data['y_train_subcat'].shape[1]

In [40]:
input_shape=X_train.shape[1:]

In [41]:
input_shape

(40, 216, 1)

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [0., 1.]], shape=(1137, 2))

In [43]:
y_train_cat.shape[1]

2

In [45]:
y_train_subcat.shape[1]

10

In [None]:
history = model.fit(
                prepared_data['X_train'],
                {
                    'category_output': prepared_data['y_train_cat'],
                    'subcategory_output': prepared_data['y_train_subcat']
                },
                validation_data=(
                    prepared_data['X_test'],
                    {
                        'category_output': prepared_data['y_test_cat'],
                        'subcategory_output': prepared_data['y_test_subcat']
                    }
                ),
                epochs=self.model_config['epochs'],
                batch_size=self.model_config['batch_size'],
                callbacks=callbacks
            )
