---
---

# Modelling

---
---

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import sys
sys.path.append("../src")
from load_config import load_constants_from_yaml
from sklearn.preprocessing import LabelEncoder

Define constants

In [8]:
constants = load_constants_from_yaml('../constants.yml')

SAMPLING_RATING = constants["SAMPLING_RATING"]
FRAME_LENGTH_ENERGY = constants["FRAME_LENGTH_ENERGY"]
THRESHOLD_PERCENTAGE = constants["THRESHOLD_PERCENTAGE"]
MIN_SILENCE_DURATION = constants["MIN_SILENCE_DURATION"]
HOP_LENGTH = constants["HOP_LENGTH"]
TEST_SIZE = 0.2
FIRST_LAYER_NEURONS = 128
SECOND_LAYER_NEURONS = 64
RANDOM_STATE = 42
processed_data_path = "../data/processed/"

In [9]:
EPOCHS = 10
BATCH_SIZE=32
VALIDATION_SPLIT=0.2

Load data

In [10]:
df = pd.read_csv(processed_data_path+"df_transformed.csv")

In [11]:
df.head(5)

Unnamed: 0,label,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13
0,1,-2.199065,-0.881256,0.591127,-0.169963,0.58226,0.748747,1.134152,0.801761,0.953608,0.726335,0.872313,0.664804,0.865082
1,1,-2.2682,-1.028002,0.353531,-0.367497,0.426105,0.683014,1.162779,0.914434,1.118985,0.903471,1.027281,0.782093,0.92111
2,1,-2.359687,-1.228744,-0.007906,-0.736143,0.002961,0.278296,0.772721,0.56124,0.813884,0.648693,0.813558,0.586497,0.760301
3,1,-2.359693,-1.228758,-0.007932,-0.736172,0.002924,0.278254,0.772673,0.561186,0.813826,0.648631,0.813491,0.586419,0.760223
4,1,-2.359697,-1.228765,-0.007944,-0.736186,0.002906,0.278235,0.77265,0.561161,0.813798,0.648601,0.813459,0.586382,0.760185


Split data set

In [12]:
X = df.drop("label", axis=1)
y = df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)

### Model architecture

Define the model

In [13]:
model = keras.Sequential([
    # Input shape based on MFCC features
    layers.Input(shape=X_train.shape[1:]),
    layers.Flatten(),  # Flatten the input
    # Dense layer with ReLU activation
    layers.Dense(FIRST_LAYER_NEURONS, activation='relu'),
    # Dense layer with ReLU activation
    layers.Dense(SECOND_LAYER_NEURONS, activation='relu'),
    # Output layer with sigmoid activation for binary classification
    layers.Dense(1, activation='sigmoid')
])

Compile the model

In [14]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Train the model

In [15]:
model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=VALIDATION_SPLIT)

Epoch 1/10


  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m11232/11240[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - accuracy: 0.6062 - loss: 0.0000e+00

  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.6062 - loss: 0.0000e+00 - val_accuracy: 0.6064 - val_loss: 0.0000e+00
Epoch 2/10
[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3ms/step - accuracy: 0.6082 - loss: 0.0000e+00 - val_accuracy: 0.6064 - val_loss: 0.0000e+00
Epoch 3/10
[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.6073 - loss: 0.0000e+00 - val_accuracy: 0.6064 - val_loss: 0.0000e+00
Epoch 4/10
[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.6089 - loss: 0.0000e+00 - val_accuracy: 0.6064 - val_loss: 0.0000e+00
Epoch 5/10
[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.6075 - loss: 0.0000e+00 - val_accuracy: 0.6064 - val_loss: 0.0000e+00
Epoch 6/10
[1m11240/11240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 4ms/step - accuracy: 0.6078 - loss: 0.0000e+00 - val_a

<keras.src.callbacks.history.History at 0x7cd627c78d60>

### Evaluate the model on training set

In [16]:
# Evaluate the model on training set
train_loss, train_accuracy = model.evaluate(X_train, y_train)
print(f"Train loss: {train_loss}")
print(f"Train accuracy: {train_accuracy}")

[1m14050/14050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2ms/step - accuracy: 0.6071 - loss: 0.0000e+00
Train loss: 0.0
Train accuracy: 0.6072508692741394


### Evaluate the model on the testing set

In [17]:
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


[1m3513/3513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.6098 - loss: 0.0000e+00
Test Loss: 0.0
Test Accuracy: 0.6092995405197144
