In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

In [2]:
# Load the dataset
data = pd.read_csv('voice.csv')

In [3]:
# Preprocessing
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [4]:
# Check the input data
print(f"X_train shape: {X.shape}, y_train shape: {y.shape}")

X_train shape: (3168, 20), y_train shape: (3168,)


In [5]:
# One-hot encoding for labels
y = pd.get_dummies(y).values

In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:

# Build the MLP model
# Define the input layer
input_layer = Input(shape=(X_train.shape[1],))

# Add the dense layers
x = Dense(128, activation='relu')(input_layer)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)

# Add the output layer
output_layer = Dense(2, activation='softmax')(x)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

In [10]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

In [11]:
# Train the model
model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test))

Epoch 1/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7641 - loss: 0.5007 - val_accuracy: 0.9700 - val_loss: 0.1181
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9600 - loss: 0.1208 - val_accuracy: 0.9795 - val_loss: 0.0680
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9687 - loss: 0.0918 - val_accuracy: 0.9811 - val_loss: 0.0631
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9765 - loss: 0.0704 - val_accuracy: 0.9811 - val_loss: 0.0580
Epoch 5/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9758 - loss: 0.0713 - val_accuracy: 0.9842 - val_loss: 0.0580
Epoch 6/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9772 - loss: 0.0625 - val_accuracy: 0.9748 - val_loss: 0.0610
Epoch 7/100
[1m80/80[0m [32m━━━

<keras.src.callbacks.history.History at 0x1d233ddbca0>

In [12]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {accuracy}')

20/20 - 0s - 2ms/step - accuracy: 0.9811 - loss: 0.0824
Test accuracy: 0.9810725450515747


In [13]:
# Make predictions
predictions = model.predict(X_test)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [14]:
# Convert predictions to labels
predicted_labels = np.argmax(predictions, axis=1)

In [15]:
# Check the input data
print(f"X_train shape: {X.shape}, y_train shape: {y.shape}")

X_train shape: (3168, 20), y_train shape: (3168, 2)


In [16]:
# Print sample predictions
for i in range(10):
    predicted_label = predicted_labels[i]
    true_label = ['female' if label == 0 else 'male' for label in y_test[i]]

    print(f'Predicted label: {predicted_label}, True label: {true_label[0]}')

Predicted label: 0, True label: female
Predicted label: 1, True label: male
Predicted label: 1, True label: male
Predicted label: 0, True label: female
Predicted label: 0, True label: female
Predicted label: 0, True label: female
Predicted label: 1, True label: male
Predicted label: 1, True label: male
Predicted label: 0, True label: female
Predicted label: 1, True label: male
