In [2]:
# Pin NumPy's global random generator so NumPy-driven randomness repeats on every run.
# NOTE(review): only NumPy is seeded here; TensorFlow's own generator is not touched by this cell.
from numpy.random import seed

seed(seed=1)

In [3]:
# Dependencies
import numpy as np
import pandas as pd

In [4]:
# Display the version string of the Keras API bundled with the installed TensorFlow
# (the bare last expression is what the cell shows as its output).
import tensorflow
tensorflow.keras.__version__

'2.2.4-tf'

In [5]:
# Load the survey responses and clean them in two passes:
#   1. drop every column whose values are all null
#   2. drop every remaining row that still contains at least one null
df = (
    pd.read_csv("Data/responses.csv")
    .dropna(axis='columns', how='all')
    .dropna()
)
# Preview the first rows of the cleaned frame
df.head()

Unnamed: 0,Music,Slow songs or fast songs,Dance,Folk,Country,Classical music,Musical,Pop,Rock,Metal or Hardrock,...,Age,Height,Weight,Number of siblings,Gender,Left - right handed,Education,Only child,Village - town,House - block of flats
0,5.0,3.0,2.0,1.0,2.0,2.0,1.0,5.0,5.0,1.0,...,20.0,163.0,48.0,1.0,female,right handed,college/bachelor degree,no,village,block of flats
1,4.0,4.0,2.0,1.0,1.0,1.0,2.0,3.0,5.0,4.0,...,19.0,163.0,58.0,2.0,female,right handed,college/bachelor degree,no,city,block of flats
2,5.0,5.0,2.0,2.0,3.0,4.0,5.0,3.0,5.0,3.0,...,20.0,176.0,67.0,2.0,female,right handed,secondary school,no,city,block of flats
4,5.0,3.0,4.0,3.0,2.0,4.0,3.0,5.0,3.0,1.0,...,20.0,170.0,59.0,1.0,female,right handed,secondary school,no,village,house/bungalow
5,5.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,5.0,5.0,...,20.0,186.0,77.0,1.0,male,right handed,secondary school,no,city,block of flats


In [11]:
# Keep all rows, but only the six genre-rating columns used as model inputs
selected_features = df.loc[
    :,
    ['Folk', 'Country', 'Dance', 'Classical music', 'Pop', 'Rock'],
]


In [12]:
# Inputs are the previously selected feature columns; the target is the "Gender" column
X, y = selected_features, df["Gender"]
# Print both shapes so a row-count mismatch between inputs and target is visible
print(f"{X.shape} {y.shape}")

(674, 6) (674,)


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [14]:
# Split into train and test parts. No test_size is given, so scikit-learn's default applies.
# stratify=y keeps the class proportions of y in both parts; random_state=1 fixes which rows land where.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=1)

In [15]:
# Learn the per-feature min/max from the training rows only, then apply that same
# scaling to both splits so no information from the test rows enters the fit.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = X_scaler.transform(X_train), X_scaler.transform(X_test)

In [16]:
# Step 1: map the string labels to integer codes.
# The encoder is fitted on the training labels and then reused for both splits.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = map(label_encoder.transform, (y_train, y_test))

In [17]:
# Step 2: Convert encoded labels to one-hot-encoding.
# The width is pinned to the number of classes the encoder learned; without it,
# to_categorical infers the width from the largest label in each array separately,
# so the train and test matrices could end up with different column counts.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [23]:
# Build the network in one go: two 100-unit ReLU hidden layers on top of the
# 6 input features, followed by a 2-unit softmax output layer.
model = Sequential([
    Dense(units=100, activation='relu', input_dim=6),
    Dense(units=100, activation='relu'),
    Dense(units=2, activation='softmax'),
])

In [24]:
# Configure training (this cell only compiles; fitting happens in a later cell):
# categorical cross-entropy loss to match the one-hot targets, the Adam optimizer,
# and accuracy reported as the tracked metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [25]:
# Print the layer-by-layer summary of the model (layers, output shapes, parameter counts)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               700       
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 11,002
Trainable params: 11,002
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Train for 60 epochs on the scaled training features and one-hot targets,
# shuffling the training data each epoch and logging one line per epoch (verbose=2).
# The call is the cell's last expression, so the returned History object is displayed.
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, verbose=2, shuffle=True)

Epoch 1/60
505/505 - 0s - loss: 0.6726 - acc: 0.6020
Epoch 2/60
505/505 - 0s - loss: 0.6662 - acc: 0.5960
Epoch 3/60
505/505 - 0s - loss: 0.6674 - acc: 0.6040
Epoch 4/60
505/505 - 0s - loss: 0.6555 - acc: 0.6000
Epoch 5/60
505/505 - 0s - loss: 0.6553 - acc: 0.6020
Epoch 6/60
505/505 - 0s - loss: 0.6514 - acc: 0.6317
Epoch 7/60
505/505 - 0s - loss: 0.6487 - acc: 0.6139
Epoch 8/60
505/505 - 0s - loss: 0.6467 - acc: 0.6257
Epoch 9/60
505/505 - 0s - loss: 0.6457 - acc: 0.6218
Epoch 10/60
505/505 - 0s - loss: 0.6459 - acc: 0.6198
Epoch 11/60
505/505 - 0s - loss: 0.6433 - acc: 0.6257
Epoch 12/60
505/505 - 0s - loss: 0.6415 - acc: 0.6455
Epoch 13/60
505/505 - 0s - loss: 0.6390 - acc: 0.6416
Epoch 14/60
505/505 - 0s - loss: 0.6373 - acc: 0.6337
Epoch 15/60
505/505 - 0s - loss: 0.6403 - acc: 0.6396
Epoch 16/60
505/505 - 0s - loss: 0.6354 - acc: 0.6515
Epoch 17/60
505/505 - 0s - loss: 0.6315 - acc: 0.6594
Epoch 18/60
505/505 - 0s - loss: 0.6305 - acc: 0.6495
Epoch 19/60
505/505 - 0s - loss: 0.62

<tensorflow.python.keras.callbacks.History at 0x20c77e9d128>

In [27]:
# Score the trained model on the held-out split; evaluate() yields the loss followed
# by the compiled metric (accuracy), which are unpacked and reported.
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

169/169 - 0s - loss: 0.6722 - acc: 0.6213
Normal Neural Network - Loss: 0.672189685014578, Accuracy: 0.6213017702102661


In [28]:
# Predict the first five test rows.
# Sequential.predict_classes is deprecated and absent from newer TensorFlow releases,
# so take the argmax over the softmax probabilities instead; for a multi-class
# softmax output this yields the same integer class indices.
class_probabilities = model.predict(X_test_scaled[:5])
encoded_predictions = np.argmax(class_probabilities, axis=-1)
# Map the integer class indices back to the original string labels
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [29]:
# Show the predicted labels next to the true labels for the same five test rows
print(
    f"Predicted classes: {prediction_labels}",
    f"Actual Labels: {list(y_test[:5])}",
    sep="\n",
)

Predicted classes: ['male' 'female' 'female' 'female' 'female']
Actual Labels: ['male', 'female', 'female', 'male', 'female']
