In [59]:
import warnings
warnings.filterwarnings("ignore")

In [60]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.regularizers import l2

# Data 1 CNN

In [61]:
column_names = ["acousticness",	"liveness",	"speechiness",	"valence",	"music_genre"]
df = pd.read_csv("data/task2/music_genre.csv", names = column_names)
df.head(2)

Unnamed: 0,acousticness,liveness,speechiness,valence,music_genre
0,0.00468,0.115,0.0748,0.759,0
1,0.0127,0.124,0.03,0.531,0


In [62]:
df["music_genre"].value_counts()

music_genre
0    5000
1    5000
2    5000
3    5000
4    5000
5    5000
6    5000
7    5000
8    5000
9    5000
Name: count, dtype: int64

In [63]:
df.dtypes
# all_numeric

acousticness    float64
liveness        float64
speechiness     float64
valence         float64
music_genre       int64
dtype: object

In [64]:
X = df[["acousticness", "liveness", "speechiness", "valence"]].values
y = df["music_genre"].values

In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [66]:
# 3d shape
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [67]:
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation="relu", input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))  # regularization
model.add(Dense(10, activation="softmax")) # i have 10 classes

In [68]:
# sparse_categorical_crossentropy"s designed for multi-class classification tasks with sparse labels
# where each sample is associated with a single class index
# default learning rate is 0.001
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [69]:
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose = 0)

In [70]:
# epoch 10 31.13%
# epoch 100 32.81%
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 32.61%


In [71]:
from my_utils import get_hyperparameters
get_hyperparameters(model)

Learning Rate: 0.0010
Number of Hidden Layers: 4
Number of Neurons in Each Hidden Layer: [10]
Activation Functions: ['relu', 'softmax']
Loss Function: sparse_categorical_crossentropy
Optimizer: Adam
Metrics for Evaluation: ['loss', 'accuracy']


In [72]:
training_accuracy = history.history["accuracy"][-1]
print(f"Training Accuracy: {training_accuracy * 100:.2f}")

Training Accuracy: 33.10


# Data 1 MLP

In [73]:
#MLP is a feedforward neural network that consists of multiple layers of fully connected neurons

# One-hot encode music_genre
X = df[["acousticness", "liveness", "speechiness", "valence"]].values
y = pd.get_dummies(df["music_genre"]).values

In [74]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [75]:
# MLP model with L2 regularization with 0.01 strength
model = keras.Sequential()
model.add(Dense(128, activation="relu", input_shape=(X_train.shape[1],), kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(Dense(64, activation="relu", kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(Dense(32, activation="relu", kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(Dense(10, activation="softmax"))

In [76]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [77]:
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose = 0)

In [78]:
# epoch 10 31.89%
# epoch 100 32.45%
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 28.45%


In [79]:
training_accuracy = history.history["accuracy"][-1]
print(f"Training Accuracy: {training_accuracy * 100:.2f}")

Training Accuracy: 28.87


In [80]:
get_hyperparameters(model)

Learning Rate: 0.0010
Number of Hidden Layers: 2
Number of Neurons in Each Hidden Layer: [64, 32]
Activation Functions: ['relu', 'relu', 'relu', 'softmax']
Loss Function: categorical_crossentropy
Optimizer: Adam
Metrics for Evaluation: ['loss', 'accuracy']


# Data 2 CNN

In [82]:
column_names = ["HighBP",	"HighChol",	"CholCheck",	"BMI",	"Smoker",	"Sex",	"Age",	"Diabetes_binary"]
df_b = pd.read_csv("data/task2/diabetes_binary.csv", names = column_names)
df_b.head(2)

Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Sex,Age,Diabetes_binary
0,1,0,1,26,0,1,4,0
1,1,1,1,26,1,1,12,0


In [83]:
X = df_b.drop(columns=["Diabetes_binary"]).values
y = df_b["Diabetes_binary"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [84]:
input_shape = X_train.shape[1]
X_train.shape

(49484, 7)

In [85]:
#CNN
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=3, activation="relu", input_shape=(input_shape, 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))  # regularization
cnn_model.add(Dense(1, activation="sigmoid"))

In [86]:
# binary classification
cnn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [87]:
# changing to 3d data format instead of convertng separately
history = cnn_model.fit(X_train[:, :, np.newaxis], y_train, epochs=100, batch_size=32, validation_split=0.2, verbose = 0)

In [88]:
# epoch 10 70.97
# epoch 100 70.81
cnn_test_loss, cnn_test_accuracy = cnn_model.evaluate(X_test[:, :, np.newaxis], y_test)
print(f"CNN Test Accuracy: {cnn_test_accuracy * 100:.2f}%")

CNN Test Accuracy: 70.85%


In [89]:
training_accuracy = history.history["accuracy"][-1]
print(f"Training Accuracy: {training_accuracy * 100:.2f}")

Training Accuracy: 71.11


In [90]:
get_hyperparameters(cnn_model)

Learning Rate: 0.0010
Number of Hidden Layers: 3
Number of Neurons in Each Hidden Layer: [1]
Activation Functions: ['relu', 'sigmoid']
Loss Function: binary_crossentropy
Optimizer: Adam
Metrics for Evaluation: ['loss', 'accuracy']


# Data 2 MLP

In [91]:
mlp_model = Sequential()
mlp_model.add(Dense(128, activation="relu", input_shape=(input_shape,), kernel_regularizer=l2(0.01)))
mlp_model.add(Dense(64, activation="relu", kernel_regularizer=l2(0.01)))
mlp_model.add(Dense(32, activation="relu", kernel_regularizer=l2(0.01)))
mlp_model.add(Dense(1, activation="sigmoid"))

In [92]:
mlp_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [93]:
history = mlp_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose = 0)

In [94]:
# epoch 10 72.42
# epoch 100 72.20
mlp_test_loss, mlp_test_accuracy = mlp_model.evaluate(X_test, y_test)
print(f"MLP Test Accuracy: {mlp_test_accuracy * 100:.2f}%")

MLP Test Accuracy: 72.31%


In [95]:
trainin_accuracy = history.history["accuracy"][-1]
print(f"Training Accuracy: {training_accuracy * 100:.2f}")

Training Accuracy: 71.11


In [96]:
get_hyperparameters(mlp_model)

Learning Rate: 0.0010
Number of Hidden Layers: 2
Number of Neurons in Each Hidden Layer: [64, 32]
Activation Functions: ['relu', 'relu', 'relu', 'sigmoid']
Loss Function: binary_crossentropy
Optimizer: Adam
Metrics for Evaluation: ['loss', 'accuracy']
