In [1]:
import numpy as np # used for arrays & loading data
import tensorflow as tf # arrays & loading data
from tensorflow.keras.models import Sequential  # model type that we will use
from tensorflow.keras.layers import Dense # we will use Dense layers
from tensorflow.keras.activations import linear, relu, sigmoid # some activation functions that we may use
from sklearn.preprocessing import StandardScaler # z-score normalization 

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [2]:
# loading all the training data
data = np.loadtxt('train.csv', delimiter=',', skiprows=1 )
print(data.shape)

(17996, 15)


In [3]:
X = data[:,:-1] # forming the input and output of the training data
y = data[:,-1]

y = np.expand_dims(y, axis=1) # make y 2D - the commands later will require it


# split into training , cross validation and test sets
from sklearn.model_selection import train_test_split
# TRAINING SET - 60%
X_train, X_temporary, y_train, y_temporary = train_test_split(X, y, test_size=0.40, random_state=1)

# the rest of 40% - CV SET(20%) and TEST SET(20%)
X_cv, X_test, y_cv, y_test = train_test_split(X_temporary, y_temporary, test_size=0.50, random_state=1)
del X_temporary, y_temporary

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")
print(f"test input shape:{X_test.shape}")
print(f"test output shape:{y_test.shape}")

# applying z-score to all the training, cv and test data - make it compact for the algorithms to work better
standard_scaler = StandardScaler()
X_train_scaled = standard_scaler.fit_transform(X_train)
X_cv_scaled = standard_scaler.transform(X_cv)
X_test_scaled = standard_scaler.transform(X_test)  # use transform, because we want the same z-score used for the training data

training input shape:(10797, 14)
training output shape:(10797, 1)
cv input shape:(3599, 14)
cv output shape:(3599, 1)
test input shape:(3600, 14)
test output shape:(3600, 1)


In [4]:
# we will use a Sequential model with Dense layers

model= Sequential(
    [
        tf.keras.Input(shape=(14,)), # input size (each song has 14 features)
        Dense(12,activation="relu", name="layer2"),   # usually, for multiclassification we use relu for all layers
        Dense(11,activation="linear", name="layer3"), # but for the last layer we use linear 
    ], name="multiclass_model"
)

In [5]:
# see details about the parameters and output of activation at every layer 
model.summary()

In [6]:
# define loss and optimizer of the Adam's algorithm
model.compile(
    # this is similar to gradient descent, but it is a much improved version
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # multiclass loss
    optimizer=tf.keras.optimizers.Adam(0.001), # preimplemented optimizer
)

In [7]:
# train the model "epochs" times
model.fit(
    X_train_scaled,y_train,
    epochs=40,
)

Epoch 1/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 420us/step - loss: 2.3513
Epoch 2/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - loss: 1.8049
Epoch 3/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437us/step - loss: 1.5701
Epoch 4/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 410us/step - loss: 1.4793
Epoch 5/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437us/step - loss: 1.4226
Epoch 6/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 390us/step - loss: 1.4020
Epoch 7/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 410us/step - loss: 1.3866
Epoch 8/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 390us/step - loss: 1.3622
Epoch 9/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - loss: 1.3662
Epoch 10/40
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

<keras.src.callbacks.history.History at 0x2e23ebca6f0>

In [8]:
y_prediction = model.predict(X_train_scaled)  # prediction on train dataset (output matrix, where each row has 11 elements - corresponding to the nr of classes)
print(y_prediction.shape)               

[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437us/step
(10797, 11)


In [9]:
y_prediction_classes=np.empty((1,1))

# since we did not use softmax to see the exact probability for every class, argmax will help us to choose the index of the greatest element on each row
print(y_prediction_classes.shape)
for i in range(y_prediction.shape[0]):
    max_element_index = np.argmax(y_prediction[i]) # this index represents the class predicted
    y_prediction_classes = np.concatenate((y_prediction_classes,[[max_element_index]]),axis=0)
y_prediction_classes = y_prediction_classes[1:]
print(y_prediction_classes)

(1, 1)
[[10.]
 [ 4.]
 [ 6.]
 ...
 [ 8.]
 [ 9.]
 [10.]]


In [10]:
error = np.mean(y_prediction_classes != y_train) 
print(f"Training Set Classification Error: {error}")

Training Set Classification Error: 0.4918032786885246
