# Neural Network

Explain what it is and why you picked it.

# Libraries

Import matplotlib, csv, numpy, and torch.

In [1]:
from DataLoader import DataLoader
from neural_network import NeuralNetwork

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn as nn
import numpy as np
import torch
import csv
import math

%matplotlib inline

# set default size of plots
plt.rcParams['figure.figsize'] = (8.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'

torch.manual_seed(0)


%load_ext autoreload
%autoreload 2

### Metric

The metric we will be using is the `precision_score`, since we are mostly just interested in how well the model performs in making predictions.

Gauging the actual Positive/Negative is not as important as precision is a metric that isolates the performance of positive  predictions created by the model. It focuses more on the ratio of correctly predicted positive instances to the total predicted positive instances.

In [2]:
def get_Score(model, X_train, X_test, y_train, y_test, verbose = 1):
    
    #Retrieve the precision score of test & train via sklearn's precision_score
    precision_train = precision_score(model.predict(X_train), y_train, average=None)
    precision_test = precision_score(model.predict(X_test), y_test, average=None)
    
    #Graph the scores using the graph_Score function if verbose is set to TRUE
    if verbose:
        graph_Scores((precision_train, precision_test))
        
        print(f"Train Avg Precision : {precision_train.mean():.4f}")
        print(f"Test Avg Precision : {precision_test.mean():.4f}")
        
    return precision_train, precision_test

### Music Dataset
We will use the music dataset as out dataset. Each instance represents distinct features with the song:
- `Artist Name` - Name of artist
- `Track Name` - Name of song
- `Popularity` - a value between 0 and 100, with 100 being the most popular
- `danceability` - describes how suitable a track is for dancing
- `energy` - perceptual measure of intensity and activity
- `key` - The key the track is in
- `loudness` - The overall loudness of a track in decibels (dB)
- `mode` - indicates the modality (major or minor) of a track
- `speechiness` - detects the presence of spoken words in a track
- `acousticness` - A measure for whether the track is acoustic
- `instrumentalness` - Predicts whether a track contains no vocals
- `liveness` - Detects the presence of an audience in the recording
- `valence` - Describes the musical positiveness conveyed by a track
- `tempo` - The overall estimated tempo of a track in beats per minute (BPM)
- `duration_inmin/ms` - Duration in ms
- `time_signature` - Specifies how many beats are in each bar (or measure)

The songs can be divided into 11 different genres.
- `Pop` - class 0
- `Hip-hop` - class 1
- `Blues` - class 2
- `Indian-pop` - class 3
- `Country` - class 4
- `Rap` - class 5
- `Rock` - class 6
- `Ambient` - class 7
- `Metal` - class 8
- `R&B` - class 9
- `Indie` - class 10


### Loading of dataset

In [3]:
classes = {
    'Pop': 0,
    'Hip-hop': 1,
    'Blues': 2,
    'Indian-pop': 3,
    'Country': 4,
    'Rap': 5,
    'Rock': 6,
    'Ambient': 7,
    'Metal': 8,
    'R&B': 9,
    'Indie': 10,
}

with open('Dataset 6 - Music Dataset/music.csv', 'r') as csv_file:
    raw_data = csv.reader(csv_file)
    X_music = np.empty((0, 13), float)  # 13 numeric feature columns
    y_music = np.empty((0, 1), int)
    
    next(raw_data)  # Skip the header row
    
    for row in raw_data:
        # Convert the relevant columns to float and add to X_music
        features = np.array([float(row[2]),  # Popularity
                             float(row[3]),  # danceability
                             float(row[4]),  # energy
                             float(row[5]),  # key
                             float(row[6]),  # loudness
                             float(row[7]),  # mode
                             float(row[8]),  # speechiness
                             float(row[9]),  # acousticness
                             float(row[11]), # liveness
                             float(row[12]), # valence
                             float(row[13]), # tempo
                             float(row[14]), # duration_inmin/ms
                             float(row[15])  # time_signature
                            ])
        X_music = np.vstack([X_music, features])
        
        # Append the class label
        y_music = np.append(y_music, np.array([classes[row[16]]]))  # Class column
    
# Reshape y_music to have the shape (N, 1)
y_music = np.expand_dims(y_music, 1)

print('Training data shape:', X_music.shape)
print('Ground truth values shape:', y_music.shape)


KeyError: '5'

In [None]:
X_music_0 = X_music[y_music == 0]
X_music_1 = X_music[y_music == 1]
X_music_2 = X_music[y_music == 2]
X_music_3 = X_music[y_music == 3]
X_music_4 = X_music[y_music == 4]
X_music_5 = X_music[y_music == 5]
X_music_6 = X_music[y_music == 6]
X_music_7 = X_music[y_music == 7]
X_music_8 = X_music[y_music == 8]
X_music_9 = X_music[y_music == 9]
X_music_10 = X_music[y_music == 10]

print('Number of class 0:', len(X_music_0))
print('Number of class 1:', len(X_music_1))
print('Number of class 2:', len(X_music_2))
print('Number of class 3:', len(X_music_3))
print('Number of class 4:', len(X_music_4))
print('Number of class 5:', len(X_music_5))
print('Number of class 6:', len(X_music_6))
print('Number of class 7:', len(X_music_7))
print('Number of class 8:', len(X_music_8))
print('Number of class 9:', len(X_music_9))
print('Number of class 10:', len(X_music_10))

Divide the dataset into the train and test set. The test set will contain X instances for each class.

In [None]:
np.random.seed(1)

# Select 10 `class 0` instances
selected_0 = np.random.choice(np.arange(len(X_music_0)),
                              size=10,
                              replace=False)

# Select 10 `class 1` instances
selected_1 = np.random.choice(np.arange(len(X_music_1)),
                              size=10,
                              replace=False)

# Select 10 `class 2` instances
selected_2 = np.random.choice(np.arange(len(X_music_2)),
                              size=10,
                              replace=False)
# Select 10 `class 3` instances
selected_3 = np.random.choice(np.arange(len(X_music_3)),
                              size=10,
                              replace=False)

# Select 10 `class 4` instances
selected_4 = np.random.choice(np.arange(len(X_music_4)),
                              size=10,
                              replace=False)

# Select 10 `class 5` instances
selected_5 = np.random.choice(np.arange(len(X_music_5)),
                              size=10,
                              replace=False)
# Select 10 `class 6` instances
selected_6 = np.random.choice(np.arange(len(X_music_6)),
                              size=10,
                              replace=False)

# Select 10 `class 7` instances
selected_7 = np.random.choice(np.arange(len(X_music_7)),
                              size=10,
                              replace=False)

# Select 10 `class 8` instances
selected_8 = np.random.choice(np.arange(len(X_music_8)),
                              size=10,
                              replace=False)

# Select 10 `class 9` instances
selected_9 = np.random.choice(np.arange(len(X_music_9)),
                              size=10,
                              replace=False)

# Select 10 `class 10` instances
selected_10 = np.random.choice(np.arange(len(X_music_10)),
                              size=10,
                              replace=False)

# Form the test set
X_test = np.concatenate((X_iris_0[selected_0],
                         X_iris_1[selected_1],
                         X_iris_1[selected_2],
                         X_iris_1[selected_2],
                         X_iris_1[selected_3],
                         X_iris_1[selected_4],
                         X_iris_1[selected_5],
                         X_iris_1[selected_6],
                         X_iris_1[selected_7],
                         X_iris_1[selected_8],
                         X_iris_1[selected_9],
                         X_iris_2[selected_10]))
y_test = np.concatenate((np.array([0 for _ in range(10)]),
                         np.array([1 for _ in range(10)]),
                         np.array([2 for _ in range(10)]),
                         np.array([3 for _ in range(10)]),
                         np.array([4 for _ in range(10)]),
                         np.array([5 for _ in range(10)]),
                         np.array([6 for _ in range(10)]),
                         np.array([7 for _ in range(10)]),
                         np.array([8 for _ in range(10)]),
                         np.array([9 for _ in range(10)]),
                         np.array([10 for _ in range(10)])))

print(X_test.shape)
print(y_test.shape)

In [None]:
X_train = np.concatenate((np.delete(X_music_0, selected_0, 0),
                          np.delete(X_music_1, selected_1, 0),
                          np.delete(X_music_2, selected_2, 0)),
                          np.delete(X_music_3, selected_3, 0),
                          np.delete(X_music_4, selected_4, 0),
                          np.delete(X_music_5, selected_5, 0),
                          np.delete(X_music_6, selected_6, 0),
                          np.delete(X_music_7, selected_7, 0),
                          np.delete(X_music_8, selected_8, 0),
                          np.delete(X_music_9, selected_9, 0),
                          np.delete(X_music_10, selected_10, 0))
y_train = np.concatenate((np.array([0 for _ in range(40)]),
                          np.array([1 for _ in range(40)]),
                          np.array([2 for _ in range(40)]),
                          np.array([3 for _ in range(40)]),
                          np.array([4 for _ in range(40)]),
                          np.array([5 for _ in range(40)]),
                          np.array([6 for _ in range(40)]),
                          np.array([7 for _ in range(40)]),
                          np.array([8 for _ in range(40)]),
                          np.array([9 for _ in range(40)]),
                          np.array([10 for _ in range(40)])))

print(X_train.shape)
print(y_train.shape)

Convert the np.ndarray arrays to torch.Tensor. We use torch.Tensor in PyTorch.

In [None]:
X_train = torch.Tensor(X_train)
y_train = torch.Tensor(y_train)
X_test = torch.Tensor(X_test)
y_test = torch.Tensor(y_test)

## Setting up the training pipeline

In [None]:
network = NeuralNetwork(13, 11, list_hidden=(20, 10))
network.create_network()
network.init_weights()
network.forward(X_train, verbose = True)

Well use Adam as the optimizer with the following parameters:
- `params` - the parameters of the network
- `lr` = 0.001

Then instantiated a nn.CrossEntropyLoss() object.

In [None]:
optimizer = optim.Adam(network.parameters(), 0.001)
criterion = nn.CrossEntropyLoss()

### Dataset

We will be using the dataset titled: `Dataset 6 - Music Dataset` for our machine learning model.

In [None]:
data_loader = DataLoader('Dataset 6 - Music Dataset/music.csv', True, True).df['standardize']

## Training the network

In [None]:
e = 0
max_epochs = 300
is_converged = False
previous_loss = 0
losses = []

# For each epoch
while e < max_epochs and is_converged is not True:
    
    current_epoch_loss = 0
    
    # TODO: Get the batch for this epoch.
    X_batch, y_batch = data_loader.get_batch(mode='train')
    
    # For each batch
    for X, y in zip(X_batch, y_batch):
        X = torch.Tensor(X)
        y = torch.Tensor(y).to(torch.long)

        optimizer.zero_grad()

        scores, probabilities = network.forward(X)
        
        loss = criterion(scores, y)
        
        loss.backward()
        
        optimizer.step()
        
        current_epoch_loss += loss.item()
    
    average_loss = current_epoch_loss / len(X_batch)
    losses.append(average_loss)
    
    # Display the average loss per epoch
    print('Epoch:', e + 1, '\tLoss: {:.6f}'.format(average_loss))
    
    if abs(previous_loss - loss) < 0.00000005:
        is_converged = True
    else:
        previous_loss = loss
        e += 1

### Trying out the trained network on the test data
Setting it to eval mode first to avoid updating the weights.

In [None]:
network.eval()

Then perform forwad propagation on the test data and get the prediction results.

In [None]:
scores,probalities = network.forward(X_test);
predictions = network.predict(X_test)
print("Predictions: ", predictions)

Get the accuracy of the network

In [None]:
num_correct = torch.sum(predictions == y_test)
accuracy = num_correct/y_test.shape[0]

In [None]:
# Read the ver of the dataframe you want to use
# df = DataLoader('Dataset 6 - Music Dataset/music.csv', True, True).df['raw']