# Parkinson's Disease detection with zero crossing rate audio features using Logistic Regression coded without libraries.

# 1. Import the dependencies.

In [None]:
import numpy as np
import pandas as pd
from typing import Tuple
import random
import math
import librosa
import os

# 2. Load the audios with Librosa, and preprocess them using Peak Amplitude Normalization.
- load_audio loads one audio file and returns its samples together with the sampling rate.
- to_positive will turn all values to positive numbers (audio is 1D).
- get_max will return the maximum value in each audio array.
- peak_amplitude_normalize will preprocess audios with this formula 10 ** (peak / 20) / maximum_value.
- hamming_window builds the window coefficients with the formula 0.54 - 0.46 * math.cos((2 * math.pi * n) / (frame_length - 1)) for n = 0 ... frame_length - 1.
- frame_audio_signal splits an audio into frames, multiplies every frame by the hamming_window coefficients, and returns the framed audio.

In [None]:
def load_audio(file_path: str, sr=44100) -> Tuple:
    """Load one audio file with librosa.

    Args:
        file_path: Path of the audio file to read.
        sr: Target sampling rate forwarded to ``librosa.load``. Defaults to
            44100, the value that used to be hard-coded here.

    Returns:
        A ``(audio, sampling_rate)`` tuple exactly as returned by
        ``librosa.load``.
    """
    audio, sampling_rate = librosa.load(file_path, sr=sr)
    return audio, sampling_rate


def to_positive(n_array):
    """Return the absolute value of every element of a 1-D sequence.

    The input is left untouched. The previous implementation overwrote the
    caller's array in place, which silently rectified the audio signal that
    was later used for feature extraction.

    Args:
        n_array: 1-D NumPy array or other sequence of numbers.

    Returns:
        A new NumPy array when a NumPy array is given, otherwise a new list,
        holding the absolute values in the original order.
    """
    if isinstance(n_array, np.ndarray):
        return np.abs(n_array)
    return [abs(value) for value in n_array]


def get_max(n_array):
    """Return the largest element of a non-empty 1-D sequence.

    The first element is the starting candidate and every later element
    replaces it only when strictly greater.
    """
    largest = n_array[0]
    for candidate in n_array[1:]:
        if candidate > largest:
            largest = candidate

    return largest


def peak_amplitude_normalize(audio_data, peak=-3.0):
    """Scale a signal so that its largest absolute sample sits at ``peak`` dB.

    The scaling factor is ``10 ** (peak / 20) / max(|audio_data|)`` and the
    whole signal is multiplied by it. The sign of every sample is kept and
    the input array is not modified: the peak is measured on a temporary
    absolute-value copy, so the zero crossings of the signal survive
    normalization.

    Args:
        audio_data: 1-D sequence of audio samples.
        peak: Desired peak level in dB (default -3 dB).

    Returns:
        A new NumPy array with the normalized samples. An empty or all-zero
        signal is returned as an unscaled copy because no finite scaling
        factor exists for it.
    """
    samples = np.asarray(audio_data)
    if samples.size == 0:
        return samples.copy()

    maximum_value = np.max(np.abs(samples))
    if maximum_value == 0:
        # Silent signal: dividing by the peak would produce inf/NaN samples.
        return samples.copy()

    scaling = 10 ** (peak / 20) / maximum_value
    return samples * scaling


def hamming_window(frame_length):
    """Return the Hamming window coefficients for a frame.

    Coefficient ``n`` is ``0.54 - 0.46 * cos(2 * pi * n / (frame_length - 1))``.

    Args:
        frame_length: Number of samples in the window.

    Returns:
        A 1-D NumPy array with ``frame_length`` coefficients. A window of
        length 1 is ``[1.0]`` (the formula would divide by zero there) and a
        window of length 0 is empty.
    """
    if frame_length < 2:
        # The formula's denominator (frame_length - 1) is zero or negative.
        return np.ones(frame_length)

    positions = np.arange(frame_length)
    return 0.54 - 0.46 * np.cos((2 * np.pi * positions) / (frame_length - 1))


# Function to frame the signal using a Hamming window
def frame_audio_signal(audio_data, frame_length):
    """Cut a signal into non-overlapping frames and window each frame.

    Args:
        audio_data: 1-D sequence of audio samples.
        frame_length: Number of samples per frame.

    Returns:
        A float array of shape ``(frame_length, number_frames)`` in which
        column ``i`` holds frame ``i`` multiplied by the Hamming window.
        Trailing samples that do not fill a whole frame are dropped.
    """
    number_frames = len(audio_data) // frame_length
    framed_audio = np.zeros((frame_length, number_frames))
    if number_frames == 0:
        return framed_audio

    # The window depends only on frame_length, so build it once rather than
    # once per frame.
    window = hamming_window(frame_length)
    usable = np.asarray(audio_data)[:number_frames * frame_length]
    # Rows of the reshaped array are consecutive frames; transpose so that
    # each frame becomes a column of the result.
    framed_audio[:, :] = (usable.reshape(number_frames, frame_length) * window).T

    return framed_audio

# 3. Extract Zero Crossing Rate features.
- zero_crossing uses Numpy to count how many times the audio crosses zero.
- zero_crossing_features uses zero_crossing to extract features for all audios.

In [None]:
def zero_crossing(frames):
    """Compute the zero crossing rate of every frame.

    ``frames`` is expected in the layout produced by ``frame_audio_signal``:
    shape ``(frame_length, number_frames)`` with one frame per column. Sign
    changes are therefore detected between consecutive samples *within* a
    frame (axis 0). Differencing along the last axis, as was done before,
    compared samples of different frames instead.

    Args:
        frames: 2-D array with one frame per column.

    Returns:
        A 1-D array with one value per frame: the fraction of consecutive
        sample pairs whose sign differs. A full sign flip changes
        ``np.sign`` by 2, hence the division by 2. Frames shorter than two
        samples have no pairs and get a rate of 0.
    """
    signs = np.sign(frames)
    if signs.shape[0] < 2:
        return np.zeros(signs.shape[1])

    sign_changes = np.abs(np.diff(signs, axis=0))
    zcr = np.mean(sign_changes, axis=0) / 2
    return zcr


def zero_crossing_features(directory: str = '/content/drive/MyDrive/HYP TRAIN DATA/',
                           frame_time: float = 0.02) -> Tuple:
    """Extract one mean zero crossing rate feature per ``.wav`` file.

    Every ``.wav`` file in ``directory`` is loaded, peak-normalized, framed
    and reduced to the mean of its per-frame zero crossing values. The label
    comes from the file name: ``"P"`` in the name gives 1, otherwise ``"C"``
    in the name gives 0.

    Exactly one label is recorded per extracted feature. Previously a name
    containing both letters produced two labels and a name containing
    neither produced none, which shifted labels against features.

    Args:
        directory: Folder that holds the recordings.
        frame_time: Duration of each frame in seconds.

    Returns:
        ``(zcr_features, status)``: an array of shape ``(n_files, 1)`` and an
        array of ``n_files`` labels, in sorted file-name order.
    """
    zcr_features = []
    status = []
    # os.listdir gives no ordering guarantee; sorting keeps runs reproducible.
    for file_name in sorted(os.listdir(directory)):
        if not file_name.endswith('.wav'):
            continue

        # NOTE(review): a name with both "P" and "C" is labelled 1 here;
        # confirm against the dataset's naming convention.
        if "P" in file_name:
            label = 1
        elif "C" in file_name:
            label = 0
        else:
            # No label can be derived, so the file cannot be used.
            continue

        file_path = os.path.join(directory, file_name)
        audio, sampling_rate = load_audio(file_path)
        frame_size = int(frame_time * sampling_rate)  # Number of samples in each frame
        preprocessed_audio = peak_amplitude_normalize(audio)
        frames = frame_audio_signal(preprocessed_audio, frame_size)
        zcr = zero_crossing(frames)
        zcr_features.append(np.array([zcr.mean()]))
        status.append(label)

    zcr_features, status = np.array(zcr_features), np.array(status)
    return zcr_features, status

# 4. Logistic Regression class with data members learning rate, number of iterations, weights and bias.
- Logistic Regression uses the Sigmoid function to map every value to a probability between 0 and 1, which is then thresholded at 0.5 to predict 1 or 0.
- The weights and bias are derived using Gradient Descent.

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    The weights and bias passed to the constructor are only placeholders:
    ``fitting`` replaces them with random starting values drawn from the
    ``random`` module and then refines them for ``num_iterations`` steps.
    """

    # Constructor for setting learning rate, number of iterations, weight, and bias
    def __init__(self, learning_rate, num_iterations, weight, bias):
        self._learning_rate = learning_rate
        self._num_iterations = num_iterations
        self._weight = weight
        self._bias = bias

    # Getter - get the learning rate
    def get_learning_rate(self):
        return self._learning_rate

    # Getter - get the number of iterations
    def get_num_iterations(self):
        return self._num_iterations

    def update_weight(self, weight):
        self._weight = weight

    def update_bias(self, bias):
        self._bias = bias

    def return_weight(self):
        return self._weight

    def return_bias(self):
        return self._bias

    @staticmethod
    def _sigmoid(z):
        """Return ``1 / (1 + exp(-z))`` without overflowing.

        ``z`` is clipped to [-500, 500] first; outside that range the result
        is already indistinguishable from 0 or 1 for thresholding purposes.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    # For fitting the dataset
    def fitting(self, data_points, y_values):
        """Learn the weights and bias from the training data.

        Args:
            data_points: 2-D array-like of shape ``(n_samples, n_features)``.
            y_values: 1-D array-like with the 0/1 target of every sample.
        """
        data_points = np.asarray(data_points, dtype=float)
        y_values = np.asarray(y_values, dtype=float)
        self._num_rows, self._num_cols = self.shape(data_points)

        # Random starting point: one weight per feature plus the bias.
        weights = np.array([random.random() for _ in range(self._num_cols)])
        bias = random.random()

        # Gradient Descent implementation
        for _ in range(self.get_num_iterations()):
            weights, bias = self.increment_weight_bias(data_points, weights, bias, y_values)
            self.update_weight(weights)
            self.update_bias(bias)

    def zee_formula(self, data_points, weights, bias):
        """Return the linear score ``data_points . weights + bias``."""
        return np.dot(data_points, weights) + bias

    def shape(self, x) -> Tuple:
        """Return ``(number of rows, number of columns)`` of a 2-D sequence."""
        return len(x), len(x[0])

    def exponential(self, array):
        """Return ``e ** v`` for every ``v`` in ``array`` as a NumPy array."""
        return np.array([math.exp(v) for v in array])

    def summation(self, array):
        """Return the sum of all elements of ``array``."""
        return sum(array)

    def increment_weight_bias(self, data_points, weights, bias, y_values) -> Tuple:
        """Perform one gradient descent step.

        Both gradients are averaged over the number of samples (rows). The
        bias gradient used to be divided by the number of features instead,
        which made the bias step ``n_samples / n_features`` times too large.

        Returns:
            ``(new_weights, new_bias)``.
        """
        data_points = np.asarray(data_points)
        y_sigmoid = self._sigmoid(self.zee_formula(data_points, weights, bias))

        num_rows = self.shape(data_points)[0]
        error = y_sigmoid - y_values

        # Gradients
        dw = (1 / num_rows) * np.dot(data_points.T, error)
        db = (1 / num_rows) * self.summation(error)

        # Incrementing the weights and bias
        learning_rate = self.get_learning_rate()
        return weights - learning_rate * dw, bias - learning_rate * db

    def predict(self, data_points):
        """Predict a class for every row of ``data_points``.

        Returns:
            A float array holding 1.0 where the sigmoid output is greater
            than 0.5 and 0.0 elsewhere.
        """
        zee = self.zee_formula(data_points, self.return_weight(), self.return_bias())
        probabilities = self._sigmoid(zee)
        return np.where(probabilities > 0.5, 1.0, 0.0)

    def single_predict(self, data_point):
        """Predict the class (1 or 0) of a single sample."""
        zee = self.zee_formula(data_point, self.return_weight(), self.return_bias())
        return 1 if self._sigmoid(zee) > 0.5 else 0

# 5. Standard scaler and splitting of the data.
- standard_scaler uses means and std deviations to standardize the features.
- split splits the data into training and testing sets.

In [None]:
def standard_scaler(features):
    """Standardize every feature column to zero mean and unit variance.

    Args:
        features: Array-like of shape ``(n_samples, n_features)``.

    Returns:
        A float array of the same shape holding ``(x - mean) / std`` per
        column. A column whose standard deviation is zero (a constant
        feature) is only centred, so it becomes all zeros instead of NaN.
    """
    features = np.asarray(features, dtype=float)

    # Computation of mean and standard deviation
    means = features.mean(axis=0)
    std_deviations = features.std(axis=0)

    # Dividing by 1 instead of 0 leaves the centred (all-zero) column intact.
    safe_std = np.where(std_deviations == 0, 1.0, std_deviations)

    # Standardizing the features
    return (features - means) / safe_std


def _random_indexes(number, size, random_state):
    """Draw distinct random indexes in ``range(number)`` for the test set.

    The global ``random`` generator is seeded with ``random_state``. One
    index is always drawn first; further indexes are drawn, redrawing on a
    repeat, until ``size`` indexes have been collected.

    Raises:
        ValueError: If ``size`` is greater than ``number``.
    """
    if size > number:
        raise ValueError(f"{size} features can't be chosen out of {number}")

    random.seed(random_state)
    picked = [random.randrange(0, number, 1)]
    for _ in range(size - 1):
        candidate = random.randrange(0, number, 1)
        # Reject duplicates by drawing again.
        while candidate in picked:
            candidate = random.randrange(0, number, 1)
        picked.append(candidate)

    return np.array(picked)


def split(features, targets, test_size, random_state=42):
    """Split features and targets into random training and testing sets.

    Args:
        features: Sequence of samples.
        targets: Sequence of labels, one per sample.
        test_size: Fraction of the samples to put in the testing set.
        random_state: Seed handed to ``_random_indexes``.

    Returns:
        ``(x_training, x_testing, y_training, y_testing)`` as NumPy arrays.
        The testing set holds ``ceil(test_size * n_samples)`` samples and at
        least one. The previous ``int(...) + 1`` took one sample too many
        whenever the product was already a whole number.

    Raises:
        ValueError: If ``features`` and ``targets`` differ in length, or if
            more test samples are requested than samples exist.
    """
    number_of_samples = len(targets)
    if len(features) != number_of_samples:
        raise ValueError("features and targets must have the same length")

    # round() removes float noise such as 0.1 * 30 == 3.0000000000000004
    # before rounding up.
    t_size = max(1, math.ceil(round(test_size * number_of_samples, 10)))

    random_indexes = _random_indexes(number_of_samples, t_size, random_state)
    # A set gives constant-time membership tests in the training-set filter.
    test_positions = {int(index) for index in random_indexes}

    features = list(features)
    targets = list(targets)

    x_testing = [features[index] for index in random_indexes]
    y_testing = [targets[index] for index in random_indexes]
    x_training = [row for i, row in enumerate(features) if i not in test_positions]
    y_training = [label for i, label in enumerate(targets) if i not in test_positions]

    return np.array(x_training), np.array(x_testing), np.array(y_training), np.array(y_testing)

# 6. Metrics
- _confusion_matrix computes true positives, false positives, true negatives, and false negatives.
- accuracy_score, precision_score, recall_score, f1_score and confusion_matrix compute the accuracy, precision, recall, and F1 scores and the confusion matrix.

In [None]:
def _confusion_matrix(y_testing, y_prediction):
    """Count the four confusion-matrix cells for binary labels.

    A sample whose true label is 1 but was predicted otherwise is a false
    negative; a sample whose true label is 0 but was predicted otherwise is
    a false positive. These two counters used to be swapped. Samples whose
    true label is neither 0 nor 1 are ignored.

    Args:
        y_testing: Sequence of true labels.
        y_prediction: Sequence of predicted labels, indexable at every
            position of ``y_testing``.

    Returns:
        ``(true_positive, false_positive, true_negative, false_negative)``.
    """
    true_positive = false_positive = true_negative = false_negative = 0

    for i, actual in enumerate(y_testing):
        correct = actual == y_prediction[i]
        if actual == 1:
            if correct:
                true_positive += 1
            else:
                # A positive sample was missed.
                false_negative += 1
        elif actual == 0:
            if correct:
                true_negative += 1
            else:
                # A negative sample was flagged as positive.
                false_positive += 1

    return true_positive, false_positive, true_negative, false_negative


def accuracy_score(y_testing, y_preds):
    """Return the share of counted samples that were predicted correctly.

    Computed as ``(tp + tn) / (tp + fp + tn + fn)`` from the counts of
    ``_confusion_matrix``. Returns 0.0 when no sample was counted, matching
    the zero-denominator handling of ``recall_score`` instead of raising
    ``ZeroDivisionError``.
    """
    tp, fp, tn, fn = _confusion_matrix(y_testing, y_preds)
    total = tp + fp + tn + fn
    if total == 0:
        return 0.0
    return (tp + tn) / total


def precision_score(y_testing, y_preds):
    """Return ``tp / (tp + fp)`` from the counts of ``_confusion_matrix``.

    Returns 0.0 when the denominator is zero, matching the
    zero-denominator handling of ``recall_score`` instead of raising
    ``ZeroDivisionError``.
    """
    tp, fp, tn, fn = _confusion_matrix(y_testing, y_preds)
    tp_fp = tp + fp
    if tp_fp == 0:
        return 0.0
    return tp / tp_fp


def recall_score(y_testing, y_preds):
    """Return ``tp / (tp + fn)`` from the counts of ``_confusion_matrix``.

    Yields 0.0 when the denominator is zero.
    """
    tp, _fp, _tn, fn = _confusion_matrix(y_testing, y_preds)
    denominator = tp + fn
    if denominator != 0:
        return tp / denominator
    return 0.0


def f1_score(y_testing, y_preds):
    """Return the harmonic mean of precision and recall.

    Computed as ``2 * precision * recall / (precision + recall)``; yields
    0.0 when precision and recall sum to zero.
    """
    precision = precision_score(y_testing, y_preds)
    recall = recall_score(y_testing, y_preds)
    denominator = precision + recall
    return 0.0 if denominator == 0 else (2 * precision * recall) / denominator


def confusion_matrix(y_testing, y_preds):
    """Return the counts of ``_confusion_matrix`` as ``[[tp, fp], [fn, tn]]``."""
    tp, fp, tn, fn = _confusion_matrix(y_testing, y_preds)
    return [[tp, fp], [fn, tn]]

# 7. Main function.


In [None]:
if __name__ == "__main__":
    # Build the dataset: one standardized ZCR feature per recording.
    zcr_feats, y_labels = zero_crossing_features()
    features = standard_scaler(zcr_feats)
    x_train, x_valid, y_train, y_valid = split(features, y_labels, test_size=0.2)

    # Train on the training part and predict the held-out part.
    model = LogisticRegression(learning_rate=0.01, num_iterations=10000, weight=0, bias=0)
    model.fitting(x_train, y_train)
    predictions = model.predict(x_valid)

    # Metrics on the validation data
    accuracy = accuracy_score(y_valid, predictions)
    precision = precision_score(y_valid, predictions)
    recall = recall_score(y_valid, predictions)
    f1 = f1_score(y_valid, predictions)
    confusion_mat = confusion_matrix(y_valid, predictions)

    report = (
        ("Accuracy score on the validation data: ", accuracy),
        ("Precision score on the validation data: ", precision),
        ("Recall score on the validation data: ", recall),
        ("F1 score on the validation data: ", f1),
        ("Confusion matrix on the validation data: ", confusion_mat),
    )

    print("Evaluation on the validation data.")
    for caption, value in report:
        print(caption, value)

Evaluation on the validation data.
Accuracy score on the validation data:  0.75
Precision score on the validation data:  0.0
Recall score on the validation data:  0.0
F1 score on the validation data:  0.0
Confusion matrix on the validation data:  [[0, 3], [0, 9]]
