### Necessary libraries

In [None]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import scipy
from spafe.utils import vis
from spafe.features.lpc import lpc, lpcc

## Silence removal algorithm

In [None]:
def silence_removing(y, sr):
    """Drop non-voiced 10 ms frames from a one-dimensional audio signal.

    The first 1600 samples are used as the reference for the background
    level: their mean and standard deviation define a per-sample test
    ``|y[i] - mean| / std > 3`` (a one-dimensional Mahalanobis distance).
    The signal is then cut into frames of ``int(sr / 100)`` samples and a
    frame is kept only when at least half of its samples pass that test.
    A trailing frame shorter than ``int(sr / 100)`` samples is discarded.

    Parameters
    ----------
    y : numpy.ndarray
        One-dimensional array of audio samples.
    sr : int or float
        Sampling rate of ``y`` in Hz; must be at least 100.

    Returns
    -------
    result : numpy.ndarray
        The samples of all kept frames, concatenated in their original order.
    sr : int or float
        The sampling rate, returned unchanged.

    Raises
    ------
    ValueError
        If ``sr`` is below 100, so that a frame would contain no samples.
    """
    y = np.asarray(y)

    # One frame is 1/100 of a second. The same integer length is used for
    # every frame-related computation so that sampling rates which are not a
    # multiple of 100 (e.g. 22050 Hz) are framed consistently.
    frame_size = int(sr / 100)
    if frame_size < 1:
        raise ValueError("sr must be at least 100 to build 10 ms frames")

    if y.shape[0] == 0:
        return y[:0], sr

    # Statistics of the reference segment at the start of the recording.
    reference = y[:1600]
    mean = np.mean(reference)
    stand_dev = np.std(reference)

    # Same decision as |y - mean| / std > 3, written without the division so
    # that a perfectly flat reference segment (std == 0) causes no warnings.
    voiced = np.abs(y - mean) > 3 * stand_dev

    # Only complete frames take part in the vote.
    n_frames = y.shape[0] // frame_size
    usable = n_frames * frame_size
    votes = voiced[:usable].reshape(n_frames, frame_size).sum(axis=1)

    # Keep a frame when voiced samples are at least as many as unvoiced ones.
    keep_frame = 2 * votes >= frame_size
    keep_sample = np.repeat(keep_frame, frame_size)

    return y[:usable][keep_sample], sr

## Feature extraction

In [None]:
def extract_features(index, row):
    """Load one audio clip and compute its feature vectors.

    The clip path is read from the module-level ``data`` table
    (``data['path'][index]``) and resolved under
    ``/content/drive/MyDrive/clips/``. The signal is loaded at its native
    sampling rate, passed through ``silence_removing`` and then described
    with librosa and spafe features.

    Parameters
    ----------
    index
        Key of the clip inside ``data['path']``.
    row
        Not used by this function.
        NOTE(review): presumably the row yielded alongside ``index`` when
        iterating over ``data`` - confirm against the caller.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz, spectral, lpcs, lpccs)``
        where ``mfccs``, ``chroma``, ``mel``, ``contrast`` and ``tonnetz``
        are averaged over time frames, ``spectral`` is the list
        ``[centroid, bandwidth, rolloff]`` of scalar means, and ``lpcs`` /
        ``lpccs`` are returned exactly as produced by spafe (not averaged).
    """
    src = '/content/drive/MyDrive/clips/' + data['path'][index]
    X, sample_rate = librosa.load(src, sr=None)  # sr=None keeps the native rate

    # Removal of non-voiced portions
    X, sample_rate = silence_removing(X, sample_rate)

    # Mel-frequency cepstral coefficients (MFCC), averaged over frames
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Linear prediction coefficients
    lpcs = lpc(sig=X, fs=sample_rate, num_ceps=20)

    # Linear prediction cepstral coefficients
    lpccs = lpcc(sig=X, fs=sample_rate, num_ceps=20, normalize=True)

    # Magnitude of the short-time Fourier transform; shared by the chroma
    # and spectral-contrast features below.
    stft = np.abs(librosa.stft(X))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # Spectrogram on the Mel scale. The signal is passed by keyword because
    # newer librosa releases no longer accept it positionally.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)

    # Spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)

    # Tonal centroid features (tonnetz), computed on the harmonic component
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

    # Scalar spectral descriptors, each averaged over all frames
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=X, sr=sample_rate))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=X, sr=sample_rate))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=X, sr=sample_rate))
    spectral = [spectral_centroid, spectral_bandwidth, spectral_rolloff]

    return mfccs, chroma, mel, contrast, tonnetz, spectral, lpcs, lpccs

## Classification

### Logistic Regression

## Neural Network