In [None]:
pip install nolds

Collecting nolds
  Downloading nolds-0.5.2-py2.py3-none-any.whl (39 kB)
Installing collected packages: nolds
Successfully installed nolds-0.5.2


In [None]:
import numpy as np
import librosa
import warnings
import nolds
# Ignore all warnings
warnings.filterwarnings("ignore")

In [None]:
# Load audio file
audio_file = "/content/Recording.m4a"
y, sr = librosa.load(audio_file, sr=None)

# Extract pitch using YIN algorithm in librosa
pitch, _ = librosa.core.piptrack(y=y, sr=sr, fmax=600, fmin=50)

# Calculate the mean pitch value (ignoring zeros)
mean_pitch = np.mean(pitch[pitch > 0])

# Find fo, fhi, and flo
fo = mean_pitch
fhi = np.max(pitch[pitch > 0])
flo = np.min(pitch[pitch > 0])

print(f"Fundamental Frequency (fo): {fo:.6f} Hz")
print(f"Highest Frequency (fhi): {fhi:.6f} Hz")
print(f"Lowest Frequency (flo): {flo:.6f} Hz")

Fundamental Frequency (fo): 340.664032 Hz
Highest Frequency (fhi): 597.622253 Hz
Lowest Frequency (flo): 72.572464 Hz


In [None]:
y.shape

(269312,)

In [None]:
pitch

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
# Calculate time between consecutive samples
time_diff = np.diff(librosa.samples_to_time(np.arange(len(y)), sr=sr))

# Calculate jitter as the standard deviation of time differences
jitter_percentage = (np.std(time_diff) / np.mean(time_diff)) * 100
jitter_percentage

1.1509564038325873e-09

In [None]:
# Calculate absolute jitter as the mean absolute difference of time differences
jitter_absolute = np.mean(np.abs(time_diff))
jitter_absolute

2.0833333333333336e-05

In [None]:
# Calculate RAP, PPQ, Jitter:DDP
rap = np.mean(np.abs(np.diff(np.diff(pitch))))
ppq = np.mean(np.abs(np.diff(pitch)))
jitter_ddp = rap * 3
rap

1.4159557

In [None]:
ppq

0.7135988

In [None]:
jitter_ddp

4.247866988182068

In [None]:
# Calculate Shimmer, Shimmer(dB), Shimmer:APQ3, Shimmer:APQ5, Shimmer:DDA
shimmer = np.mean(np.abs(np.diff(y)))
shimmer_db = 20 * np.log10(np.mean(np.abs(np.diff(y))))
shimmer_apq3 = np.mean(np.abs(np.diff(np.diff(y))))
shimmer_apq5 = np.mean(np.abs(np.diff(np.diff(np.diff(y)))))

In [None]:
shimmer

0.0045029805

In [None]:
shimmer_db

-46.92999839782715

In [None]:
shimmer_apq3

0.002009001

In [None]:
shimmer_apq5

0.002356627

In [None]:
# Calculate NHR and HNR using librosa
nhr = librosa.effects.harmonic(y)[0]
hnr = librosa.effects.harmonic(y)[0] / librosa.effects.percussive(y)[0]

In [None]:
nhr

-3.4569282e-06

In [None]:
hnr

-1.0000006

In [None]:
spread1 = np.std(np.diff(pitch))
spread2 = np.std(np.diff(np.diff(pitch)))

In [None]:
spread1

16.705574

In [None]:
spread2

28.084023

In [None]:
mdvp_apq = np.mean(np.abs(np.diff(y, 2)))

In [None]:
mdvp_apq

0.002009001

In [None]:
shimmer_dda = np.mean(np.abs(np.diff(np.abs(y))))

In [None]:
shimmer_dda

0.004364014

In [None]:
dfa_value = nolds.dfa(y)

In [None]:
dfa_value

0.06668169159264219

In [None]:
# Sample ground truth pitch values
ground_truth_pitch = np.array([-18.13, -19.41, -23.37, -23.77, -26.09, -28.63, -30.43, -38.40, -39.31, -41.38, -60.42])

# Example pitch values (replace with your actual pitch values)
pitch = np.random.rand(len(ground_truth_pitch))

# Ensure the length of pitch is the same as ground_truth_pitch
pitch = pitch[:len(ground_truth_pitch)]

# Calculate PPE
ppe = np.mean(np.abs((ground_truth_pitch - pitch) / ground_truth_pitch)) / 10

print(f"PPE: {ppe:.6f}")

PPE: 0.101706


In [None]:
print(f"(fo): {fo:.6f} Hz")
print(f"(fhi): {fhi:.6f} Hz")
print(f"(flo): {flo:.6f} Hz")
print(f"Jitter(%): {jitter_percentage:.10f}")
print(f"Jitter(Abs): {jitter_absolute:.6f}")
print(f"RAP: {rap:.6f}")
print(f"PPQ: {ppq:.6f}")
print(f"Jitter:DDP: {jitter_ddp:.6f}")
print(f"Shimmer: {shimmer:.6f}")
print(f"Shimmer(dB): {shimmer_db:.6f}")
print(f"Shimmer:APQ3: {shimmer_apq3:.6f}")
print(f"Shimmer:APQ5: {shimmer_apq5:.6f}")
print(f"NHR: {nhr:.6f}")
print(f"HNR: {hnr:.6f}")
print(f"PPE: {ppe:.6f}%")
print(f"Spread1: {spread1:.6f}")
print(f"Spread2: {spread2:.6f}")
print(f"MDVP:APQ: {mdvp_apq:.6f}")
print(f"Shimmer:DDA: {shimmer_dda:.6f}")
print(f"DFA: {dfa_value:.6f}")

(fo): 340.664032 Hz
(fhi): 597.622253 Hz
(flo): 72.572464 Hz
Jitter(%): 0.0000000012
Jitter(Abs): 0.000021
RAP: 1.415956
PPQ: 0.713599
Jitter:DDP: 4.247867
Shimmer: 0.004503
Shimmer(dB): -46.929998
Shimmer:APQ3: 0.002009
Shimmer:APQ5: 0.002357
NHR: -0.000003
HNR: -1.000001
PPE: 0.101706%
Spread1: 16.705574
Spread2: 28.084023
MDVP:APQ: 0.002009
Shimmer:DDA: 0.004364
