In [1]:
import librosa
import math

def PitchPorPersona(y,sr):
    """Return the usable fundamental-frequency estimates of an audio signal.

    Runs ``librosa.pyin`` on ``y`` with the search range bounded by the
    notes C2 (lowest) and C7 (highest), then discards every estimate that
    is NaN or equal to zero.

    Parameters:
        y: audio signal, passed straight to ``librosa.pyin``.
        sr: sampling rate of ``y``.

    Returns:
        A Python list with the remaining f0 values, in their original order.
        The list is empty when no estimate survives the filtering.
    """
    lowest_hz = librosa.note_to_hz('C2')
    highest_hz = librosa.note_to_hz('C7')
    # Only the f0 track is used; the voicing flag/probability are ignored.
    contour, _voiced_flag, _voiced_probs = librosa.pyin(
        y, fmin=lowest_hz, fmax=highest_hz, sr=sr)

    kept = []
    for estimate in contour:
        # Float NaN entries are dropped; non-float entries are never
        # handed to math.isnan.
        if isinstance(estimate, float) and math.isnan(estimate):
            continue
        if estimate != 0:
            kept.append(estimate)
    return kept

import numpy as np

def Promediador(x):
    """Return the arithmetic mean of ``x`` as computed by ``np.mean``.

    Parameters:
        x: a scalar or array-like of numbers.

    Returns:
        The mean over all elements of ``x``.
    """
    promedio = np.mean(x)
    return promedio

import pyloudnorm as pyln
import librosa 

def LoudnessPorPersona(y,sr):
    """Measure the integrated loudness of an audio signal with pyloudnorm.

    Parameters:
        y: audio samples, passed to ``Meter.integrated_loudness``.
        sr: sampling rate used to build the ``pyln.Meter``.

    Returns:
        Whatever ``integrated_loudness`` reports for ``y``.
    """
    # A fresh meter is built per call because it is configured by sample rate.
    return pyln.Meter(sr).integrated_loudness(y)

In [None]:
from pydub import AudioSegment
from pydub.silence import split_on_silence

def WordSplitter(chunk_name,path,min_silence_len,silence_thresh,chunk_storage_directory):
    """Split a WAV file on silence and export every piece as its own WAV.

    Parameters:
        chunk_name: label embedded in each exported file name.
        path: location of the WAV file to split.
        min_silence_len: forwarded to ``split_on_silence`` as its second
            positional argument.
        silence_thresh: forwarded to ``split_on_silence`` as its third
            positional argument.
        chunk_storage_directory: directory that receives the exported files.

    Returns:
        None. Files are written as
        ``<chunk_storage_directory>/chunk<i>_de_<chunk_name>.wav`` and each
        destination is printed before it is exported.
    """
    recording = AudioSegment.from_wav(path)
    pieces = split_on_silence(recording, min_silence_len, silence_thresh)

    position = 0
    for piece in pieces:
        out_file = f"{chunk_storage_directory}/chunk{position}_de_{chunk_name}.wav"
        print("exporting", out_file)
        piece.export(out_file, format="wav")
        position += 1

# Inputs for splitting one recording into per-word chunks.
# NOTE(review): chunk_name ends in "2" while the source file is
# "...Pitched51.wav" — confirm which label is intended.
chunk_name = 'HolaComoEstasPitched2'
path = 'C:/Users/Asus/OneDrive/Escritorio/MapaDeVoces/Prototipos/Prototipo2/Audios/HolaComoEstasPitched51.wav'
chunk_storage_directory = 'C:/Users/Asus/OneDrive/Escritorio/MapaDeVoces/Prototipos/Prototipo3/AudiosPrototipo3'
min_silence_len = 11  # minimum silence length (pydub works in milliseconds) that separates two chunks
silence_thresh = -25  # audio quieter than this level (dBFS) is treated as silence
# WordSplitter takes five arguments; the output directory was previously
# omitted, which made this call fail with a TypeError.
WordSplitter(chunk_name, path, min_silence_len, silence_thresh, chunk_storage_directory)

In [None]:
import pandas as pd

def CalculadoraParametrosAcusticos(path_carpeta, nombre, cant_audios, extension, cant_basesdedatos, tipos=('Normal', 'Pitched')):
    """Build a table of mean pitch and loudness for several sets of audio files.

    For every dataset index ``j`` in ``range(cant_basesdedatos)`` and every
    audio index ``i`` in ``range(cant_audios)`` the file
    ``<path_carpeta>/<nombre[j]><i>.<extension[j]>`` is loaded with
    ``librosa.load`` and reduced to one row.

    Parameters:
        path_carpeta: folder that contains all audio files.
        nombre: per-dataset file-name prefixes (indexed by ``j``).
        cant_audios: number of files per dataset, numbered from 0.
        extension: per-dataset file extensions without the dot (indexed by ``j``).
        cant_basesdedatos: number of datasets to process.
        tipos: per-dataset labels written to the ``Tipo`` column. Datasets
            beyond ``len(tipos)`` are labelled with their ``nombre[j]`` prefix
            instead of silently reusing the previous dataset's label.

    Returns:
        A ``pandas.DataFrame`` with columns ``Pitch``, ``Loudness`` and
        ``Tipo``, one row per audio file (empty, with the same columns, when
        no file is processed).
    """
    columnas = ['Pitch', 'Loudness', 'Tipo']
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and row-wise growth is quadratic.
    registros = []
    for j in range(cant_basesdedatos):
        if j < len(tipos):
            variable_tipo = tipos[j]
        else:
            variable_tipo = nombre[j]
        for i in range(cant_audios):
            path = f'{path_carpeta}/{nombre[j]}{i}.{extension[j]}'
            y, sr = librosa.load(path)
            p = Promediador(PitchPorPersona(y, sr))
            l = Promediador(LoudnessPorPersona(y, sr))
            registros.append({'Pitch': p, 'Loudness': l, 'Tipo': variable_tipo})
    return pd.DataFrame(registros, columns=columnas)

In [None]:
# Description of the audio collections: dataset j contributes the files
# <path_carpeta>/<nombre[j]><i>.<extension[j]> for i in range(cant_audios).
path_carpeta = 'C:/Users/Asus/OneDrive/Escritorio/MapaDeVoces/Prototipos/Prototipo2/Audios'
nombre = ['HolaComoEstas','HolaComoEstasPitched']
extension = ['opus','wav']
cant_audios = 56
cant_basesdedatos = 2

# One row (Pitch, Loudness, Tipo) per audio file.
BaseDeDatos = CalculadoraParametrosAcusticos(
    path_carpeta=path_carpeta,
    nombre=nombre,
    cant_audios=cant_audios,
    extension=extension,
    cant_basesdedatos=cant_basesdedatos,
)

In [None]:
import umap
import seaborn as sns
from sklearn.preprocessing import StandardScaler

reducer = umap.UMAP()

# Mean pitch per audio as a single-feature column vector (n_samples, 1).
pitch = np.asarray(BaseDeDatos['Pitch']).reshape(-1, 1)

# Standardize the values (zero mean, unit variance) before embedding.
scaled_pitch = StandardScaler().fit_transform(pitch)

# Fit on the standardized data; the unscaled `pitch` was passed here before,
# which left `scaled_pitch` unused.
embedding = reducer.fit_transform(scaled_pitch)

In [None]:
import plotly.express as px

# Scatter plot of the two embedding columns, coloured by audio type.
fig = px.scatter(embedding, x=0, y=1, color=BaseDeDatos['Tipo'], labels={'color': 'Tipo'})

# Fixed 800x500 canvas.
fig.update_layout(autosize=True, width=800, height=500)
fig.show()