In [3]:
%pip install praat-parselmouth

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
from parselmouth.praat import *
from os import listdir
import pandas as pd
import parselmouth
import numpy as np

In [2]:
def syllable_nulei_detection(file_name):
        """
        Syllable nuclei detection with a praat script.

        Args:
            file_name: path of audio file

        Returns:
            float : the syllable nuclei's onset
            int : the number of extacted syllables
        """
        print("Extracting syllable intervals from '{}'...".format(file_name))

        # Use Praat script to extract syllables
        # For each file name, we first run the Praat script, passing the desired parameters
        # This script was slightly adapted, as it used to take a directory as argument and loop
        #         over the audio files in that directory but now only takes a single file name
        #         and executes the algorithm for that file
        # As described in the script file, these parameters are: 'Silence threshold (dB)',
        #         'Minimum dip between peaks (dB)', 'Minimum pause duration', and the filename

        silence_threshold = -25
        minimum_dip_between_peaks = 2
        minimum_pause_duration = 0.3
        objects = run_file('syllable_nuclei.praat', silence_threshold, minimum_dip_between_peaks,
                           minimum_pause_duration, file_name)

        # The script selects two objects at the end, the Sound object and the TextGrid
        # These two objects are returned in a list, and now we assign the second one to the variable 'textgrid'
        textgrid = objects[1]

        # Call the Praat command "Get number of points" to query the amount of points in the first tier
        first_tier = 1
        syllable_number = call(textgrid, "Get number of points", first_tier)

        # Make a list that queries the time of the point in the TextGrid for all points 1 to n
        #         (through a Python 'list comprehension', in this case, but one could also repeatedly 'append')
        syllable_nuclei = [call(textgrid, "Get time of point", 1, i + 1) for i in range(syllable_number)]

        # Use NumPy to calculate intervals between the syllable nuclei
        # syllable_intervals = np.diff(syllable_nuclei)
        return syllable_nuclei, syllable_number

In [3]:
files = listdir('/Users/azanella/Documents/repo_git/ai-hackatech/StudioVRAI/hackatech/raw-audio')
files

['QG_20230404144514_1_3329410_3583690_9046_55482.wav',
 'QG_20230411144521_1_6466900_6730600_9952_78448.wav',
 'QG_20230516144510_1_2814980_3082510_11655_123375.wav',
 'QG_20230606143049_1_5188860_5447990_12853_98321.wav',
 'QG_20230411144521_1_3589780_3847440_131450_155750.wav',
 'QG_20230411144521_1_1799010_2112820_118429_132268.wav',
 'QG_20230404144514_1_4028770_4294050_15414_131582.wav',
 'QG_20230523144507_1_6804310_6977820_17791_82776.wav',
 'QG_20230411144521_1_276170_524080_59282_76298.wav',
 'QG_20230411144521_1_276170_524080_9514_17199.wav',
 'QG_20230516144510_1_310070_652390_7424_139276.wav',
 'QG_20230411144521_1_3060700_3328720_6341_59256.wav',
 'QG_20230523144507_1_5782600_6042070_129709_203474.wav',
 'QG_20230523144507_1_5282460_5527520_9583_121176.wav',
 'QG_20230411144521_1_6466900_6730600_88206_123137.wav',
 'QG_20230411144521_1_3589780_3847440_155941_172013.wav',
 'QG_20230411144521_1_1540700_1799010_153052_178890.wav',
 'QG_20230404144514_1_1071010_1342160_196852_

In [4]:
nb_syllabes_liste = []
pitch_modes = []
std_vitesses_syll = []

for file in files:
    syllabes_timestamps, nb_syllabes = syllable_nulei_detection(f'/Users/azanella/Downloads/raw-audio/{file}')
    nb_syllabes_liste.append(nb_syllabes)
    std_vitesses_syll.append(np.std(pd.Series(syllabes_timestamps).diff()))

    snd = parselmouth.Sound(f'/Users/azanella/Downloads/raw-audio/{file}')
    pitch = snd.to_pitch()
    pitch_values = list(pitch.selected_array['frequency'])

    pitch_values_new = [ pitch_value for pitch_value in pitch_values  if pitch_value != 0 ]

    count, values = np.histogram(pitch_values_new)
    pitch_mode = values[count.argmax()]
    pitch_modes.append(pitch_mode)

Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230404144514_1_3329410_3583690_9046_55482.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230411144521_1_6466900_6730600_9952_78448.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230516144510_1_2814980_3082510_11655_123375.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230606143049_1_5188860_5447990_12853_98321.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230411144521_1_3589780_3847440_131450_155750.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230411144521_1_1799010_2112820_118429_132268.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230404144514_1_4028770_4294050_15414_131582.wav'...
Extracting syllable intervals from '/Users/azanella/Downloads/raw-audio/QG_20230523144507_1_6804310_

In [5]:
df_new = pd.DataFrame({'file_name': files, 'nb_syllabes':nb_syllabes_liste, 'pitch_mode':pitch_modes, 'std_vitesses_syll': std_vitesses_syll})
df_new

Unnamed: 0,file_name,nb_syllabes,pitch_mode,std_vitesses_syll
0,QG_20230404144514_1_3329410_3583690_9046_55482...,235,174.174541,0.136079
1,QG_20230411144521_1_6466900_6730600_9952_78448...,320,127.597881,0.145192
2,QG_20230516144510_1_2814980_3082510_11655_1233...,550,130.775636,0.176611
3,QG_20230606143049_1_5188860_5447990_12853_9832...,429,113.059435,0.130182
4,QG_20230411144521_1_3589780_3847440_131450_155...,112,154.221554,0.154159
...,...,...,...,...
140,QG_20230411144521_1_2852380_3060700_45562_5833...,61,244.723752,0.103865
141,QG_20230516144510_1_2065030_2328650_13036_1173...,443,175.997108,0.221359
142,QG_20230509144503_1_3501590_3719560_10448_1004...,384,161.094615,0.159785
143,QG_20230411144521_1_2112820_2342300_79237_9208...,56,324.933621,0.212373


In [6]:
df = pd.read_csv('/Users/azanella/Documents/repo_git/ai-hackatech/StudioVRAI/hackatech/data-csv/tchatche_split_x.csv')
df['file_name']=df['file_name'].astype(str)
df_new['file_name']=df_new['file_name'].astype(str)
df_syll = df.merge(df_new, on='file_name')
df_syll['ecart_pitch'] = df_syll['pitch_max'] - df_syll['pitch_mode']
df_syll['speed_syll'] = df_syll['nb_syllabes'] / df_syll['duration']
df_syll

Unnamed: 0,file_name,rating,duration,transcript,speed,intensity_avg,intensity_std,intensity_min,intensity_max,pitch_avg,...,mfcc1_max,mfcc2_avg,mfcc2_std,mfcc2_min,mfcc2_max,nb_syllabes,pitch_mode,std_vitesses_syll,ecart_pitch,speed_syll
0,QG_20230404144514_1_1071010_1342160_1283_32095...,7,30.811774,La parole est à Madame Sandra Rogol pour le gr...,21.420383,61.986624,8.011046,38.471184,74.344699,277.552922,...,608.373617,-189.272256,80.048660,-400.907286,53.735662,143,268.794548,0.177521,226.914867,4.641083
1,QG_20230404144514_1_1071010_1342160_39370_4386...,8,4.493384,"En fait, toutes les personnes que vos réformes...",133.529662,58.555230,11.130439,40.060172,74.633179,257.859616,...,578.667946,-193.903590,70.654590,-363.756655,-18.703706,18,248.203017,0.061662,136.583290,4.005890
2,QG_20230404144514_1_1071010_1342160_74461_8730...,6,12.838239,On évalue à 8 à 10000000 le nombre de personne...,200.962142,62.371415,9.924436,35.941441,74.467563,276.012197,...,655.406787,-203.887754,79.779320,-392.417789,47.712070,63,228.365794,0.111225,355.141322,4.907215
3,QG_20230404144514_1_1071010_1342160_124958_130...,6,5.349266,Mettre en danger sanitaire et alimentaire la p...,112.164916,61.651891,7.341411,47.242503,73.520737,270.716905,...,557.193586,-174.619983,80.597447,-343.678773,19.960022,24,274.755224,0.082811,197.418560,4.486597
4,QG_20230404144514_1_1071010_1342160_144858_182...,8,37.230893,Vous seriez un peu plus crédible dans votre in...,45.123817,63.153302,7.400139,41.816996,73.855343,223.024390,...,582.499480,-212.057757,62.273174,-432.594033,-2.517094,186,199.137773,0.132196,376.249384,4.995851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,QG_20230606143049_1_4131660_4401170_9172_11901...,4,109.839912,Merci Madame la Présidente. Ma question s'adre...,50.254957,55.906207,10.019248,21.307589,73.525577,169.650055,...,660.628248,-154.019409,67.813250,-394.369863,98.831707,478,128.232970,0.153074,470.348089,4.351788
141,QG_20230606143049_1_4401170_4658940_9657_10173...,3,92.078959,"Monsieur le ministre de l'Industrie, je souhai...",42.354953,53.729482,14.080871,20.863504,74.807030,227.715132,...,590.002806,-150.646592,83.299204,-422.164288,114.628467,336,202.391501,0.212840,286.429900,3.649042
142,QG_20230606143049_1_4658940_4933650_10519_1257...,1,115.240779,"Merci Madame la Présidente, Madame la Première...",41.131273,55.534139,12.236728,21.047287,73.762018,318.873015,...,560.683494,-177.151921,81.668866,-496.680142,153.284505,474,333.818563,0.220948,253.424203,4.113127
143,QG_20230606143049_1_4933650_5188860_12476_1296...,1,117.186535,Merci Madame la Présidente. Ma question s'adre...,38.912321,55.641764,12.188877,24.091451,73.166007,246.145626,...,590.255354,-176.949404,73.751276,-405.402295,83.508661,513,237.279534,0.175118,362.174797,4.377636


In [7]:
df_syll[['nb_syllabes', 'speed_syll']].describe()

Unnamed: 0,nb_syllabes,speed_syll
count,145.0,145.0
mean,265.151724,4.539818
std,183.217324,0.456407
min,17.0,3.206705
25%,106.0,4.240135
50%,202.0,4.563101
75%,437.0,4.920901
max,637.0,5.510917


In [28]:
df_syll.to_csv('tchatche_split_x_new.csv', index=False)

# Test Catboost model

In [None]:
%pip install catboost

Defaulting to user installation because normal site-packages is not writeable
Collecting catboost
  Downloading catboost-1.2.2-cp39-cp39-macosx_11_0_universal2.whl (25.8 MB)
[K     |████████████████████████████████| 25.8 MB 5.5 MB/s eta 0:00:01
Collecting plotly
  Downloading plotly-5.18.0-py3-none-any.whl (15.6 MB)
[K     |████████████████████████████████| 15.6 MB 4.5 MB/s eta 0:00:01
[?25hCollecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
[K     |████████████████████████████████| 47 kB 4.6 MB/s eta 0:00:01
Collecting tenacity>=6.2.0
  Downloading tenacity-8.2.3-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly, graphviz, catboost
Successfully installed catboost-1.2.2 graphviz-0.20.1 plotly-5.18.0 tenacity-8.2.3
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [36]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# drop the 'rating' column
X = df.drop(['rating', 'file_name', 'transcript'], axis=1)

# standardize the data 
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# get outlier indexes and remove
outlier_indexes = np.where((X_scaled > 3) | (X_scaled < -3))
X_clean = np.delete(X_scaled, outlier_indexes, axis=0)
rating = np.delete(df.rating.values, outlier_indexes, axis=0)


In [41]:
# split the data into training and testing sets
#X_train, X_test, y_train, y_test = train_test_split(X_clean, rating, test_size=0.2, random_state=42)

train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# train is now 75% of the entire data set
x_train, x_test, y_train, y_test = train_test_split(X_clean, rating, test_size=1 - train_ratio)

# test is now 10% of the initial data set
# validation is now 15% of the initial data set
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio)) 

print(x_train, x_val, x_test)

[[-1.14736596  1.00239229  1.12233542 ... -1.55299557  1.73239824
  -0.98788169]
 [ 1.19523759 -1.49513179 -2.38350915 ... -0.80621451  0.33277805
  -0.89734907]
 [-0.14008283 -0.21534349 -1.32496413 ...  0.14146077 -1.13116052
   0.30446745]
 ...
 [ 0.83618877 -0.97812445 -2.96285491 ...  1.41638211 -0.3960554
   1.04294285]
 [ 0.29361454 -0.38095616  0.49715186 ...  1.22007879 -0.63924351
   0.0470215 ]
 [-0.54365021  1.29163715  1.12778515 ...  0.11384804 -1.65380649
  -0.85172585]] [[-5.82585446e-01  8.56909334e-01  1.36797950e+00 -1.31624667e+00
   3.75514234e-01  1.30212410e+00  9.71616131e-01  1.38735403e+00
  -3.70928743e-01  6.71846436e-01  1.64552472e+00 -1.44951618e+00
   3.66509488e-01  3.20222452e-01 -2.97945960e-01 -1.59051385e+00
   1.13090172e+00 -6.89947171e-01 -2.54160150e-02 -2.83180275e+00
   1.22884861e+00 -1.65696621e+00]
 [-9.30521802e-01  7.19247000e-01 -8.90555624e-01  1.24064421e+00
   1.10122869e-01  8.54588168e-01 -1.49743918e-01 -5.84992973e-01
   6.2340041

In [38]:
from catboost import CatBoostClassifier, Pool, metrics, cv
from sklearn.metrics import accuracy_score

In [42]:
model = CatBoostClassifier(
    custom_loss=[metrics.Accuracy()],
    random_seed=42,
    logging_level='Silent'
)

In [43]:
model.fit(
    x_train, y_train,
    #cat_features=categorical_features_indices,
    eval_set=(x_val, y_val),
    logging_level='Verbose',  # you can uncomment this for text output
    plot=True
);

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.102899
0:	learn: 2.0419441	test: 2.0910766	best: 2.0910766 (0)	total: 4.52ms	remaining: 4.52s
1:	learn: 2.0138400	test: 2.0901551	best: 2.0901551 (1)	total: 6.12ms	remaining: 3.05s
2:	learn: 1.9853809	test: 2.0875328	best: 2.0875328 (2)	total: 7.85ms	remaining: 2.61s
3:	learn: 1.9607222	test: 2.0928651	best: 2.0875328 (2)	total: 9.61ms	remaining: 2.39s
4:	learn: 1.9326969	test: 2.0907490	best: 2.0875328 (2)	total: 11.3ms	remaining: 2.25s
5:	learn: 1.9112627	test: 2.0879844	best: 2.0875328 (2)	total: 14.2ms	remaining: 2.35s
6:	learn: 1.8806849	test: 2.1018360	best: 2.0875328 (2)	total: 16.7ms	remaining: 2.37s
7:	learn: 1.8577666	test: 2.1080197	best: 2.0875328 (2)	total: 18.6ms	remaining: 2.31s
8:	learn: 1.8316776	test: 2.1023012	best: 2.0875328 (2)	total: 20.3ms	remaining: 2.23s
9:	learn: 1.8062506	test: 2.1010266	best: 2.0875328 (2)	total: 21.8ms	remaining: 2.16s
10:	learn: 1.7838811	test: 2.0959603	best: 2.0875328 (2)	total: 23.3ms	remaining: 2.1s
11:	learn: 1.

In [44]:
predictions = model.predict(x_test)
predictions_probs = model.predict_proba(x_test)
print(predictions[:10])
print(predictions_probs[:10])

[[1]
 [1]
 [4]
 [3]
 [3]
 [4]
 [2]
 [5]
 [5]
 [3]]
[[0.14435604 0.1210451  0.13514956 0.13125995 0.12235908 0.11567507
  0.11355803 0.11659718]
 [0.14016863 0.11833979 0.12488171 0.11835071 0.13264659 0.12131026
  0.12586023 0.11844208]
 [0.13497145 0.12163431 0.12283493 0.13657628 0.13054609 0.11929084
  0.11707873 0.11706736]
 [0.12751075 0.13158553 0.14149828 0.11745774 0.12668815 0.11749572
  0.12022829 0.11753554]
 [0.12555087 0.12710629 0.13036923 0.11949105 0.12567984 0.12583197
  0.11954995 0.1264208 ]
 [0.13670535 0.12381533 0.12150064 0.14127968 0.13170825 0.11835084
  0.11331163 0.11332827]
 [0.12366307 0.13575463 0.13365061 0.12465977 0.12671329 0.11759259
  0.12034779 0.11761824]
 [0.12905743 0.13352212 0.11863228 0.11597652 0.14616849 0.12243069
  0.11594041 0.11827206]
 [0.12905743 0.13352212 0.11863228 0.11597652 0.14616849 0.12243069
  0.11594041 0.11827206]
 [0.12609735 0.12303085 0.133625   0.11976587 0.13120313 0.12335714
  0.11985463 0.12306603]]


In [45]:
from sklearn.metrics import mean_squared_error

In [46]:
# evaluate the performance of the model using mean squared error
mse = mean_squared_error(y_test, predictions)
print('Mean squared error:', mse)

Mean squared error: 6.416666666666667
