In [2]:
import os
import sys

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Create the dataframe for TORGO dysarthric speech

In [2]:
def dislevel (spk):
    """Map a TORGO dysarthric speaker code to its severity group.

    Parameters
    ----------
    spk : str
        Speaker code such as 'F01' or 'M03'.

    Returns
    -------
    tuple of (str, str)
        ('Sev-Mod-Sev', '0') for F01, M01, M02, M04 and M05;
        ('Mod-Mild', '1') for F03, F04 and M03.

    Raises
    ------
    ValueError
        If ``spk`` is not one of the eight codes listed above.
    """
    if spk in ('F01', 'M01', 'M02', 'M04', 'M05'):
        return 'Sev-Mod-Sev', '0'
    if spk in ('F03', 'F04', 'M03'):
        return 'Mod-Mild', '1'
    # An unknown code used to fall through to the return statement with
    # unbound locals (UnboundLocalError); fail with an explicit message instead.
    raise ValueError(f'speaker non esiste: {spk}')
    
def spkID (spk):
    """Return the zero-based numeric id of a TORGO dysarthric speaker.

    The id is the position of ``spk`` in the fixed list
    F01, F03, F04, M01, M02, M03, M04, M05.

    Raises
    ------
    ValueError
        If ``spk`` is not in the list (ValueError is a subclass of Exception,
        so existing ``except Exception`` handlers keep working).
    """
    speaker=['F01', 'F03', 'F04', 'M01', 'M02', 'M03', 'M04', 'M05']
    if spk not in speaker:
        # Name the offending code so a bad path or typo is easy to trace.
        raise ValueError(f'speaker non esiste: {spk}')
    return speaker.index(spk)

In [None]:
# create a dataframe with all data from TORGO dysarthric

file_path='TTDS/dataset/output/filelists/TORGO/TORGO_split.csv'
df = pd.read_csv(file_path)
df= df[df['corpus'] != 'TORGO_control']
speaker=df['speaker'].unique()
blocks=df['block'].unique()

# Collect the rows in a plain list and build the frame once at the end:
# growing a DataFrame with .loc[i] copies it on every insertion.
records = []

for spk in speaker:
    df_speaker=df[df['speaker']==spk]
    for block in blocks:
        df_speaker_block=df_speaker[df_speaker['block']==block]
        for _, row in df_speaker_block.iterrows():
            wav=row.iloc[0]
            # The speaker code is taken from the 7th '/'-separated path component.
            spk_code = wav.split('/')[6]
            id_spk=spkID(spk_code)
            dis, disid=dislevel(spk_code)
            # Distinct names so the loop variables `speaker` and `block`
            # are not overwritten while they are being iterated.
            row_block=row.iloc[3]
            text=row.iloc[4]
            durata= row.iloc[7]
            records.append([wav,spk_code,id_spk,dis,disid,row_block,text,'',durata])

df_original = pd.DataFrame(
    records,
    columns=['filename', 'speaker', 'id','disartria', 'label', 'block','text','split', 'durata'],
)

df_original.to_csv('original_speech.csv', index=False)

['B1' 'B2' 'B3']
['TORGO_F01' 'TORGO_F03' 'TORGO_F04' 'TORGO_M01' 'TORGO_M02' 'TORGO_M03'
 'TORGO_M04' 'TORGO_M05']


In [4]:
# Preview the first rows of the assembled TORGO dysarthric table.
df_original.head()

Unnamed: 0,filename,speaker,id,disartria,label,block,text,split,durata
0,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,STICK,,1.8
1,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,PAT,,1.95
2,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,UP,,1.95
3,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,MEAT,,0.75
4,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,MEAT,,1.95


#### Add a column to the original dataframe to distinguish phrases from single words

In [5]:
# Reload the TORGO table from disk and list the distinct speaker codes.
# NOTE(review): the CSV was written above with a relative path but is read here
# through an absolute one — confirm both resolve to the same file.
filepath = '/home/tbasili/TTDS/ResNet/dataset_analysis/original_speech.csv'
df = pd.read_csv(filepath, sep=',')
# df.head(25)
id_speaker = df['speaker'].unique() #list with unique speakers id
print(id_speaker)

['F01' 'F03' 'F04' 'M01' 'M02' 'M03' 'M04' 'M05']


In [None]:
#classification between word and phrase
# A prompt containing at least one space is a phrase, otherwise a single word.
# The test is evaluated row by row on the frame itself, so each label stays
# attached to its own row even when the rows are not grouped by speaker
# (the previous per-speaker list was assigned back purely by position).
is_phrase = df['text'].str.contains(' ', regex=False, na=False)
tot = np.where(is_phrase, 'phrase', 'word').tolist()
df['type'] = tot
print(len(tot))

In [None]:
# Persist the table, now including the 'type' column, without the index.
df.to_csv('original_speech_v2.csv', sep=',', index=False) # save the dataframe with the new column

#### Now we check some statistics

In [None]:
# Reload the labelled table; by default the statistics below cover all speakers.
new_df = pd.read_csv('original_speech_v2.csv', sep=',')

new_id_speaker = new_df['speaker'].unique() #list with unique speakers id

# Uncomment to restrict the analysis to one speaker (change the index to switch patient):
# single_patient_df = new_df[new_df['speaker'] == new_id_speaker[0]]
single_patient_df = new_df
print(len(single_patient_df))
single_patient_df.head(25)



In [None]:
# Bar chart and printed table of how many utterances are words vs. phrases.
type_counts = single_patient_df['type'].value_counts()
type_counts.plot(kind='bar')
plt.title('Word vs Phrase')
plt.xlabel('Type')
plt.ylabel('Count')
print(type_counts)

In [None]:
# Split the table by the 'type' label into phrase rows and single-word rows.
df_phrase = single_patient_df[single_patient_df['type'] == 'phrase']
df_word = single_patient_df[single_patient_df['type'] == 'word']

In [None]:
# Mean / max / min of the 'durata' column for phrases and for single words,
# followed by one histogram per group.
phrase_durata = df_phrase['durata']
word_durata = df_word['durata']

mean_phrase = phrase_durata.mean()
print('durata_mean_phrase', mean_phrase)
mean_word = word_durata.mean()
print('durata_mean_word', mean_word)

max_d_phrase = phrase_durata.max()  # use idxmax() to get the row label of the maximum instead
print('max_d_phrase', max_d_phrase)
max_d_word = word_durata.max()
print('max_d_word', max_d_word)

min_d_phrase = phrase_durata.min()
print('min_d_phrase', min_d_phrase)
min_d_word = word_durata.min()
print('min_d_word', min_d_word)

plt.figure(figsize=(10, 5))
phrase_durata.hist(bins=50, alpha=0.5, color='blue', label='Phrase')
plt.figure(figsize=(10, 5))
word_durata.hist(bins=50, alpha=0.5, color='red', label='Word')


In [11]:
# Preview the first phrase rows.
df_phrase.head()

Unnamed: 0,filename,speaker,id,disartria,label,block,text,split,durata,type
6,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,HE SLOWLY TAKES A SHORT WALK IN THE OPEN AIR E...,,6.9,phrase
13,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,YOU WISHED TO KNOW ALL ABOUT MY GRANDFATHER,,5.85,phrase
18,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG,,4.95,phrase
19,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,SHE HAD YOUR DARK SUIT IN GREASY WASH WATER AL...,,7.95,phrase
22,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,GIVING THOSE WHO OBSERVE HIM A PRONOUNCED FEEL...,,12.6,phrase


### Create dataframe with all synthetic data

In [None]:
def dislevel (spk):
    """Map a TORGO dysarthric speaker code to its severity group.

    Parameters
    ----------
    spk : str
        Speaker code such as 'F01' or 'M03'.

    Returns
    -------
    tuple of (str, str)
        ('Sev-Mod-Sev', '0') for F01, M01, M02, M04 and M05;
        ('Mod-Mild', '1') for F03, F04 and M03.

    Raises
    ------
    ValueError
        If ``spk`` is not one of the eight codes listed above.
    """
    if spk in ('F01', 'M01', 'M02', 'M04', 'M05'):
        return 'Sev-Mod-Sev', '0'
    if spk in ('F03', 'F04', 'M03'):
        return 'Mod-Mild', '1'
    # An unknown code used to fall through to the return statement with
    # unbound locals (UnboundLocalError); fail with an explicit message instead.
    raise ValueError(f'speaker non esiste: {spk}')
    
def spkID (spk):
    """Return the zero-based numeric id of a TORGO dysarthric speaker.

    The id is the position of ``spk`` in the fixed list
    F01, F03, F04, M01, M02, M03, M04, M05.

    Raises
    ------
    ValueError
        If ``spk`` is not in the list (ValueError is a subclass of Exception,
        so existing ``except Exception`` handlers keep working).
    """
    speaker=['F01', 'F03', 'F04', 'M01', 'M02', 'M03', 'M04', 'M05']
    if spk not in speaker:
        # Name the offending code so a bad path or typo is easy to trace.
        raise ValueError(f'speaker non esiste: {spk}')
    return speaker.index(spk)
    

def get_text(ref_path,speaker,block,flag):
    """Return one stripped line of a speaker/block label file.

    The file read is ``<ref_path>/TORGO_<speaker>/<block>_labels.txt`` and
    ``flag`` is the index of the wanted line in that file.
    """
    label_file = os.path.join(ref_path, f"TORGO_{speaker}", f"{block}_labels.txt")
    with open(label_file, 'r') as handle:
        rows = list(handle)
    return rows[flag].strip()

In [None]:
syn_dir='/TTDS/dataset/output/Grad-TTS_TORGO/HFGN_inference/TORGO_all' 
ref_path='/TTDS/dataset/output/filelists/speechdiff/TORGO'

speaker=['F01','F03','F04','M01','M02','M03', 'M04','M05']
blocks=['B1','B2','B3']

# Rows are gathered in a list and turned into a DataFrame once at the end,
# which avoids the per-row copy incurred by df.loc[i] = [...].
syn_records = []
for s in speaker:
    dis, disid= dislevel(s)
    id_spk = spkID(s)  # not named `id`, which would shadow the builtin notebook-wide
    for block in blocks:
        inference_dir = os.path.join(syn_dir, f"TORGO_{s}", block, "inference_files")
        # File names are numeric stems (e.g. "12.wav"); sort numerically so the
        # n-th file lines up with the n-th line of the block's label file.
        file = sorted(os.listdir(inference_dir), key=lambda x: int(os.path.splitext(x)[0]))
        for flag, f in enumerate(file):
            wav_path = os.path.join(inference_dir, f)
            txt=get_text(ref_path,s,block,flag)
            durata=librosa.get_duration(path=wav_path)
            syn_records.append([wav_path, s, id_spk, dis, disid, block, txt, '', durata])

df_syn = pd.DataFrame(
    syn_records,
    columns=['filename', 'speaker', 'id','disartria', 'label', 'block','text','split', 'durata'],
)

df_syn.to_csv('/home/tbasili/TTDS/ResNet/dataset_analysis/synthetic_speech.csv', index=False)



#### Add a column to the synthetic speech dataframe to distinguish phrases from single words

In [None]:
df_syn=pd.read_csv('TTDS/ResNet/dataset_analysis/synthetic_speech.csv',sep=',')
id_speaker = df_syn['speaker'].unique() #list with unique speakers id
print('Speaker: ', id_speaker)

# A prompt containing at least one space is a phrase, otherwise a single word.
# The test is evaluated row by row on the frame itself, so each label stays
# attached to its own row even when the rows are not grouped by speaker
# (the previous per-speaker list was assigned back purely by position).
is_phrase = df_syn['text'].str.contains(' ', regex=False, na=False)
tot = np.where(is_phrase, 'phrase', 'word').tolist()
df_syn['type'] = tot
print('Totale audio sintetizzati: ', len(tot))

df_syn.head(10)

In [None]:
# Persist the synthetic table, now including the 'type' column, without the index.
df_syn.to_csv('/home/tbasili/TTDS/ResNet/dataset_analysis/synthetic_speech_v2.csv', sep=',', index=False) # save the dataframe with the new column

#### Analysis

In [None]:
# Word/phrase counts for the synthetic set, then duration statistics and
# one histogram per group.
type_counts = df_syn['type'].value_counts()
type_counts.plot(kind='bar')
plt.title('Word vs Phrase')
plt.xlabel('Type')
plt.ylabel('Count')
print(type_counts)

df_syn_phrase = df_syn[df_syn['type'] == 'phrase']
df_syn_word = df_syn[df_syn['type'] == 'word']

syn_phrase_durata = df_syn_phrase['durata']
syn_word_durata = df_syn_word['durata']

mean_phrase = syn_phrase_durata.mean()
print('mean_d_phrase', mean_phrase)
mean_word = syn_word_durata.mean()
print('mean_d_word', mean_word)

max_d_phrase = syn_phrase_durata.max()  # use idxmax() to get the row label of the maximum instead
print('max_d_phrase', max_d_phrase)
max_d_word = syn_word_durata.max()
print('max_d_word', max_d_word)

min_d_phrase = syn_phrase_durata.min()
print('min_d_phrase', min_d_phrase)
min_d_word = syn_word_durata.min()
print('min_d_word', min_d_word)

plt.figure(figsize=(10, 5))
syn_phrase_durata.hist(bins=50, alpha=0.5, color='blue', label='Phrase')
plt.figure(figsize=(10, 5))
syn_word_durata.hist(bins=50, alpha=0.5, color='red', label='Word')

### VAD example

In [None]:
from IPython.display import Audio, display
import librosa
import numpy as np
from vad import EnergyVAD #https://pypi.org/project/vad/
import matplotlib.pyplot as plt
import os
#from extract_windows import extract_non_overlapped_windows
import noisereduce as nr

In [None]:
# Pick one phrase recording by position (row 1002 of df_phrase) as the VAD demo input.
audio_filepath=df_phrase['filename'].iloc[1002]
text=df_phrase['text'].iloc[1002]
print('Transcription: ', text)
print('File audio: ', audio_filepath)
# Load the file resampled to 16 kHz; the same rate is reused by the VAD cells below.
sampling_rate = 16000
audio, sr = librosa.load(audio_filepath, sr=sampling_rate)
print(f'Audio file length: {len(audio)} samples at {sr} Hz')
# Inline audio player for listening to the selected utterance.
sound = Audio(audio, rate=sampling_rate)
display(sound)

In [None]:
# normalize

def normalize_audio(audio):
    """Scale a signal so that its largest absolute sample equals 1.

    Parameters
    ----------
    audio : numpy.ndarray
        Audio samples.

    Returns
    -------
    numpy.ndarray
        ``audio / max(|audio|)``. The input is returned unchanged when it is
        empty or when every sample is zero (nothing to scale).
    """
    # np.max raises on a zero-size array; an empty clip has nothing to normalise.
    if np.size(audio) == 0:
        return audio
    max_val = np.max(np.abs(audio))
    if max_val > 0:
        return audio / max_val
    return audio

In [None]:
# Run an energy-based VAD on the example utterance and overlay its decisions on the waveform.
audio=normalize_audio(audio)

# The detection threshold is the energy (sum of squares) of the last 500 samples
# plus a fixed margin of 0.065.
# NOTE(review): presumably the tail of the recording is treated as background noise — confirm.
segnale = audio[-500:]
energia = np.sum(segnale**2)
thr = energia + 0.065
frame_len = 50
shift = 20
print(f'Signal energy: {energia}')
print(f'VAD threshold: {thr}')

vad = EnergyVAD(
    sample_rate = sampling_rate,
    frame_length = frame_len, # in milliseconds
    frame_shift = shift, # in milliseconds 20
    energy_threshold = thr, # you may need to adjust this value
    pre_emphasis = 0.95,
) 

voice_activity = vad(audio)

# Frame length and frame shift converted from milliseconds to samples;
# these two globals are also read by start_end() further down.
frame_len_samples = int(sampling_rate * frame_len //1000)
frame_shift_samples = int(sampling_rate * shift // 1000 )


### PLOT ###
plt.figure(figsize=(10, 5))
#plt.subplot(2,1,1)
plt.plot(audio)
# Repeat each per-frame decision frame_shift_samples times so the VAD curve
# is drawn on the same sample axis as the waveform.
plt.plot(np.repeat(voice_activity, frame_shift_samples ))
#plt.subplot(2,1,2)
#plt.plot(voice_activity,"r")
# plt.savefig("vad_output.png")
plt.show()

In [None]:
# return the start and end sample of each audio segment

def start_end(audio, activity):
    """Turn per-frame VAD decisions into (start, end) sample ranges.

    A range opens at the first frame flagged 1 and closes at the next frame
    flagged 0. Frame positions are converted to samples with the notebook
    globals ``frame_shift_samples`` and ``frame_len_samples``. A range that is
    still open after the last frame is closed at ``len(audio)``.
    """
    spans = []
    onset = None  # start sample of the run being tracked; None while in silence

    for frame_idx, flag in enumerate(activity):
        if onset is None:
            if flag == 1:
                onset = frame_idx * frame_shift_samples
        elif flag == 0:
            spans.append((onset, frame_idx * frame_shift_samples + frame_len_samples))
            onset = None

    # Speech that runs up to the end of the recording
    if onset is not None:
        spans.append((onset, len(audio)))

    return spans



In [None]:
import soundfile as sf

# Use the shared helper instead of a second copy of the same state machine.
segments = start_end(audio, voice_activity)

# Create the output folder.
# os.makedirs("") raises FileNotFoundError, so the folder is only created when
# a path is configured; an empty value writes into the current directory.
output_dir = ""
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Save segment
for idx, (start, end) in enumerate(segments):
    segment_audio = audio[start:end]
    # Segment energy is printed to help choose a rejection threshold.
    energy_seg=np.sum(np.square(segment_audio))
    print(energy_seg)
    #if energy_seg>=3:
    output_path = os.path.join(output_dir, f"segment_{idx+1}.wav")
    sf.write(output_path, segment_audio, samplerate=sampling_rate)
    print(f"Salvato: {output_path} ({end - start} samples)")

### Segment all phrases from TORGO dysarthric

In [212]:
# Show up to the first 1306 phrase rows (the notebook truncates the rendering).
df_phrase.head(1306)

Unnamed: 0,filename,speaker,id,disartria,label,block,text,split,durata,type
6,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,HE SLOWLY TAKES A SHORT WALK IN THE OPEN AIR E...,,6.900,phrase
13,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,YOU WISHED TO KNOW ALL ABOUT MY GRANDFATHER,,5.850,phrase
18,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG,,4.950,phrase
19,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,SHE HAD YOUR DARK SUIT IN GREASY WASH WATER AL...,,7.950,phrase
22,/home/tbasili/TTDS/dataset/TORGO_DIR/F01/Sessi...,F01,0,Sev-Mod-Sev,0,B1,GIVING THOSE WHO OBSERVE HIM A PRONOUNCED FEEL...,,12.600,phrase
...,...,...,...,...,...,...,...,...,...,...
5528,/home/tbasili/TTDS/dataset/TORGO_DIR/M05/Sessi...,M05,7,Sev-Mod-Sev,0,B3,YOU'D BE BETTER OFF TAKING A COLD SHOWER,,10.570,phrase
5538,/home/tbasili/TTDS/dataset/TORGO_DIR/M05/Sessi...,M05,7,Sev-Mod-Sev,0,B3,I HAVE HAD MY BELL RUNG,,7.365,phrase
5541,/home/tbasili/TTDS/dataset/TORGO_DIR/M05/Sessi...,M05,7,Sev-Mod-Sev,0,B3,STUDENTS WATCHED AS HE GOT OUT,,8.325,phrase
5548,/home/tbasili/TTDS/dataset/TORGO_DIR/M05/Sessi...,M05,7,Sev-Mod-Sev,0,B3,BEG THAT GUARD FOR ONE GALLON OF GAS,,9.635,phrase


In [None]:
# return identification code 

def codice(filepath):
    """Build an identification code for a TORGO recording from its path.

    The code joins, with underscores, the '/'-separated directory components
    at positions 5, 6, 7 and 8 and the file name up to its first dot.
    """
    directory, filename = os.path.split(filepath)
    stem = filename.split('.')[0]
    pieces = directory.split('/')
    return '_'.join([pieces[5], pieces[6], pieces[7], pieces[8], stem])


In [None]:
sampling_rate = 16000
frame_len = 50
shift=20
#energia=0.020
frame_len_samples = int(sampling_rate * frame_len //1000)
frame_shift_samples = int(sampling_rate * shift // 1000 )

# create folder where to save the audio files
# os.makedirs("") raises FileNotFoundError, so the root is only created when a
# path is configured; an empty value puts the speaker folders in the current directory.
output_dir = ""
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Rows are gathered in a list and turned into a DataFrame once at the end,
# which avoids the per-row copy incurred by df.loc[row] = [...].
segment_columns = ['filename', 'speaker', 'id','disartria', 'label', 'block', 'text', 'split',  'durata', 'type']
segment_records = []

for i in range(len(df_phrase)):

    audio_filepath=df_phrase['filename'].iloc[i]
    cod=codice(audio_filepath)
    speaker=df_phrase['speaker'].iloc[i]

    # create subdirectory for each speaker (exist_ok makes this a no-op when present)
    speaker_dir=os.path.join(output_dir, speaker)
    os.makedirs(speaker_dir, exist_ok=True)

    id_spk=df_phrase['id'].iloc[i]
    disartria=df_phrase['disartria'].iloc[i]
    label=df_phrase['label'].iloc[i]

    print(f'Segmenting audio: {audio_filepath}')
    audio, sr = librosa.load(audio_filepath, sr=sampling_rate)
    audio=normalize_audio(audio)
    # Per-file threshold: energy of the last 500 samples plus a fixed margin.
    segnale = audio[-500:]
    energia=np.sum(segnale**2)
    thr = energia+0.065

    vad = EnergyVAD(
        sample_rate = sampling_rate,
        frame_length = frame_len, # in milliseconds
        frame_shift = shift, # in milliseconds
        energy_threshold = thr,
        pre_emphasis = 0.95,
    )

    voice_activity = vad(audio)
    start_end_segment=start_end(audio, voice_activity)

    for idx, (start, end) in enumerate(start_end_segment):
        segment_audio = audio[start:end]
        path_segment = os.path.join(speaker_dir, f"{cod}_{idx+1}.wav")
        sf.write(path_segment, segment_audio, samplerate=sampling_rate)
        durata=librosa.get_duration(path=path_segment)
        # block / text / split are left empty for segments.
        segment_records.append([path_segment, speaker, id_spk, disartria, label, '' , '', '', durata, 'segment'])

df_phrase_segment = pd.DataFrame(segment_records, columns=segment_columns)
    

In [215]:
# Persist the table of TORGO phrase segments, without the index.
df_phrase_segment.to_csv('/home/tbasili/TTDS/ResNet/dataset_analysis/df_segment.csv', sep=',', index=False)

In [None]:
# Mean / max / min duration of the TORGO phrase segments and their total count.
segment_durata = df_phrase_segment['durata']

mean_segment = segment_durata.mean()
print('Durata media di un segmento', mean_segment)

max_d_segment = segment_durata.max()  # use idxmax() to get the row label of the maximum instead
print('Durata massima di un segmento', max_d_segment)

min_d_segment = segment_durata.min()
print('Durata minima di un segmento', min_d_segment)

print('Totale file audio', len(df_phrase_segment))


Durata media di un segmento 0.4940222004959939
Durata massima di un segmento 22.53
Durata minima di un segmento 0.06
10484


In [None]:
# Drop segments shorter than 0.15 s and recompute the duration statistics.
df_phrase_segment_clear=df_phrase_segment[df_phrase_segment['durata'] >=0.15]
mean_segment = df_phrase_segment_clear['durata'].mean()
print('Durata media di un segmento', mean_segment)

max_d_segment = df_phrase_segment_clear['durata'].max() 
print('Durata massima di un segmento', max_d_segment)

min_d_segment = df_phrase_segment_clear['durata'].min()
print('Durata minima di un segmento', min_d_segment)

df_phrase_segment_clear.to_csv('df_segment_clear.csv', sep=',', index=False)

# Removed = rows before filtering minus rows after filtering
# (the previous expression subtracted the filtered length from itself, always giving 0).
print('File rimossi:', len(df_phrase_segment)-len(df_phrase_segment_clear))


In [None]:
# Combine single-word recordings with phrase segments, then keep only clips
# whose duration lies between 0.15 s and 5 s.
df_word_and_segment= pd.concat([df_word, df_phrase_segment], ignore_index=True)
print(len(df_word_and_segment))

df_word_and_segment=df_word_and_segment[df_word_and_segment['durata']<=5]
print(len(df_word_and_segment))

df_word_and_segment=df_word_and_segment[df_word_and_segment['durata']>=0.15]
print('Totale file audio: ',len(df_word_and_segment))

#print('Analisi del dataframe contente parole e frasi segmentate')
# Result names carry a suffix so the builtins max() and min() are not
# overwritten for the rest of the kernel session.
mean_d = df_word_and_segment['durata'].mean()
print('Durata media di un audio: ', mean_d)

max_d = df_word_and_segment['durata'].max()
print('Durata massima di un audio: ', max_d)

min_d = df_word_and_segment['durata'].min()
print('Durata minima di un audio: ', min_d)

plt.figure(figsize=(10, 5))
df_word_and_segment['durata'].hist(bins= 50, alpha=0.5, color='red', label='Word')

#df_word_and_segment.to_csv('df_word_and_segment.csv', sep=',', index=False)

### Segment all phrases from synthetic dataset

In [None]:
def codice(filepath):
    """Build an identification code for a synthetic recording from its path.

    The code joins, with underscores, the '/'-separated directory components
    at positions 6, 9 and 10 and the file name up to its first dot.
    This definition replaces the earlier ``codice`` that used positions 5-8.
    """
    directory, filename = os.path.split(filepath)
    stem = filename.split('.')[0]
    pieces = directory.split('/')
    return '_'.join([pieces[6], pieces[9], pieces[10], stem])

In [None]:
sampling_rate = 16000
frame_len = 50
shift=20
frame_len_samples = int(sampling_rate * frame_len //1000)
frame_shift_samples = int(sampling_rate * shift // 1000 )

# os.makedirs("") raises FileNotFoundError, so the root is only created when a
# path is configured; an empty value puts the speaker folders in the current directory.
output_dir = ""
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Rows are gathered in a list and turned into a DataFrame once at the end.
# The result is bound to df_syn_phrase_segment only: the previous chained
# assignment also replaced the notebook-wide `df` with an empty frame.
syn_segment_columns = ['filename', 'speaker', 'id','disartria', 'label', 'block', 'text', 'split',  'durata', 'type']
syn_segment_records = []

for i in range(len(df_syn_phrase)):
    print(i)

    audio_filepath=df_syn_phrase['filename'].iloc[i]
    cod=codice(audio_filepath) # identification code of the source audio
    speaker=df_syn_phrase['speaker'].iloc[i]

    # create the subdirectory for each speaker (exist_ok makes this a no-op when present)
    speaker_dir=os.path.join(output_dir, speaker)
    os.makedirs(speaker_dir, exist_ok=True)

    id_spk=df_syn_phrase['id'].iloc[i]
    disartria=df_syn_phrase['disartria'].iloc[i]
    label=df_syn_phrase['label'].iloc[i]
    # block / text / split are left empty for segments.
    block=''
    text=''
    split=''

    print(f'Segmenting audio: {audio_filepath}')
    audio, sr = librosa.load(audio_filepath, sr=sampling_rate)
    audio=normalize_audio(audio)
    # Per-file threshold: energy of the last 500 samples plus a fixed margin.
    # NOTE(review): the margin here is 0.0655 while the TORGO cell uses 0.065 — confirm this is intended.
    segnale = audio[-500:]
    energia=np.sum(segnale**2)
    thr = energia+0.0655

    vad = EnergyVAD(
        sample_rate = sampling_rate,
        frame_length = frame_len, # in milliseconds
        frame_shift = shift, # in milliseconds 20
        energy_threshold = thr, # you may need to adjust this value
        pre_emphasis = 0.95,
    )

    voice_activity = vad(audio)
    start_end_segment=start_end(audio,voice_activity)

    for idx, (start, end) in enumerate(start_end_segment):
        segment_audio = audio[start:end]
        path_segment = os.path.join(speaker_dir, f"{cod}_{idx+1}.wav")
        sf.write(path_segment, segment_audio, samplerate=sampling_rate)
        durata=librosa.get_duration(path=path_segment)
        syn_segment_records.append([path_segment, speaker, id_spk, disartria, label, block , text , split , durata, 'segment'])

df_syn_phrase_segment = pd.DataFrame(syn_segment_records, columns=syn_segment_columns)
    

In [None]:
# Persist the table of synthetic phrase segments, without the index.
df_syn_phrase_segment.to_csv('df_syn_segment.csv', sep=',', index=False)

In [None]:
# Mean / max / min duration of the synthetic phrase segments and their total count.
syn_segment_durata = df_syn_phrase_segment['durata']

mean_segment = syn_segment_durata.mean()
print('Durata media di un segmento', mean_segment)

max_d_segment = syn_segment_durata.max()
print('Durata massima di un segmento', max_d_segment)

min_d_segment = syn_segment_durata.min()
print('Durata minima di un segmento', min_d_segment)

print(len(df_syn_phrase_segment))

In [None]:
# Combine synthetic single-word clips with synthetic phrase segments, then keep
# only clips whose duration lies between 0.15 s and 5 s.
df_syn_word_and_segment= pd.concat([df_syn_word, df_syn_phrase_segment], ignore_index=True)
print('totale file (word and segment): ',len(df_syn_word_and_segment))

# The messages now state the 5 s bound actually applied by the filter
# (they previously said 6 s).
df_syn_word_and_segment=df_syn_word_and_segment[df_syn_word_and_segment['durata']<=5]
print('totale file con durata fino a 5s: ', len(df_syn_word_and_segment))

df_syn_word_and_segment=df_syn_word_and_segment[df_syn_word_and_segment['durata']>=0.15]
print('totale file con durata tra 0.15s e 5s: ',len(df_syn_word_and_segment))

print('Analisi del dataframe contente parole e frasi segmentate')
# Result names carry a suffix so the builtins max() and min() are not
# overwritten for the rest of the kernel session.
mean_d = df_syn_word_and_segment['durata'].mean()
print('Durata media di un audio', mean_d)

max_d = df_syn_word_and_segment['durata'].max()
print('Durata massima di un audio', max_d)

min_d = df_syn_word_and_segment['durata'].min()
print('Durata minima di un audio', min_d)

plt.figure(figsize=(10, 5))
df_syn_word_and_segment['durata'].hist(bins= 50, alpha=0.5, color='red', label='Word')

df_syn_word_and_segment.to_csv('/home/tbasili/TTDS/ResNet/dataset_analysis/df_syn_word_and_segment.csv', sep=',', index=False)