# Imports:

In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

import os
import librosa
import asyncio
import numpy as np
import pandas as pd
import scipy.stats as stats
import time
import nest_asyncio
#from speechpy import feature
#import aiofiles
import time
import parselmouth

# Extract frequency features:

## Get Metadata

In [15]:
# Locate the dataset: the project root is two directory levels above the
# current working directory, and the Common Voice data lives beneath it.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
CVFolder = os.path.join(project_root, "CommonVoice-En")

# Names of the audio clips that are actually present on disk.
source = os.path.join(CVFolder, "Full")
files = os.listdir(source)

# Load each split and keep only the rows labelled 'male' or 'female'.
# The tab delimiter is written as an escape so that editors cannot silently
# convert it to spaces.
test = pd.read_csv(os.path.join(CVFolder, "test.tsv"), delimiter="\t")
test = test[test.gender.isin(["male", "female"])]

train = pd.read_csv(os.path.join(CVFolder, "train.tsv"), delimiter="\t")
train = train[train.gender.isin(["male", "female"])]

validated = pd.read_csv(os.path.join(CVFolder, "validated.tsv"), delimiter="\t")
validated = validated[validated.gender.isin(["male", "female"])]

# NOTE(review): the splits are concatenated without de-duplication; if the
# same clip is listed in more than one TSV it appears more than once — confirm.
final = pd.concat([test, train, validated])

# Take at most the first 1000 rows of each gender, female rows first.
fem_final = final[final.gender == "female"][:1000]
male_final = final[final.gender == "male"][:1000]
final = pd.concat([fem_final, male_final])

# Strip only a trailing '.wav' so that the names can be matched against the
# 'path' column (str.replace would also remove '.wav' from the middle of a name).
files_without_wav = [
    file[:-len(".wav")] if file.endswith(".wav") else file
    for file in files
]

# Keep only the rows whose clip exists on disk; the previous index is kept
# as an 'index' column by reset_index().
df = final[final["path"].isin(files_without_wav)].reset_index()
df

Unnamed: 0,index,client_id,path,sentence,up_votes,down_votes,age,gender,accent
0,8,01dc88c61ec0eb8f22ab6ee768fb1f13780f2869f70336...,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,We couldn't make it in a week!,2,1,fourties,female,us
1,83,0c00d476cc4b12f5c7b751412871a725f548c05315241a...,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,"I promise, I will be careful!",2,0,teens,female,other
2,340,369ebdcea5e434ee2261477f9ebf862cd5aeac002541cb...,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,"Vermicelli A trio, or musical piece for three ...",2,0,twenties,female,us
3,500,53def0cb62cb696817d35356fcdea8850ca5bdbfbea29d...,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,Plastic surgery has become more popular.,2,0,fourties,female,us
4,835,8f759eb1f4d42a9161c6f1235122b6ca5d5f3e9c3c383b...,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,I'm going to them.,3,0,twenties,female,england
...,...,...,...,...,...,...,...,...,...
794,970,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,Some men are standing around a grill as some a...,2,0,twenties,male,us
795,973,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,A woman riding a blue bike is carrying a white...,3,0,twenties,male,us
796,974,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,Two men skateboarding in the forest.,2,0,twenties,male,us
797,975,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,Two people talking on a dock.,2,0,twenties,male,us


## Extract features:

In [30]:
async def get_frequencies(count, row, file):
    """Return the dominant FFT frequency of every 1/5-second window of a clip.

    Parameters
    ----------
    count : int
        Position of the clip in the batch. Accepted for call compatibility;
        not used by this function.
    row : object
        Metadata row exposing a ``gender`` attribute ('male' or 'female').
    file : str
        File name of the clip inside ``<project_root>/CommonVoice-En/Full``.

    Returns
    -------
    tuple
        ``(window_frequencies, gender, file)`` where ``window_frequencies`` is
        a list with one frequency in Hz per window, and ``gender`` is 1 for
        'male' and 0 for 'female'.

    Raises
    ------
    ValueError
        If ``row.gender`` is neither 'male' nor 'female' (previously this
        surfaced as an UnboundLocalError at the return statement).
    """
    file_path = os.path.join(project_root, "CommonVoice-En", "Full", file)

    if row.gender == 'male':
        gender = 1
    elif row.gender == 'female':
        gender = 0
    else:
        raise ValueError(f"Unsupported gender label: {row.gender!r}")

    sound = parselmouth.Sound(file_path)
    # Use the clip's real sampling frequency instead of assuming 16 kHz, so
    # that both the window length and the Hz conversion are right for any file.
    sample_rate = sound.sampling_frequency
    audio_data = sound.values[0]  # first channel only

    step = int(sample_rate / 5)  # number of samples in 1/5 of a second
    window_frequencies = []

    for i in range(0, len(audio_data), step):
        # The last window may be shorter than `step`.
        ft = np.fft.fft(audio_data[i:i + step])
        # Normalised frequency (cycles per sample) of each FFT coefficient.
        freqs = np.fft.fftfreq(len(ft))
        imax = np.argmax(np.abs(ft))
        # abs(): for a real signal the strongest bin may be the mirrored
        # negative-frequency one.
        freq_in_hz = abs(freqs[imax] * sample_rate)
        window_frequencies.append(freq_in_hz)

    return window_frequencies, gender, file

In [31]:
async def get_features(count, row):
    """Compute summary statistics of a clip's per-window dominant frequencies.

    The resulting feature row is appended to the module-level
    ``features_list`` and a progress counter is printed.

    Parameters
    ----------
    count : int
        1-based position of the clip, used for the progress display.
    row : object
        Metadata row exposing ``path`` (clip name without extension) and
        ``gender``.

    Returns
    -------
    tuple
        ``(nobs, mean, skew, kurtosis, median, mode, std, low, peak, q25, q75, iqr)``.
    """
    async with sem:
        file = row.path + '.wav'
        frequencies, gender, file_name = await get_frequencies(count, row, file)

        nobs, minmax, mean, _variance, skew, kurtosis = stats.describe(frequencies)
        median = np.median(frequencies)
        # stats.mode() returns an array in older SciPy and a scalar in newer
        # releases (keepdims=False default); atleast_1d handles both.
        mode = np.atleast_1d(stats.mode(frequencies).mode)[0]
        std = np.std(frequencies)
        low, peak = minmax
        q75, q25 = np.percentile(frequencies, [75, 25])
        iqr = q75 - q25

        features_list.append([file_name, nobs, mean, skew, kurtosis, median, mode, std, low, peak, q25, q75, iqr, gender])
        print(f"\r{count}/{len(df)}", end='')

        return nobs, mean, skew, kurtosis, median, mode, std, low, peak, q25, q75, iqr

In [32]:
# Timestamp taken before the extraction starts.
start_time = time.time()

# Allow run_until_complete() to be called while the notebook's own event
# loop is already running.
nest_asyncio.apply()

# Event loop that drives the tasks.
loop = asyncio.get_event_loop()

# Upper bound on the number of coroutines inside the semaphore at once.
sem = asyncio.Semaphore(2000)

# Scheduled tasks.
sents = []

# Accumulators; features_list is filled by get_features().
gender_list = []
file_list = []
features_list = []

for k, row in df.iterrows():
    sent = asyncio.ensure_future(get_features(count=k+1, row=row))
    sents.append(sent)

# Block until every task has finished so that features_list is complete
# before the following cells read it; the first task exception is re-raised.
if sents:
    loop.run_until_complete(asyncio.gather(*sents))

female
1/799female
2/799female
3/799female
4/799female
5/799female
6/799female
7/799female
8/799female
9/799female
10/799female
11/799female
12/799female
13/799female
14/799female
15/799female
16/799female
17/799female
18/799female
19/799female
20/799female
21/799female
22/799female
23/799female
24/799female
25/799female
26/799female
27/799female
28/799female
29/799female
30/799female
31/799female
32/799female
33/799female
34/799female
35/799female
36/799female
37/799female
38/799female
39/799female
40/799female
41/799female
42/799female
43/799female
44/799female
45/799female
46/799female
47/799female
48/799female
49/799female
50/799female
51/799female
52/799female
53/799female
54/799female
55/799female
56/799female
57/799female
58/799female
59/799female
60/799female
61/799female
62/799female
63/799female
64/799female
65/799female
66/799female
67/799female
68/799female
69/799female
70/799female
71/799female
72/799female
73/799female
74/799female
75/799female
76/799female
77/799female
7

In [33]:
# One row per clip: file name, the frequency statistics, then the gender label.
dataframe_features = pd.DataFrame(
    data=features_list,
    columns=[
        'FileName',
        'nobs', 'mean', 'skew', 'kurtosis',
        'median', 'mode', 'std',
        'low', 'peak',
        'q25', 'q75', 'iqr',
        'Gender',
    ],
)
dataframe_features

Unnamed: 0,FileName,nobs,mean,skew,kurtosis,median,mode,std,low,peak,q25,q75,iqr,Gender
0,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,66,102.348485,4.147523,18.221901,50.0,45.0,137.119224,0.0,890.0,45.00,108.75,63.75,0
1,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,103,230.026294,2.706938,6.621953,105.0,95.0,316.039870,15.0,1660.0,85.00,180.00,95.00,0
2,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,103,651.933657,2.526111,6.171771,145.0,90.0,1096.607296,0.0,5115.0,90.00,795.00,705.00,0
3,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,112,337.172619,2.990574,9.371681,212.5,400.0,444.804824,0.0,2195.0,98.75,400.00,301.25,0
4,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,44,139.550395,6.348443,38.547236,75.0,70.0,351.816990,50.0,2440.0,70.00,95.00,25.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,89,179.213483,2.796121,6.124384,35.0,15.0,423.069242,0.0,1715.0,15.00,50.00,35.00,1
795,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,80,119.812500,4.037067,16.370266,42.5,15.0,279.340119,0.0,1575.0,20.00,70.00,50.00,1
796,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,45,201.777778,2.466445,4.207987,40.0,20.0,434.972739,0.0,1540.0,20.00,75.00,55.00,1
797,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,48,78.307292,5.805409,34.608072,22.5,20.0,209.769294,0.0,1445.0,15.00,45.00,30.00,1


In [34]:
# Raw string: the Windows path contains backslash sequences (\d, \S, \C) that
# are invalid escapes in a normal string literal; the path value is unchanged.
# NOTE(review): this is an absolute, machine-specific location.
dataframe_features.to_csv(r'D:\dev\Speaker-Gender-Recognition\data\CommonVoice-En/Features_data.csv', index=False)

# Get MFCCs

## Get Metadata

In [2]:
# Locate the dataset: the project root is two directory levels above the
# current working directory, and the Common Voice data lives beneath it.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
CVFolder = os.path.join(project_root, "CommonVoice-En")

# Names of the audio clips that are actually present on disk.
source = os.path.join(CVFolder, "Full")
files = os.listdir(source)

# Load each split and keep only the rows labelled 'male' or 'female'.
# The tab delimiter is written as an escape so that editors cannot silently
# convert it to spaces.
test = pd.read_csv(os.path.join(CVFolder, "test.tsv"), delimiter="\t")
test = test[test.gender.isin(["male", "female"])]

train = pd.read_csv(os.path.join(CVFolder, "train.tsv"), delimiter="\t")
train = train[train.gender.isin(["male", "female"])]

validated = pd.read_csv(os.path.join(CVFolder, "validated.tsv"), delimiter="\t")
validated = validated[validated.gender.isin(["male", "female"])]

# NOTE(review): the splits are concatenated without de-duplication; if the
# same clip is listed in more than one TSV it appears more than once — confirm.
final = pd.concat([test, train, validated])

# Take at most the first 1000 rows of each gender, female rows first.
fem_final = final[final.gender == "female"][:1000]
male_final = final[final.gender == "male"][:1000]
final = pd.concat([fem_final, male_final])

# Strip only a trailing '.wav' so that the names can be matched against the
# 'path' column (str.replace would also remove '.wav' from the middle of a name).
files_without_wav = [
    file[:-len(".wav")] if file.endswith(".wav") else file
    for file in files
]

# Keep only the rows whose clip exists on disk; the previous index is kept
# as an 'index' column by reset_index().
df = final[final["path"].isin(files_without_wav)].reset_index()
df


Unnamed: 0,index,client_id,path,sentence,up_votes,down_votes,age,gender,accent
0,8,01dc88c61ec0eb8f22ab6ee768fb1f13780f2869f70336...,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,We couldn't make it in a week!,2,1,fourties,female,us
1,83,0c00d476cc4b12f5c7b751412871a725f548c05315241a...,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,"I promise, I will be careful!",2,0,teens,female,other
2,340,369ebdcea5e434ee2261477f9ebf862cd5aeac002541cb...,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,"Vermicelli A trio, or musical piece for three ...",2,0,twenties,female,us
3,500,53def0cb62cb696817d35356fcdea8850ca5bdbfbea29d...,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,Plastic surgery has become more popular.,2,0,fourties,female,us
4,835,8f759eb1f4d42a9161c6f1235122b6ca5d5f3e9c3c383b...,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,I'm going to them.,3,0,twenties,female,england
...,...,...,...,...,...,...,...,...,...
794,970,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,Some men are standing around a grill as some a...,2,0,twenties,male,us
795,973,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,A woman riding a blue bike is carrying a white...,3,0,twenties,male,us
796,974,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,Two men skateboarding in the forest.,2,0,twenties,male,us
797,975,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,Two people talking on a dock.,2,0,twenties,male,us


## Extract MFCCs

In [3]:
async def extract_MFCCs(count, row):
    """Compute the time-averaged MFCC vector of one clip.

    The feature row ``[file name, mean of each MFCC coefficient..., gender]``
    is appended to the module-level ``features_list`` and a progress counter
    is printed.

    Parameters
    ----------
    count : int
        1-based position of the clip, used for the progress display.
    row : object
        Metadata row exposing ``path`` (clip name without extension) and
        ``gender`` ('male' or 'female').

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``row.gender`` is neither 'male' nor 'female' (previously this
        surfaced as an UnboundLocalError).
    """
    async with sem:
        file = row.path + '.wav'
        file_path = os.path.join(project_root, "CommonVoice-En", "Full", file)

        if row.gender == 'female':
            gender = 0
        elif row.gender == 'male':
            gender = 1
        else:
            raise ValueError(f"Unsupported gender label: {row.gender!r}")

        sound = parselmouth.Sound(file_path)
        # Use the clip's real sampling frequency instead of assuming 16 kHz.
        sample_rate = sound.sampling_frequency
        audio_data = sound.values[0]  # first channel only

        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)

        # Average every coefficient over the time axis.
        mfccs_mean = list(np.mean(mfccs.T, axis=0))

        sample_features = [str(file)] + mfccs_mean + [gender]

        print(f"\r{count}/{len(df)}", end='')
        features_list.append(sample_features)

        return

In [4]:
# Timestamp taken before the extraction starts.
start_time = time.time()

# Allow run_until_complete() to be called while the notebook's own event
# loop is already running.
nest_asyncio.apply()

# Event loop that drives the tasks.
loop = asyncio.get_event_loop()

# Upper bound on the number of coroutines inside the semaphore at once.
sem = asyncio.Semaphore(600)

# Scheduled tasks.
sents = []

# Accumulators; features_list is filled by extract_MFCCs().
gender_list = []
file_list = []
features_list = []

for k, row in df.iterrows():
    sent = asyncio.ensure_future(extract_MFCCs(count=k+1, row=row))
    sents.append(sent)

# Block until every task has finished so that features_list is complete
# before the following cells read it; the first task exception is re-raised.
if sents:
    loop.run_until_complete(asyncio.gather(*sents))

799/799

In [6]:
# One row per clip: file name, 20 averaged coefficients (MFCC_1 .. MFCC_20),
# then the gender label. The coefficient names are generated rather than
# typed out by hand; the resulting column list is identical.
dataframe_features = pd.DataFrame(
    features_list,
    columns=['FileName'] + [f'MFCC_{i}' for i in range(1, 21)] + ['Gender'],
)
dataframe_features

Unnamed: 0,FileName,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,...,MFCC_12,MFCC_13,MFCC_14,MFCC_15,MFCC_16,MFCC_17,MFCC_18,MFCC_19,MFCC_20,Gender
0,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,-408.913274,92.802238,24.222945,21.564401,0.837400,20.131803,4.406617,8.008858,8.173683,...,2.465940,5.942938,0.313347,3.104217,-2.834497,-0.554444,1.736628,0.561705,1.665345,0
1,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,-338.503637,146.092735,1.907427,-12.908492,33.085521,33.898751,-1.440834,-11.088388,1.965735,...,-1.213857,3.551892,-5.383912,-10.387132,-1.989165,2.685330,-2.035855,-5.382838,0.166376,0
2,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,-455.740432,77.141277,4.925703,14.756924,13.384630,34.220768,4.420631,3.191059,10.809214,...,-2.791192,-7.731319,-8.188610,3.202001,0.105393,-7.219017,-6.251872,-3.729273,0.017557,0
3,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,-484.662055,82.932846,-5.790933,-5.898868,15.347033,3.785266,-1.394950,4.707987,0.150487,...,8.818200,1.674253,-3.287812,-7.106608,-8.278183,-2.637173,0.688595,-3.859376,-4.317977,0
4,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,-354.570978,76.553183,25.357306,11.706292,14.841730,11.357447,8.998382,9.299078,6.971776,...,1.853223,0.308829,-2.777751,-6.462977,-5.243096,-1.656487,-1.738631,-2.810475,-1.947313,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,-386.263630,120.371827,5.352067,28.265075,-5.130548,26.342742,5.820971,0.027935,1.957019,...,1.974348,8.326172,2.741778,7.063979,10.189332,-2.652470,-0.551666,3.843953,1.325541,1
795,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,-394.255766,133.201820,8.429066,41.176245,-9.144433,30.214971,0.939496,6.002862,12.723422,...,6.096548,11.915752,-10.176714,4.880812,7.115240,-3.316691,6.399162,2.597431,-1.501949,1
796,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,-396.111007,129.581651,1.125031,42.453387,-9.490710,31.725901,13.995674,-3.001004,5.215216,...,2.052305,6.906460,-4.149303,8.000287,9.208161,-8.589908,-0.724473,8.975612,1.855020,1
797,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,-406.443017,102.721273,9.219459,25.019276,2.174081,22.180863,4.808151,2.660074,2.931871,...,6.019701,8.432873,-7.251087,3.890966,5.462469,-0.590436,5.690388,7.703315,2.319307,1


In [7]:
# Raw string: the Windows path contains backslash sequences (\d, \S, \C) that
# are invalid escapes in a normal string literal; the path value is unchanged.
# NOTE(review): this is an absolute, machine-specific location.
dataframe_features.to_csv(r'D:\dev\Speaker-Gender-Recognition\data\CommonVoice-En/MFCCs_data.csv', index=False)

# Get F0

## Extract Metadata

In [8]:
# Locate the dataset: the project root is two directory levels above the
# current working directory, and the Common Voice data lives beneath it.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
CVFolder = os.path.join(project_root, "CommonVoice-En")

# Names of the audio clips that are actually present on disk.
source = os.path.join(CVFolder, "Full")
files = os.listdir(source)

# Load each split and keep only the rows labelled 'male' or 'female'.
# The tab delimiter is written as an escape so that editors cannot silently
# convert it to spaces.
test = pd.read_csv(os.path.join(CVFolder, "test.tsv"), delimiter="\t")
test = test[test.gender.isin(["male", "female"])]

train = pd.read_csv(os.path.join(CVFolder, "train.tsv"), delimiter="\t")
train = train[train.gender.isin(["male", "female"])]

validated = pd.read_csv(os.path.join(CVFolder, "validated.tsv"), delimiter="\t")
validated = validated[validated.gender.isin(["male", "female"])]

# NOTE(review): the splits are concatenated without de-duplication; if the
# same clip is listed in more than one TSV it appears more than once — confirm.
final = pd.concat([test, train, validated])

# Take at most the first 1000 rows of each gender, female rows first.
fem_final = final[final.gender == "female"][:1000]
male_final = final[final.gender == "male"][:1000]
final = pd.concat([fem_final, male_final])

# Strip only a trailing '.wav' so that the names can be matched against the
# 'path' column (str.replace would also remove '.wav' from the middle of a name).
files_without_wav = [
    file[:-len(".wav")] if file.endswith(".wav") else file
    for file in files
]

# Keep only the rows whose clip exists on disk; the previous index is kept
# as an 'index' column by reset_index().
df = final[final["path"].isin(files_without_wav)].reset_index()
df

Unnamed: 0,index,client_id,path,sentence,up_votes,down_votes,age,gender,accent
0,8,01dc88c61ec0eb8f22ab6ee768fb1f13780f2869f70336...,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,We couldn't make it in a week!,2,1,fourties,female,us
1,83,0c00d476cc4b12f5c7b751412871a725f548c05315241a...,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,"I promise, I will be careful!",2,0,teens,female,other
2,340,369ebdcea5e434ee2261477f9ebf862cd5aeac002541cb...,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,"Vermicelli A trio, or musical piece for three ...",2,0,twenties,female,us
3,500,53def0cb62cb696817d35356fcdea8850ca5bdbfbea29d...,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,Plastic surgery has become more popular.,2,0,fourties,female,us
4,835,8f759eb1f4d42a9161c6f1235122b6ca5d5f3e9c3c383b...,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,I'm going to them.,3,0,twenties,female,england
...,...,...,...,...,...,...,...,...,...
794,970,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,Some men are standing around a grill as some a...,2,0,twenties,male,us
795,973,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,A woman riding a blue bike is carrying a white...,3,0,twenties,male,us
796,974,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,Two men skateboarding in the forest.,2,0,twenties,male,us
797,975,a3e9ba570eff338be233bf5795fb62d030f535631fedac...,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,Two people talking on a dock.,2,0,twenties,male,us


In [9]:
async def extract_F0(count, row):
    """Compute summary statistics of one clip's pitch (F0) track.

    The feature row ``[file name, 12 pitch statistics, gender]`` is appended
    to the module-level ``features_list`` and a progress counter is printed.

    Parameters
    ----------
    count : int
        1-based position of the clip, used for the progress display.
    row : object
        Metadata row exposing ``path`` (clip name without extension) and
        ``gender`` ('male' or 'female').

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``row.gender`` is neither 'male' nor 'female' (previously this
        surfaced as an UnboundLocalError).
    """
    async with sem:
        file = row.path + '.wav'
        file_path = os.path.join(project_root, "CommonVoice-En", "Full", file)

        if row.gender == 'female':
            gender = 0
        elif row.gender == 'male':
            gender = 1
        else:
            raise ValueError(f"Unsupported gender label: {row.gender!r}")

        audio_data = parselmouth.Sound(file_path)
        pitch = audio_data.to_pitch()
        # NOTE(review): the pitch track appears to report unvoiced frames as
        # 0 Hz, and those frames are included in every statistic below —
        # confirm this is intended.
        pitch_values = pitch.selected_array['frequency']

        nobs_pitch, minmax_pitch, mean_pitch, _variance_pitch, skew_pitch, kurtosis_pitch = stats.describe(pitch_values)
        median_pitch = np.median(pitch_values)
        # stats.mode() returns an array in older SciPy and a scalar in newer
        # releases (keepdims=False default); atleast_1d handles both.
        mode_pitch = np.atleast_1d(stats.mode(pitch_values).mode)[0]
        std_pitch = np.std(pitch_values)
        low_pitch, peak_pitch = minmax_pitch
        q75_pitch, q25_pitch = np.percentile(pitch_values, [75, 25])
        iqr_pitch = q75_pitch - q25_pitch

        sample_features = [
            str(file),
            nobs_pitch, mean_pitch, skew_pitch, kurtosis_pitch,
            median_pitch, mode_pitch, std_pitch,
            low_pitch, peak_pitch, q25_pitch, q75_pitch, iqr_pitch,
            gender,
        ]

        print(f"\r{count}/{len(df)}", end='')
        features_list.append(sample_features)

        return

In [10]:
# Timestamp taken before the extraction starts.
start_time = time.time()

# Allow run_until_complete() to be called while the notebook's own event
# loop is already running.
nest_asyncio.apply()

# Event loop that drives the tasks.
loop = asyncio.get_event_loop()

# Upper bound on the number of coroutines inside the semaphore at once.
sem = asyncio.Semaphore(600)

# Scheduled tasks.
sents = []

# Accumulators; features_list is filled by extract_F0().
gender_list = []
file_list = []
features_list = []

for k, row in df.iterrows():
    sent = asyncio.ensure_future(extract_F0(count=k+1, row=row))
    sents.append(sent)

# Block until every task has finished so that features_list is complete
# before the following cells read it; the first task exception is re-raised.
if sents:
    loop.run_until_complete(asyncio.gather(*sents))

799/799

In [13]:
# One row per clip: file name, the pitch statistics, then the gender label.
dataframe_features = pd.DataFrame(
    data=features_list,
    columns=[
        'FileName',
        'nobs_pitch', 'mean_pitch', 'skew_pitch', 'kurtosis_pitch',
        'median_pitch', 'mode_pitch', 'std_pitch',
        'low_pitch', 'peak_pitch',
        'q25_pitch', 'q75_pitch', 'iqr_pitch',
        'Gender',
    ],
)
dataframe_features

Unnamed: 0,FileName,nobs_pitch,mean_pitch,skew_pitch,kurtosis_pitch,median_pitch,mode_pitch,std_pitch,low_pitch,peak_pitch,q25_pitch,q75_pitch,iqr_pitch,Gender
0,2f8392c2dfa63d2a7fdfebc053f9eff02246387575e205...,431,98.615796,1.205098,-0.070656,0.000000,0.0,157.837535,0.0,501.104815,0.0,227.496311,227.496311,0
1,33f6f6793f00ed68593ce9560406006e7c9693faacccbb...,683,164.199106,-0.484600,-1.620728,221.642048,0.0,124.685260,0.0,318.332472,0.0,262.940061,262.940061,0
2,06c73d5731753b0f9accd4b68494a7b677ba8db954c7f9...,678,84.513786,1.087563,0.093838,0.000000,0.0,119.546447,0.0,595.106962,0.0,209.759810,209.759810,0
3,52b40bf55615a1a76458fafa29e1225c0bed5b7df0f3a4...,743,35.610266,1.670144,1.343756,0.000000,0.0,69.712168,0.0,257.954553,0.0,0.000000,0.000000,0
4,48081fddb6fe56d7238739344274f98f70f4a4335f37fc...,289,88.441567,0.954049,-0.673693,0.000000,0.0,125.156519,0.0,393.420613,0.0,221.361991,221.361991,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,2afa3d37e26d4c0abccf567c24431dba4094eb3d012bea...,585,55.816647,3.076425,16.631885,0.000000,0.0,81.748405,0.0,587.012587,0.0,112.573291,112.573291,1
795,3382f58c3e8268ef8196df2dd123a5be9c87542f33a68f...,525,60.670798,-0.080575,-1.868937,94.198815,0.0,56.521653,0.0,157.126994,0.0,110.263473,110.263473,1
796,3775fea31d0de1fe21deb3c11d363df0776d606878adb9...,292,72.324828,3.147206,12.024177,86.971694,0.0,108.377992,0.0,571.777860,0.0,112.555066,112.555066,1
797,4ff1fdee808c24ac5caf5c020c3d8d6cca47f0b5d8e075...,311,39.720633,3.035056,17.787581,0.000000,0.0,67.509698,0.0,546.320074,0.0,101.399882,101.399882,1


In [14]:
# Raw string: the Windows path contains backslash sequences (\d, \S, \C) that
# are invalid escapes in a normal string literal; the path value is unchanged.
# NOTE(review): this is an absolute, machine-specific location.
dataframe_features.to_csv(r'D:\dev\Speaker-Gender-Recognition\data\CommonVoice-En/F0_data.csv', index=False)