# Obteniendo los datos estadisticos de todos los archivos

Se calculan todas las variables estadisticas y se almacenan en un dataframe

### Importando librerias

In [8]:
import pandas as pd
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import math
import numpy as np
import matplotlib.pyplot as plt

### Importar lista con nombres de archivos

*Cambiar el nombre de la lista según el sentimiento a analizar

In [9]:
files = pd.read_csv("Sadness.csv") ### MODIFICAR EL NOMBRE

### Se crea un diccionario para almacenar las variables de cada archivo

In [10]:
data_instance = {'M1': [],
                 'M2': [],
                 'M3': [],
                 'M4': [],
                 'C2': [],
                 'C3': [],
                 'C4': [],
                 'Skewness': [],
                 'Kurtosis': [],
                 'Mean': [],
                 'Absolute_mean': [],
                 'Peak_value': [],
                 'SRV': [],
                 'RMS': [],
                 'Crest_factor': [],
                 'Shape_factor': []}

### Ciclo para leer todos los archivos de cada sentimiento

In [11]:
for a in range(int(files.count())):
    name_doc = files._get_value (a, 'Name')
    data = pd.read_csv (name_doc)


    ################# Get statistical features #################
    m1 = float (stats.moment (data, moment = 1)) #First moment 
    m2 = float (stats.moment (data, moment = 2)) #Second moment
    m3 = float (stats.moment (data, moment = 3))#Third moment
    m4 = float (stats.moment (data, moment = 4)) #Fourth moment
    sc = float (m2 - math.sqrt (pow (m1,2))) #Second cumulant
    tc = float (m3 - (3*m1*m2) - (2*pow (m1,3))) #Third cumulant
    fc = float (m4 + (3*m3*m1) - (3*pow (m2,2)) + (12*m2*pow (m1,2)) - (6*pow (m1,4))) #Fourth cumulant
    skew = float (stats.skew (data)) #Skewness
    kurt = float (stats.kurtosis (data)) #Kurtosis
    mean = float(data.mean()) #Mean
    am = abs(float(data.mean())) #Absolute mean
    pv = 0.5*(float(data.max()) - float(data.min())) #Peak value
    da = abs(data)
    dsa = da**2
    srv = float(pow((dsa.sum()/int(dsa.count())),2)) #Square root value
    d2 = data**2
    rms = math.sqrt (float(d2.sum())/int(d2.count())) #Root mean square
    cf = pv/rms #Crest factor
    sf = rms/am #Shape factor

    ################# Append features to a dictionary #################
    data_instance ['M1'].append(m1)
    data_instance ['M2'].append(m2)
    data_instance ['M3'].append(m3)
    data_instance ['M4'].append(m4)
    data_instance ['C2'].append(sc)
    data_instance ['C3'].append(tc)
    data_instance ['C4'].append(fc)
    data_instance ['Skewness'].append(skew)
    data_instance ['Kurtosis'].append(kurt)
    data_instance ['Mean'].append(mean)
    data_instance ['Absolute_mean'].append(am)
    data_instance ['Peak_value'].append(pv)
    data_instance ['SRV'].append(srv)
    data_instance ['RMS' ].append(rms)
    data_instance ['Crest_factor'].append(cf)
    data_instance ['Shape_factor'].append(sf)

    del data

### Se crea un dataframe con todos los datos estadisticos y se guarda en un .csv

In [12]:
################# Create a dataframe from dictionary #################
data_stats = pd.DataFrame(data=data_instance)
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,0.007192,-0.000337,0.000433,0.007192,-0.000337,0.000278,-0.552879,5.379802,0.042016,0.042016,0.562622,8e-05,0.094642,5.944733,2.252542
1,0.0,0.007149,-0.000286,0.000446,0.007149,-0.000286,0.000293,-0.47242,5.730756,0.039334,0.039334,0.62175,7.6e-05,0.093251,6.667509,2.370771
2,0.0,0.006408,-0.000449,0.000377,0.006408,-0.000449,0.000253,-0.874603,6.168603,0.039333,0.039333,0.502914,6.3e-05,0.089194,5.638448,2.267662
3,0.0,0.007653,-0.000385,0.000562,0.007653,-0.000385,0.000386,-0.575592,6.595243,0.039395,0.039395,0.634842,8.5e-05,0.095942,6.616965,2.43538
4,0.0,0.01016,-0.000465,0.000579,0.01016,-0.000465,0.000269,-0.453654,2.609187,0.039728,0.039728,0.491257,0.000138,0.108345,4.534186,2.72716


In [13]:
#Normalizacion
scaler = MinMaxScaler()
data_stats = pd.DataFrame (scaler.fit_transform(data_stats),columns=['M1','M2','M3','M4','C2','C3','C4','Skewness','Kurtosis','Mean','Absolute_mean','Peak_value','SRV','RMS','Crest_factor','Shape_factor'])

data_stats.to_csv (r"..\stats\Normalizated_data\Stats_sadness.csv")
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,0.245664,0.922985,0.032549,0.245664,0.922985,0.024313,0.718807,0.176005,0.957481,0.957481,0.550497,0.084855,0.526463,0.467262,0.079897
1,0.0,0.244182,0.934787,0.033506,0.244182,0.934787,0.025588,0.776631,0.198299,0.89428,0.89428,0.611266,0.079974,0.5183,0.604761,0.091384
2,0.0,0.218823,0.897519,0.028277,0.218823,0.897519,0.022135,0.487594,0.226114,0.894266,0.894266,0.489132,0.066939,0.494497,0.408995,0.081366
3,0.0,0.26146,0.911988,0.042202,0.26146,0.911988,0.033749,0.702484,0.253217,0.895727,0.895727,0.624722,0.089612,0.534087,0.595146,0.097661
4,0.0,0.347377,0.893881,0.043485,0.347377,0.893881,0.023535,0.790117,0.0,0.90358,0.90358,0.477151,0.14574,0.606859,0.198922,0.12601
