# Obteniendo los datos estadisticos de todos los archivos

Se calculan todas las variables estadisticas y se almacenan en un dataframe

### Importando librerias

In [1]:
import pandas as pd
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import math
import numpy as np
import matplotlib.pyplot as plt

### Importar lista con nombres de archivos

*Cambiar el nombre de la lista según el sentimiento a analizar

In [2]:
files = pd.read_csv("Disgust.csv") ### MODIFICAR EL NOMBRE

### Se crea un diccionario para almacenar las variables de cada archivo

In [3]:
data_instance = {'M1': [],
                 'M2': [],
                 'M3': [],
                 'M4': [],
                 'C2': [],
                 'C3': [],
                 'C4': [],
                 'Skewness': [],
                 'Kurtosis': [],
                 'Mean': [],
                 'Absolute_mean': [],
                 'Peak_value': [],
                 'SRV': [],
                 'RMS': [],
                 'Crest_factor': [],
                 'Shape_factor': []}

### Ciclo para leer todos los archivos de cada sentimiento

In [4]:
from statistics import mean


for a in range(int(files.count())):
    name_doc = files._get_value (a, 'Name')
    data = pd.read_csv (name_doc)

    ########################################################################################################################################
    ################# Fourier #################
    raw_data = np.ravel(data.to_numpy())
    n = len(raw_data)
    yf = np.fft.fft(raw_data)       #Get fourier transform
    yfts = np.fft.fftshift(yf)
    yftsa = 1.0/n * np.abs(yfts)        #Remove complex numbers

    # Convert numpy array to DataFrame
    data = pd.DataFrame (yftsa)
    ##########################################################################################################################################



    ################# Get statistical features #################
    m1 = float (stats.moment (data, moment = 1)) #First moment 
    m2 = float (stats.moment (data, moment = 2)) #Second moment
    m3 = float (stats.moment (data, moment = 3))#Third moment
    m4 = float (stats.moment (data, moment = 4)) #Fourth moment
    sc = float (m2 - math.sqrt (pow (m1,2))) #Second cumulant
    tc = float (m3 - (3*m1*m2) - (2*pow (m1,3))) #Third cumulant
    fc = float (m4 + (3*m3*m1) - (3*pow (m2,2)) + (12*m2*pow (m1,2)) - (6*pow (m1,4))) #Fourth cumulant
    skew = float (stats.skew (data)) #Skewness
    kurt = float (stats.kurtosis (data)) #Kurtosis
    mean = float(data.mean()) #Mean
    am = abs(float(data.mean())) #Absolute mean
    pv = 0.5*(float(data.max()) - float(data.min())) #Peak value
    da = abs(data)
    dsa = da**2
    srv = float(pow((dsa.sum()/int(dsa.count())),2)) #Square root value
    d2 = data**2
    rms = math.sqrt (float(d2.sum())/int(d2.count())) #Root mean square
    cf = pv/rms #Crest factor
    sf = rms/am #Shape factor

    ################# Append features to a dictionary #################
    data_instance ['M1'].append(m1)
    data_instance ['M2'].append(m2)
    data_instance ['M3'].append(m3)
    data_instance ['M4'].append(m4)
    data_instance ['C2'].append(sc)
    data_instance ['C3'].append(tc)
    data_instance ['C4'].append(fc)
    data_instance ['Skewness'].append(skew)
    data_instance ['Kurtosis'].append(kurt)
    data_instance ['Mean'].append(mean)
    data_instance ['Absolute_mean'].append(am)
    data_instance ['Peak_value'].append(pv)
    data_instance ['SRV'].append(srv)
    data_instance ['RMS' ].append(rms)
    data_instance ['Crest_factor'].append(cf)
    data_instance ['Shape_factor'].append(sf)

    del data

### Se crea un dataframe con todos los datos estadisticos y se guarda en un .csv

In [5]:
################# Create a dataframe from dictionary #################
data_stats = pd.DataFrame(data=data_instance)
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,5.883702e-08,5.132178e-10,1.61463e-11,5.883702e-08,5.132178e-10,1.613591e-11,35.960505,4661.140252,3.7e-05,3.7e-05,0.02082,3.627451e-15,0.000245,84.835768,6.57944
1,0.0,4.488114e-08,4.097516e-10,1.409923e-11,4.488114e-08,4.097516e-10,1.409318e-11,43.094793,6996.508466,3.9e-05,3.9e-05,0.019738,2.152192e-15,0.000215,91.641784,5.541761
2,0.0,1.163065e-07,8.862711e-10,2.474467e-11,1.163065e-07,8.862711e-10,2.470409e-11,22.343984,1826.252573,6.1e-05,6.1e-05,0.019809,1.441866e-14,0.000347,57.165372,5.642735
3,0.0,3.796815e-08,3.850145e-10,1.280476e-11,3.796815e-08,3.850145e-10,1.280044e-11,52.041275,8879.446829,2.8e-05,2.8e-05,0.019767,1.499628e-15,0.000197,100.449828,7.152921
4,0.0,9.500534e-08,6.980549e-10,1.864465e-11,9.500534e-08,6.980549e-10,1.861757e-11,23.837873,2062.656849,5.2e-05,5.2e-05,0.019712,9.554699e-15,0.000313,63.047452,5.969768


In [6]:
#Normalizacion
scaler = MinMaxScaler()
data_stats = pd.DataFrame (scaler.fit_transform(data_stats),columns=['M1','M2','M3','M4','C2','C3','C4','Skewness','Kurtosis','Mean','Absolute_mean','Peak_value','SRV','RMS','Crest_factor','Shape_factor'])

data_stats.to_csv (r"..\stats\Normalizated_data\Stats_disgust_fourier.csv")
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,0.170494,0.193837,0.415023,0.170494,0.193837,0.418567,0.244377,0.16326,0.308821,0.308821,0.931139,0.02855,0.393009,0.53864,0.820908
1,0.0,0.129844,0.154754,0.362405,0.129844,0.154754,0.365578,0.305907,0.246726,0.323073,0.323073,0.881057,0.016939,0.341174,0.596027,0.496852
2,0.0,0.33789,0.334753,0.636035,0.33789,0.334753,0.640827,0.12694,0.061941,0.528243,0.528243,0.884327,0.113485,0.567551,0.305327,0.528385
3,0.0,0.109708,0.14541,0.329132,0.109708,0.14541,0.332044,0.383067,0.314023,0.219734,0.219734,0.882387,0.011802,0.309064,0.670295,1.0
4,0.0,0.275844,0.263657,0.47924,0.275844,0.263657,0.482942,0.139825,0.07039,0.445984,0.445984,0.879812,0.075202,0.509072,0.354924,0.630514
