# Obteniendo los datos estadisticos de todos los archivos

Se calculan todas las variables estadisticas y se almacenan en un dataframe

### Importando librerias

In [1]:
import pandas as pd
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import math
import numpy as np
import matplotlib.pyplot as plt

### Importar lista con nombres de archivos

*Cambiar el nombre de la lista según el sentimiento a analizar

In [2]:
files = pd.read_csv("Neutral.csv") ### MODIFICAR EL NOMBRE

### Se crea un diccionario para almacenar las variables de cada archivo

In [3]:
data_instance = {'M1': [],
                 'M2': [],
                 'M3': [],
                 'M4': [],
                 'C2': [],
                 'C3': [],
                 'C4': [],
                 'Skewness': [],
                 'Kurtosis': [],
                 'Mean': [],
                 'Absolute_mean': [],
                 'Peak_value': [],
                 'SRV': [],
                 'RMS': [],
                 'Crest_factor': [],
                 'Shape_factor': []}

### Ciclo para leer todos los archivos de cada sentimiento

In [4]:
for a in range(int(files.count())):
    name_doc = files._get_value (a, 'Name')
    data = pd.read_csv (name_doc)

    ################# Get statistical features #################
    m1 = float (stats.moment (data, moment = 1)) #First moment 
    m2 = float (stats.moment (data, moment = 2)) #Second moment
    m3 = float (stats.moment (data, moment = 3))#Third moment
    m4 = float (stats.moment (data, moment = 4)) #Fourth moment
    sc = float (m2 - math.sqrt (pow (m1,2))) #Second cumulant
    tc = float (m3 - (3*m1*m2) - (2*pow (m1,3))) #Third cumulant
    fc = float (m4 + (3*m3*m1) - (3*pow (m2,2)) + (12*m2*pow (m1,2)) - (6*pow (m1,4))) #Fourth cumulant
    skew = float (stats.skew (data)) #Skewness
    kurt = float (stats.kurtosis (data)) #Kurtosis
    mean = float(data.mean()) #Mean
    am = abs(float(data.mean())) #Absolute mean
    pv = 0.5*(float(data.max()) - float(data.min())) #Peak value
    da = abs(data)
    dsa = da**2
    srv = float(pow((dsa.sum()/int(dsa.count())),2)) #Square root value
    d2 = data**2
    rms = math.sqrt (float(d2.sum())/int(d2.count())) #Root mean square
    cf = pv/rms #Crest factor
    sf = rms/am #Shape factor

    ################# Append features to a dictionary #################
    data_instance ['M1'].append(m1)
    data_instance ['M2'].append(m2)
    data_instance ['M3'].append(m3)
    data_instance ['M4'].append(m4)
    data_instance ['C2'].append(sc)
    data_instance ['C3'].append(tc)
    data_instance ['C4'].append(fc)
    data_instance ['Skewness'].append(skew)
    data_instance ['Kurtosis'].append(kurt)
    data_instance ['Mean'].append(mean)
    data_instance ['Absolute_mean'].append(am)
    data_instance ['Peak_value'].append(pv)
    data_instance ['SRV'].append(srv)
    data_instance ['RMS' ].append(rms)
    data_instance ['Crest_factor'].append(cf)
    data_instance ['Shape_factor'].append(sf)

    del data

### Se crea un dataframe con todos los datos estadisticos y se guarda en un .csv

In [5]:
################# Create a dataframe from dictionary #################
data_stats = pd.DataFrame(data=data_instance)
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,0.002597,-0.000144,6.2e-05,0.002597,-0.000144,4.2e-05,-1.090408,6.177708,0.040166,0.040166,0.302811,1.8e-05,0.064884,4.666968,1.615379
1,0.0,0.001849,-0.0001,3.6e-05,0.001849,-0.0001,2.5e-05,-1.259681,7.395431,0.040123,0.040123,0.272781,1.2e-05,0.058813,4.638147,1.4658
2,0.0,0.00321,-0.000168,0.000108,0.00321,-0.000168,7.7e-05,-0.922301,7.44462,0.040075,0.040075,0.363739,2.3e-05,0.069398,5.241352,1.731695
3,0.0,0.002659,-0.000121,7.9e-05,0.002659,-0.000121,5.7e-05,-0.882186,8.110935,0.040022,0.040022,0.343445,1.8e-05,0.065278,5.26127,1.631068
4,0.0,0.002813,-0.000184,8.4e-05,0.002813,-0.000184,6e-05,-1.2351,7.59184,0.040149,0.040149,0.342255,2e-05,0.066521,5.145078,1.65684


In [6]:
#Normalizacion
scaler = MinMaxScaler()
data_stats = pd.DataFrame (scaler.fit_transform(data_stats),columns=['M1','M2','M3','M4','C2','C3','C4','Skewness','Kurtosis','Mean','Absolute_mean','Peak_value','SRV','RMS','Crest_factor','Shape_factor'])

data_stats.to_csv (r"..\stats\Normalizated_data\Stats_neutral.csv")
data_stats.head()

Unnamed: 0,M1,M2,M3,M4,C2,C3,C4,Skewness,Kurtosis,Mean,Absolute_mean,Peak_value,SRV,RMS,Crest_factor,Shape_factor
0,0.0,0.192196,0.919003,0.02382,0.192196,0.919003,0.01978,0.659942,0.070914,0.9052,0.9052,0.275601,0.079635,0.507599,0.113357,0.023215
1,0.0,0.136169,0.943791,0.01368,0.136169,0.943791,0.012006,0.554837,0.128276,0.904194,0.904194,0.244399,0.053756,0.455386,0.108585,0.00241
2,0.0,0.238177,0.905818,0.041434,0.238177,0.905818,0.036434,0.764324,0.130593,0.903072,0.903072,0.338909,0.10422,0.546421,0.208447,0.039393
3,0.0,0.196911,0.932085,0.030253,0.196911,0.932085,0.027245,0.789231,0.16198,0.901822,0.901822,0.317822,0.081588,0.510989,0.211744,0.025397
4,0.0,0.208422,0.896528,0.032267,0.208422,0.896528,0.028532,0.5701,0.137528,0.904801,0.904801,0.316586,0.087981,0.521677,0.192509,0.028982
