<a href="https://colab.research.google.com/github/Solrak97/clasificador_de_sentimientos/blob/main/Notebooks/Analisis_preliminar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Análisis preliminar de los datos

## Importación de los datos desde Kaggle

In [1]:
%%capture
! pip install kaggle wavio pydub keras-metrics
! rm -rf *
! mkdir ~/.kaggle
! curl https://raw.githubusercontent.com/Solrak97/clasificador_de_sentimientos/main/kaggle.json > kaggle.json
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download uwrfkaggler/ravdess-emotional-speech-audio
! unzip ravdess-emotional-speech-audio.zip

In [2]:
import soundfile
import numpy as np
import librosa
import glob
import os
from pydub import AudioSegment
import seaborn as sns
from matplotlib import pyplot as plt
import pandas as pd

### Transformación de los datos a las características base del estudio de Dias Issa et al.
Se extraerá un grupo de características para analizar. Hemos notado que hay diferencias en los tamaños de los audios, por lo que hemos decidido revisar cómo se comportan los tamaños de los datos y otras de estas características.

In [3]:
# Maps the zero-padded two-digit emotion code (used as the lookup key for the
# third dash-separated field of each audio file name) to its emotion label.
# Codes are assigned in order, starting at "01".
INT_2_EMOTION = {
    f"{code:02d}": emotion_label
    for code, emotion_label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

In [None]:
# Per-file accumulators: every list receives exactly one entry per audio file
# and becomes one column of the `data` DataFrame built after the loop.
name, _len, emotions, mfccs, chromas, mels, contrasts, tonnetz = [], [], [], [], [], [], [], []

# sorted(): glob returns paths in arbitrary filesystem order; sorting keeps the
# row order of `data` stable between runs and machines.
for file in sorted(glob.glob("Actor_*/*.wav")):
    file_name = os.path.basename(file)

    # Down-mix to a single channel and overwrite the file in place, so the
    # features below are computed on one channel.
    # NOTE(review): this mutates the downloaded dataset on disk.
    sound = AudioSegment.from_wav(file)
    sound = sound.set_channels(1)
    sound.export(file, format="wav")

    # The third dash-separated field of the file name is the emotion code.
    name_split = file_name.split("-")
    emotion = INT_2_EMOTION[name_split[2]]

    # Only the read needs the open handle; release it before the (slow)
    # feature extraction.
    with soundfile.SoundFile(file) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # Magnitude spectrogram shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))

    # Each feature is averaged over time (axis 0 after the transpose), giving
    # one fixed-length vector per file regardless of the clip duration.
    mfccs.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=128).T, axis=0))
    chromas.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))
    # The signal must be passed as the keyword `y`: librosa >= 0.10 made the
    # arguments keyword-only, so the positional form raises TypeError.
    mels.append(np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0))
    contrasts.append(np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0))
    tonnetz.append(np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0))

    _len.append(librosa.get_duration(y=X, sr=sample_rate))

    name.append(file_name)
    emotions.append(emotion)

# One row per audio file; the feature columns hold one 1-D array per cell.
data = pd.DataFrame({'File name': name, 'Emotion': emotions, 'Duration': _len,
                     'MFCC': mfccs, 'Chroma': chromas, 'Mel': mels, 'Contrast': contrasts, 'Tonnetz': tonnetz})

## Transformación de los datos utilizando PCA

In [None]:
from sklearn.decomposition import PCA

# Build the numeric matrix PCA needs. The original `data.drop(cols='Emotion')`
# used an invalid keyword (the parameter is `columns`) and, even when spelled
# correctly, would leave the string column 'File name' and array-valued cells
# that PCA cannot consume. Here 'Emotion' and 'File name' are excluded, the
# scalar 'Duration' is kept, and every per-file feature vector is expanded
# into its own block of columns.
feature_columns = ['MFCC', 'Chroma', 'Mel', 'Contrast', 'Tonnetz']
X = np.hstack(
    [data[['Duration']].to_numpy(dtype=float)]
    + [np.vstack(data[column].tolist()) for column in feature_columns]
)

# NOTE(review): the features are not standardised, so the components are
# driven by the highest-variance columns - confirm whether scaling is wanted.
pca = PCA(n_components=2)
# `reduced` has one row per audio file and two columns (the 2-D projection).
reduced = pca.fit_transform(X)

### Separación no supervisada como intento de visualizar los datos agrupados.

In [None]:
from sklearn.cluster import KMeans

# Number of clusters to look for; equals the number of emotion labels (8).
N_CLUSTERS = 8

# n_init is set explicitly so the result does not depend on the default of the
# installed scikit-learn version; random_state makes the clustering repeatable.
label = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0).fit_predict(reduced)

# Rows of `reduced` grouped by predicted cluster; index k holds cluster k.
# The original assigned all eight slices to `filtered_label0`, so each line
# overwrote the previous one and only cluster 7 survived.
filtered_labels = [reduced[label == cluster_id] for cluster_id in range(N_CLUSTERS)]

# Individual names kept for code that refers to the clusters one by one.
(filtered_label0, filtered_label1, filtered_label2, filtered_label3,
 filtered_label4, filtered_label5, filtered_label6, filtered_label7) = filtered_labels
 