In [None]:
import os
import numpy as np
import librosa
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import (
    Conv1D, BatchNormalization, MaxPooling1D,
    Bidirectional, LSTM, Dropout, Dense, Lambda, Layer, Masking
)
from tensorflow.keras.regularizers import l2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from hmmlearn import hmm
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import random
import warnings
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
"""
Feature extraction and export:
 1) Global features (966) per utterance -> emovo_global_features.xlsx
 2) Local features (23 LLD + 23 Δ) per frame -> emovo_local_features.xlsx
"""

# Restore the `np.complex` / `np.float` aliases when the installed NumPy
# does not provide them.
# NOTE(review): presumably needed because an older librosa release still
# references these aliases -- confirm against the pinned versions.
if not hasattr(np, 'complex'): np.complex = complex
if not hasattr(np, 'float'):   np.float   = float

# Configuration
# AUDIO_DIR: root folder walked recursively by main() to find audio files.
# SAVE_DIR : folder (created if missing) where both Excel files are written.
# Both values are placeholders here and must be filled in before running.
AUDIO_DIR = #...
SAVE_DIR  = #...
# Output file names, joined with SAVE_DIR in main().
OUT_GLOBAL  = "emovo_global_features.xlsx"
OUT_LOCAL  = "emovo_local_features.xlsx"
# 32 ms @ 16 kHz -> 512 samples; 8 ms hop -> 128 samples
WIN_LENGTH = 512
HOP_LENGTH = 128

# Maps the emotion code found in the first '-'-separated field of a file
# name to its English label; main() keeps unknown codes unchanged.
emo_map = {
    "dis": "Disgust",
    "gio": "Happiness",
    "pau": "Fear",
    "rab": "Anger",
    "sor": "Surprise",
    "tri": "Sadness",
    "neu": "Neutral"
}

def compute_stats(vec):
    """
    Compute the 21 summary statistics (functionals) of a 1-D contour.

    Args:
        vec: 1-D numeric sequence (NumPy array or anything accepted by
            ``np.asarray``) holding one value per frame.

    Returns:
        dict mapping each statistic name to its value:
          - maxPos / minPos: index of the maximum / minimum divided by
            the number of frames.
          - amean, stddev: arithmetic mean and standard deviation.
          - linregc1, linregc2: slope and offset of the least-squares
            line fitted over the frame index.
          - linregerrA, linregerrQ: mean absolute and mean squared error
            of that linear fit.
          - quartile1..3, iqr1_2, iqr2_3, iqr1_3: quartiles and the
            ranges between them.
          - kurtosis, skewness: bias-corrected excess kurtosis and
            skewness.
          - percentile1, percentile99: 1st and 99th percentiles.
          - pctlrange0_1: range between the two percentiles above
            (percentile99 - percentile1), following the openSMILE
            "pctlrange0-1" convention where 0 and 1 are indices into
            the percentile list, not percentile values.
          - upleveltime75 / upleveltime90: fraction of frames above
            min + 75 % / 90 % of the value range.
    """
    vec = np.asarray(vec)
    n = len(vec)
    t = np.arange(n)

    # First-order least-squares fit over the frame index.
    slope, intercept = np.polyfit(t, vec, 1)
    err = vec - (slope * t + intercept)

    # All percentiles in one call instead of re-evaluating 1 % and 99 %.
    p01, q25, q50, q75, p99 = np.percentile(vec, [1, 25, 50, 75, 99])

    vmin, vmax = vec.min(), vec.max()
    thr75 = vmin + 0.75 * (vmax - vmin)
    thr90 = vmin + 0.90 * (vmax - vmin)

    return {
        "maxPos": np.argmax(vec) / n,
        "minPos": np.argmin(vec) / n,
        "amean": vec.mean(),
        "stddev": vec.std(),
        "linregc1": slope,
        "linregc2": intercept,
        "linregerrA": np.mean(np.abs(err)),
        "linregerrQ": np.mean(err ** 2),
        "quartile1": q25,
        "quartile2": q50,
        "quartile3": q75,
        "kurtosis": stats.kurtosis(vec, fisher=True, bias=False),
        "skewness": stats.skew(vec, bias=False),
        "iqr1_2": q50 - q25,
        "iqr2_3": q75 - q50,
        "iqr1_3": q75 - q25,
        "percentile1": p01,
        "percentile99": p99,
        # Inter-percentile range (99 % - 1 %), not "1 % minus minimum".
        "pctlrange0_1": p99 - p01,
        "upleveltime75": np.mean(vec > thr75),
        "upleveltime90": np.mean(vec > thr90),
    }

def extract_framewise(y, sr):
    """
    Compute the frame-level descriptors of a signal and their deltas.

    Returns:
      - frame_feats: array (T, 46) laid out as
                     [MFCC0-14, LogMel0-7, ΔMFCC, ΔLogMel]
      - hdr        : list with the 46 matching column names
    """
    # Both analyses share the same overlapping framing.
    framing = dict(n_fft=WIN_LENGTH,
                   hop_length=HOP_LENGTH,
                   win_length=WIN_LENGTH)

    cepstra = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=15, **framing)
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=8,
                                               fmax=8000, **framing)
    log_mel = librosa.power_to_db(mel_power)

    # Static descriptors stacked row-wise, then their first-order deltas.
    static = np.concatenate((cepstra, log_mel), axis=0)    # (23, T)
    dynamic = librosa.feature.delta(static, order=1)       # (23, T)

    static_names = [f"mfcc{i}" for i in range(15)]
    static_names += [f"logmel{i}" for i in range(8)]
    # Delta columns reuse the static names with a "d" prefix.
    hdr = static_names + ["d" + name for name in static_names]

    # Frames on rows, features on columns: (T, 46).
    frame_feats = np.concatenate((static.T, dynamic.T), axis=1)
    return frame_feats, hdr

def extract_global_stats(frame_feats, hdr):
    """
    Summarise every column of frame_feats with compute_stats.

    Returns a flat dict {<hdr>_<stat>: value, ...} covering each column
    named in hdr, in header order.
    """
    return {
        f"{feature}_{stat}": value
        for idx, feature in enumerate(hdr)
        for stat, value in compute_stats(frame_feats[:, idx]).items()
    }

def main():
    """
    Extract features from every audio file under AUDIO_DIR and save them.

    File names are expected to look like ``<emotion>-<speaker>-<utt>.<ext>``;
    files with fewer than three '-'-separated fields are skipped with a
    "[SKIP]" message. For each remaining file the audio is loaded at
    16 kHz (multi-channel audio is averaged to mono), frame-wise features
    are computed with extract_framewise, and per-utterance statistics with
    extract_global_stats.

    Two Excel files are written into SAVE_DIR (created if missing):
    OUT_GLOBAL with one row per utterance and OUT_LOCAL with one row per
    frame. If no usable audio file is found, a warning is printed and
    nothing is written.
    """
    audio_exts = ('.wav', '.flac', '.ogg', '.mp3')
    meta_cols = ["file", "emotion", "speaker", "utt_id", "frame"]

    rows_global = []
    frame_tables = []
    for root, _, files in os.walk(AUDIO_DIR):
        for fn in files:
            if not fn.lower().endswith(audio_exts):
                continue
            base, _ = os.path.splitext(fn)
            parts = base.split('-')
            if len(parts) < 3:
                print(f"[SKIP] {fn}")
                continue
            emo_code, speaker, utt = parts[:3]
            # Unknown emotion codes are kept as they are.
            emo = emo_map.get(emo_code, emo_code)
            path = os.path.join(root, fn)

            # Load audio; average the channels when it is not mono.
            y, sr = librosa.load(path, sr=16000, mono=False)
            if y.ndim > 1:
                y = y.mean(axis=0)

            # Frame-wise feature extraction.
            feat_mat, hdr = extract_framewise(y, sr)
            T = feat_mat.shape[0]

            meta = {"file": fn, "emotion": emo,
                    "speaker": speaker, "utt_id": utt}

            # One table per file, built directly from the feature matrix
            # instead of one Python dict per frame.
            frame_df = pd.DataFrame(np.asarray(feat_mat, dtype=np.float64),
                                    columns=hdr)
            frame_df = frame_df.assign(frame=np.arange(T), **meta)
            frame_tables.append(frame_df[meta_cols + list(hdr)])

            # Per-utterance statistics.
            rows_global.append({**meta, **extract_global_stats(feat_mat, hdr)})
            print(f"{fn}: {T} frame estratti")

    # Without this guard the empty DataFrames have no columns and the
    # sort below fails with a KeyError.
    if not rows_global:
        print(f"[WARN] No usable audio files found under {AUDIO_DIR}; "
              "nothing was saved.")
        return

    dfg = pd.DataFrame(rows_global)
    dff = pd.concat(frame_tables, ignore_index=True)

    # Deterministic ordering regardless of directory traversal order.
    dfg.sort_values(["speaker", "emotion", "utt_id"], inplace=True)
    dff.sort_values(["speaker", "emotion", "utt_id", "frame"], inplace=True)

    # Create the output folder and write both tables.
    # NOTE(review): Excel worksheets hold at most 1,048,576 rows, so the
    # frame-level export may fail on a large corpus -- consider CSV or
    # Parquet if that happens.
    os.makedirs(SAVE_DIR, exist_ok=True)
    out_g = os.path.join(SAVE_DIR, OUT_GLOBAL)
    out_f = os.path.join(SAVE_DIR, OUT_LOCAL)
    dfg.to_excel(out_g, index=False)
    dff.to_excel(out_f, index=False)
    print(f"\nGlobal features -> {out_g}")
    print(f"Frame features  -> {out_f}")

# Run the extraction only when this file is executed as a script.
if __name__=="__main__":
    main()