In [None]:
# Prima cella -> Montaggio Drive e installazione librerie
!pip install git+https://github.com/openai/whisper.git

import os
import pandas as pd
import time
import torch
import whisper

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Seconda cella -> Definizione funzione
def create_demos_metadata(input_folder, output_csv, valid_emotions=None, model_name="large-v3"):
    """
    Transcribe the audio files under a folder and save the results as a CSV.

    Every ``.wav`` file found (recursively, extension matched case-insensitively)
    whose lowercased name contains one of the emotion codes is transcribed with
    Whisper (language forced to Italian). One row per successfully transcribed
    file is written with the columns ``file_name``, ``transcription`` and
    ``emotion``. Files whose transcription raises are reported and skipped.

    Args:
        input_folder: Directory that is walked recursively for ``.wav`` files.
        output_csv: Destination path of the ``;``-separated, UTF-8 CSV file.
            Its parent directory is created if it does not exist.
        valid_emotions: Iterable of emotion codes searched (as substrings) in
            the lowercased file name; the first code that matches wins.
            Defaults to the eight codes known to ``emotion_map``.
        model_name: Whisper model identifier passed to ``whisper.load_model``.

    Raises:
        FileNotFoundError: If ``input_folder`` is not an existing directory.
    """
    if valid_emotions is None:
        valid_emotions = ['col', 'dis', 'gio', 'neu', 'pau', 'rab', 'sor', 'tri']
    emotion_map = {
        'col': 'colpa',
        'dis': 'disgusto',
        'gio': 'gioia',
        'neu': 'neutrale',
        'pau': 'paura',
        'rab': 'rabbia',
        'sor': 'sorpresa',
        'tri': 'tristezza'
    }
    # File names are lowercased before matching, so the codes must be too;
    # otherwise a caller-supplied code such as "GIO" could never match.
    emotion_codes = [code.lower() for code in valid_emotions]

    def extract_emotion(filename):
        # Substring match: returns the first code (in the given order) that
        # appears anywhere in the already-lowercased file name.
        for emo in emotion_codes:
            if emo in filename:
                return emo
        return None

    # Fail fast: os.walk silently yields nothing for a missing folder, which
    # would only be noticed after the (large) model has been loaded.
    if not os.path.isdir(input_folder):
        raise FileNotFoundError(f"Cartella di input non trovata: {input_folder}")
    # Make sure the CSV can be written *before* spending time on transcription.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Caricamento modello su: {device}")
    # Load the weights directly on the target device instead of load-then-move.
    model = whisper.load_model(model_name, device=device)
    # Half precision is only requested on GPU; on CPU it triggers a warning
    # for every file before falling back to fp32.
    use_fp16 = device == "cuda"

    columns = ["file_name", "transcription", "emotion"]
    data = []
    count = 0
    failed = 0
    script_start = time.time()
    for root, dirs, files in os.walk(input_folder):
        # Sort in place so the traversal (and therefore the CSV row order)
        # is deterministic across runs and filesystems.
        dirs.sort()
        for file in sorted(files):
            lowered = file.lower()
            if not lowered.endswith(".wav"):
                continue
            filepath = os.path.join(root, file)
            emotion = extract_emotion(lowered)
            if not emotion:
                continue
            print(f"[{count+1}] Trascrivo: {file} [{emotion}]")
            t0 = time.time()
            try:
                result = model.transcribe(filepath, language="it", fp16=use_fp16)
                text = result["text"].strip()
            except Exception as e:
                # Best effort: report the failure and keep going with the rest.
                print(f"Errore con {file}: {type(e).__name__} - {e}")
                failed += 1
                continue
            duration = time.time() - t0
            print(f"{duration:.2f} sec")
            data.append({
                "file_name": os.path.basename(filepath),
                "transcription": text,
                # Fall back to the raw code when it has no long-form label.
                "emotion": emotion_map.get(emotion, emotion)
            })
            count += 1
    # Explicit columns keep the header present even when nothing was transcribed.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False, encoding="utf-8", sep=";")
    print(f"\nCompletato!\nFile totali processati: {count}")
    if failed:
        print(f"File non trascritti per errore: {failed}")
    print(f"Tempo totale: {((time.time()-script_start)/60):.2f} minuti")

In [None]:
# Third cell -> Example invocation of the function.
# Reads the audio from the DEMoS folder on the mounted Drive and writes the
# CSV under /content (i.e. outside the mounted Drive folder).
create_demos_metadata(input_folder="/content/drive/MyDrive/DEMoS", output_csv="/content/DEMoS_incomplete.csv")