In [None]:
import cv2
import numpy as np
import librosa
import soundfile as sf
import face_recognition
from pydub import AudioSegment
from pydub.silence import split_on_silence
import argparse
import os
from typing import List, Tuple
import tkinter as tk
from tkinter import filedialog


In [None]:
import librosa
import numpy as np
import pandas as pd
import os
import glob
from google.colab import files # Keep this for Colab downloads

class AudioFeatureExtractor:
    """Builds a per-frame table of acoustic features for one audio file.

    Every frame-level feature is computed with the same ``hop_length`` so that
    row *i* of the resulting table refers to the same frame in every column.
    """

    def __init__(self, sample_rate=22050, hop_length=512):
        """
        Args:
            sample_rate: Rate in Hz that audio is resampled to when loaded.
            hop_length: Number of samples between successive frames
                (512 samples @ 22050Hz is approx 23ms per frame).
        """
        self.sr = sample_rate
        self.hop_length = hop_length

    def process_audio(self, file_path):
        """
        Extracts features from audio file matching the requested CSV format.

        Args:
            file_path: Path of the audio file to analyse.

        Returns:
            A ``pandas.DataFrame`` with one row per frame and the columns
            ``time_seconds, energy_db, pitch_hz, pitch_confidence,
            speaking_rate, spectral_centroid, spectral_bandwidth,
            spectral_rolloff, mfcc_1, mfcc_2, mfcc_3, tempo`` (in that order),
            or ``None`` when the file could not be loaded.
        """
        print(f"Loading {os.path.basename(file_path)}...")

        # 1. Load Audio
        # y = audio time series, sr = sample rate
        try:
            y, sr = librosa.load(file_path, sr=self.sr)
        except Exception as e:
            # Best effort: a file that cannot be read is reported and skipped
            # so a batch run can carry on with the remaining files.
            print(f"Error loading {file_path}: {e}")
            return None

        # 2. Extract Features

        # --- Energy (RMS converted to dB, relative to the loudest frame) ---
        rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=self.hop_length)[0]
        energy_db = librosa.amplitude_to_db(rms, ref=np.max)

        # --- Pitch (Fundamental Frequency - F0) ---
        # Using PYIN (Probabilistic YIN) for accurate pitch and confidence
        f0, voiced_flag, voiced_probs = librosa.pyin(
            y,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=sr,
            frame_length=2048,
            hop_length=self.hop_length
        )
        # pyin reports NaN for unvoiced frames; store those as 0 Hz
        pitch_hz = np.nan_to_num(f0)

        # --- Spectral Features ---
        centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=self.hop_length)[0]
        bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr, hop_length=self.hop_length)[0]
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, hop_length=self.hop_length)[0]

        # --- MFCCs (Mel-frequency cepstral coefficients) ---
        # 13 are computed, but only the first three are written to the table
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=self.hop_length)

        # --- Speaking Rate Proxy (Onset Strength) ---
        # True speaking rate requires text transcription.
        # Onset strength is a good signal-processing proxy for "articulation rate".
        speaking_rate = librosa.onset.onset_strength(y=y, sr=sr, hop_length=self.hop_length)

        # --- Tempo (Global) ---
        # Tempo is calculated globally and broadcast to all frames
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # Depending on the librosa version the tempo comes back either as a
        # plain float or as a one-element array; normalise to a float so the
        # CSV column holds numbers rather than "[120.]"-style strings.
        tempo_bpm = float(np.atleast_1d(tempo).ravel()[0])

        # 3. Align Lengths
        # Sometimes feature arrays differ by 1 frame due to padding, so every
        # per-frame array takes part in the minimum (a DataFrame cannot be
        # built from columns of unequal length).
        per_frame = {
            'energy_db': energy_db,
            'pitch_hz': pitch_hz,
            'pitch_confidence': voiced_probs,  # Probability that pitch is valid
            'speaking_rate': speaking_rate,
            'spectral_centroid': centroid,
            'spectral_bandwidth': bandwidth,
            'spectral_rolloff': rolloff,
            'mfcc_1': mfccs[0],
            'mfcc_2': mfccs[1],
            'mfcc_3': mfccs[2],
        }
        min_frames = min(len(values) for values in per_frame.values())

        # Create Time Axis
        # librosa.frames_to_time converts frame indices to seconds
        times = librosa.frames_to_time(np.arange(min_frames), sr=sr, hop_length=self.hop_length)

        # 4. Construct DataFrame (insertion order defines the CSV column order)
        data = {'time_seconds': times}
        for column, values in per_frame.items():
            data[column] = values[:min_frames]
        data['tempo'] = [tempo_bpm] * min_frames  # Repeat global value

        return pd.DataFrame(data)

def batch_process_audio(folder_path):
    """
    Finds all audio files, processes them, saves CSVs, and zips them.

    Files directly inside ``folder_path`` (sub-folders are not searched) with a
    supported extension are run through ``AudioFeatureExtractor``. One
    ``<name>_audio_features.csv`` is written per input into
    ``audio_features_output`` in the current working directory; if at least one
    file succeeded the folder is zipped to ``audio_features.zip`` and a Colab
    download is triggered.

    Note: the CSV name is derived from the file name without its extension, so
    two inputs such as ``a.wav`` and ``a.mov`` write to the same CSV.

    Args:
        folder_path: Directory containing the audio (or .mov) files.
    """
    # Local import keeps this block self-contained without touching the
    # notebook's import cells.
    import shutil

    # Create output directory
    output_dir = "audio_features_output"
    os.makedirs(output_dir, exist_ok=True)

    # Initialize Extractor
    extractor = AudioFeatureExtractor()

    # Supported extensions (add more if needed).
    # '*.mov' is listed here so video audio can be processed directly; it must
    # be matched once, not once per audio extension.
    extensions = ['*.wav', '*.mp3', '*.m4a', '*.flac', '*.mov']

    # Non-recursive search: each pattern only matches direct children
    matches = []
    for ext in extensions:
        matches.extend(glob.glob(os.path.join(folder_path, ext)))

    # Guarantee each file is processed once, in a stable order
    audio_files = sorted(set(matches))

    print(f"Found {len(audio_files)} files to process.")

    processed_count = 0

    for file_path in audio_files:
        base_name = os.path.basename(file_path)
        file_name_only = os.path.splitext(base_name)[0]
        csv_name = f"{file_name_only}_audio_features.csv"
        save_path = os.path.join(output_dir, csv_name)

        print(f"Processing: {base_name}...")

        df = extractor.process_audio(file_path)

        if df is not None:
            # Save to CSV
            df.to_csv(save_path, index=False)
            print(f"‚úÖ Saved {csv_name} ({len(df)} frames)")
            processed_count += 1
        else:
            print(f"‚ùå Failed to process {base_name}")

    if processed_count > 0:
        print("\nüì¶ Zipping results...")
        # Writes ./audio_features.zip containing the output folder. Unlike
        # shelling out to `zip -r`, this rebuilds the archive from scratch
        # instead of appending to one left over from an earlier run.
        shutil.make_archive("audio_features", "zip", root_dir=".", base_dir=output_dir)
        print("‚¨áÔ∏è Downloading zip file...")
        files.download('audio_features.zip')
    else:
        print("No files were processed successfully.")

# --- RUN THE PROCESSING ---
# batch_process_audio is invoked in the final cell; change `target_folder` there
# to point at your folder containing .wav or .mov files


In [None]:
import os
import subprocess

def convert_mov_to_wav_on_drive(video_folder, output_folder):
    """
    Converts all .mov files in the video_folder to .wav files in the output_folder
    using FFmpeg inside Colab.

    Only files directly inside ``video_folder`` are considered (the extension
    match is case-insensitive). A target .wav that already exists is skipped,
    which makes re-runs cheap. If FFmpeg fails for a file, the error is printed,
    any partially written .wav is removed, and the remaining files are still
    converted.

    Args:
        video_folder: Directory containing the source .mov files.
        output_folder: Directory that receives the .wav files; created if missing.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)
        print(f"üìÅ Created output folder: {output_folder}")

    # Find all .mov files (sorted so the progress output is deterministic)
    video_files = sorted(f for f in os.listdir(video_folder) if f.lower().endswith('.mov'))

    if not video_files:
        print("‚ùå No .mov files found in the source folder.")
        return

    print(f"Found {len(video_files)} video files. Starting conversion...\n")

    for i, filename in enumerate(video_files):
        # Setup paths
        input_path = os.path.join(video_folder, filename)

        # Create output filename (VideoName.mov -> VideoName.wav)
        wav_filename = os.path.splitext(filename)[0] + ".wav"
        output_path = os.path.join(output_folder, wav_filename)

        # Skip if already exists (saves time if you re-run)
        if os.path.exists(output_path):
            print(f"‚ö†Ô∏è Skipping {wav_filename} (already exists)")
            continue

        print(f"Processing {i+1}/{len(video_files)}: {filename} ...")

        # --- THE FFMPEG COMMAND ---
        # -y : Overwrite without asking
        # -loglevel error : Keep output clean
        # -i : Input file
        # -vn : Disable video recording (audio only)
        # -acodec pcm_s16le : Use standard WAV encoding
        # -ar 44100 : Set sample rate to 44.1kHz
        # -ac 1 : Convert to Mono (better for analysis) or use 2 for Stereo
        # Global options go first and the output path goes last, so nothing
        # is left as a trailing option after the output file.
        command = [
            'ffmpeg',
            '-y',
            '-loglevel', 'error',
            '-i', input_path,
            '-vn',
            '-acodec', 'pcm_s16le',
            '-ar', '44100',
            '-ac', '1',
            output_path,
        ]

        try:
            subprocess.run(command, check=True)
            print(f"  ‚úÖ Converted to {wav_filename}")
        except subprocess.CalledProcessError as e:
            print(f"  ‚ùå Error converting {filename}: {e}")
            # The target did not exist before this attempt (see the skip check
            # above), so anything there now is a partial file. Remove it, or
            # the next run would skip this video as "already exists".
            if os.path.exists(output_path):
                os.remove(output_path)

    print("\nüéâ Conversion complete! Your .wav files are ready.")

# Folder that is scanned for .mov recordings (a Google Drive path as seen from Colab;
# presumably Drive must already be mounted at /content/drive — confirm before running)
video_source_path = "/content/drive/MyDrive/DatasetCercetare/"

# Where you want the new WAV files to go
# (I recommend putting them in a separate folder to keep things clean)
wav_output_path = "/content/drive/MyDrive/DatasetCercetare/AudioExtracted/"

# Run the converter (runs as soon as this cell is executed)
convert_mov_to_wav_on_drive(video_source_path, wav_output_path)

In [None]:
# Folder whose audio files are turned into feature CSVs; this is the same
# location the conversion cell writes its .wav files to.
target_folder = "/content/drive/MyDrive/DatasetCercetare/AudioExtracted/"

# Extract features for every supported file in that folder
batch_process_audio(target_folder)