In [None]:
pip install librosa numpy matplotlib tqdm

In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import os
import glob
from tqdm import tqdm
import time

## Conversion helper and directories

In [None]:
def convert(video_filepath, audio_filepath):
    """Extract the audio track of a video into its own file using ffmpeg.

    The command is passed to ffmpeg as an argument list without going through
    a shell, so paths containing spaces or shell metacharacters
    (``&``, ``;``, quotes, parentheses, ...) are handed over verbatim instead
    of being split or interpreted.

    Args:
        video_filepath: Path of the input video file.
        audio_filepath: Path of the audio file to write; ffmpeg chooses the
            container/codec from this path's extension.

    Returns:
        The ffmpeg process exit code as reported by ``subprocess.call``
        (0 means success).
    """
    command = [
        "ffmpeg",
        "-i", str(video_filepath),
        "-vn",            # drop the video stream
        "-ar", "44100",   # audio sample rate in Hz
        "-ac", "2",       # number of audio channels
        "-b:a", "192k",   # audio bitrate
        str(audio_filepath),
    ]
    return subprocess.call(command)

# Input folder (videos) and output folders (extracted audio, spectrogram
# images, MFCC images) used by the processing loop.
video_dir = "E:\\Thesis\\Videos"
audio_dir = "E:\\Thesis\\Audio"
spectrogram_dir = "spectrograms"
mfcc_dir = "mfcc"

# Neither ffmpeg nor matplotlib's savefig creates missing parent folders, so
# make sure every output location exists before anything is written to it.
for output_dir in (audio_dir, spectrogram_dir, mfcc_dir):
    os.makedirs(output_dir, exist_ok=True)

# Videos waiting to be processed and spectrogram images already on disk.
contents = glob.glob(os.path.join(video_dir, "*.mp4"))
contents_spec = glob.glob(os.path.join(spectrogram_dir, "*.png"))

print("Contents of the directory: ")
for item in contents:
    print(item)

for item in contents_spec:
    print(item)

## Process each .mp4 file

In [None]:
def _output_path(directory, source_path, suffix):
    """Return ``directory/<stem><suffix>`` where stem is ``source_path``'s file name without its extension.

    ``os.path.splitext`` strips only the final extension and does not care
    about its case, so ``CLIP.MP4`` and ``a.mp4.mp4`` are both handled,
    unlike a plain ``str.replace(".mp4", ...)``.
    """
    stem = os.path.splitext(os.path.basename(source_path))[0]
    return os.path.join(directory, stem + suffix)


def _save_and_show(feature, sr, image_path, **specshow_kwargs):
    """Render a 2-D feature array as an axis-free image, save it, then display it.

    The figure is saved *before* ``plt.show()`` and is always closed
    afterwards, so no empty or leftover figures accumulate while looping over
    many files.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.specshow(feature, sr=sr, x_axis=None, y_axis=None, ax=ax, **specshow_kwargs)
        ax.axis('off')
        fig.tight_layout(pad=0)
        fig.savefig(image_path, bbox_inches='tight', pad_inches=0)
        plt.show()
    finally:
        plt.close(fig)


# For every video: extract audio, compute a log-mel spectrogram and MFCCs,
# and save both as images. Each stage reports its own wall-clock duration.
for video_file in tqdm(glob.glob(os.path.join(video_dir, "*.mp4")), desc="Processing videos"):
    start_time = time.perf_counter()
    video_name = os.path.basename(video_file)

    audio_file = _output_path(audio_dir, video_file, ".mp3")

    # Convert video to audio
    print(f"\nConverting video to audio for {video_name}...")
    convert_start_time = time.perf_counter()
    convert(video_file, audio_file)
    print(f"Conversion took {time.perf_counter() - convert_start_time:.2f} seconds")

    # A failed conversion leaves nothing to load; skip this video rather than
    # letting librosa.load abort the rest of the batch.
    if not os.path.isfile(audio_file):
        print(f"Skipping {video_name}: no audio file was produced at {audio_file}")
        continue

    # Load audio (sr=None keeps the file's own sample rate)
    print("Loading audio...")
    load_start_time = time.perf_counter()
    y, sr = librosa.load(audio_file, sr=None)
    print(f"Loading audio took {time.perf_counter() - load_start_time:.2f} seconds")

    # Convert to Mel Spectrogram
    print("Converting to Mel Spectrogram...")
    mel_start_time = time.perf_counter()
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    print(f"Mel Spectrogram conversion took {time.perf_counter() - mel_start_time:.2f} seconds")

    # Convert to Log-Mel Spectrogram (dB relative to the spectrogram's peak)
    print("Converting to Log-Mel Spectrogram...")
    logmel_start_time = time.perf_counter()
    log_S = librosa.power_to_db(S, ref=np.max)
    print(f"Log-Mel Spectrogram conversion took {time.perf_counter() - logmel_start_time:.2f} seconds")

    # Visualize and save the spectrogram
    print("Visualizing Log-Mel Spectrogram...")
    visualize_start_time = time.perf_counter()
    spectrogram_file = _output_path(spectrogram_dir, video_file, ".png")
    _save_and_show(log_S, sr, spectrogram_file, fmax=8000)
    print(f"Visualization and saving took {time.perf_counter() - visualize_start_time:.2f} seconds")

    # Extract MFCCs
    print("Extracting MFCCs...")
    mfcc_start_time = time.perf_counter()
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    print(f"MFCC extraction took {time.perf_counter() - mfcc_start_time:.2f} seconds")

    # Visualize and save the MFCCs
    print("Visualizing MFCCs...")
    visualize_mfcc_start_time = time.perf_counter()
    mfcc_file = _output_path(mfcc_dir, video_file, "_mfcc.png")
    _save_and_show(mfccs, sr, mfcc_file)
    print(f"MFCC visualization and saving took {time.perf_counter() - visualize_mfcc_start_time:.2f} seconds")

    # Optional: also persist the raw MFCC matrix as CSV.
    # mfcc_csv_file = _output_path(mfcc_dir, video_file, "_mfcc.csv")
    # np.savetxt(mfcc_csv_file, mfccs, delimiter=",")

    total_time = time.perf_counter() - start_time
    print(f"Total processing time for {video_name}: {total_time:.2f} seconds")