<a href="https://colab.research.google.com/github/Sairamsparks2003/Vocalysis/blob/main/Vocalysis3_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yt-dlp pandas
import yt_dlp
import pandas as pd
from google.colab import files

def download_audio(url, output_file):
    """Download the best available audio stream for ``url`` and convert it to MP3.

    Args:
        url: Video URL handed to yt-dlp.
        output_file: Desired path of the resulting MP3, e.g. ``audio_0.mp3``.
            A trailing ``.mp3`` is optional; the result always ends in ``.mp3``.

    Raises:
        yt_dlp.utils.DownloadError: If yt-dlp reports a failure for ``url``.
    """
    import os  # local import: the cell defining this function does not import os

    # The FFmpegExtractAudio post-processor appends ".mp3" on its own, so an output
    # template that already ends in ".mp3" yields "audio_0.mp3.mp3". Drop the
    # extension and let yt-dlp substitute the source extension instead; the
    # post-processor then writes "<base>.mp3".
    base, ext = os.path.splitext(output_file)
    if ext.lower() != '.mp3':
        base = output_file

    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': f'{base}.%(ext)s',
        # Let yt-dlp log an error and carry on instead of raising immediately;
        # the failure is surfaced below through the return code.
        'ignoreerrors': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        retcode = ydl.download([url])

    # With 'ignoreerrors' set, yt-dlp does not raise, so without this check a
    # failed download would look exactly like a successful one to the caller.
    if retcode:
        raise yt_dlp.utils.DownloadError(f"yt-dlp could not download {url}")

# Upload the Excel file
uploaded = files.upload()
if not uploaded:
    # files.upload() gives back an empty mapping when the dialog is cancelled;
    # fail with a clear message instead of an IndexError on the lookup below.
    raise ValueError("No Excel file was uploaded; re-run this cell and choose a file.")
excel_file = next(iter(uploaded))

# Read URLs from Excel sheet (first column, no header row)
df = pd.read_excel(excel_file, header=None)
if df.empty:
    youtube_urls = []
else:
    # Blank cells are read as NaN; drop them and trim stray whitespace so only
    # real URL strings are queued for download.
    first_column = df.iloc[:, 0].dropna().astype(str).str.strip()
    youtube_urls = [entry for entry in first_column.tolist() if entry]

# Prompt user for additional URLs
print("Enter additional YouTube URLs (press Enter without typing a URL to finish):")
while True:
    # Trim surrounding whitespace so a pasted URL with a trailing space still works
    # and a whitespace-only line ends the prompt instead of being queued as a URL.
    url = input("URL: ").strip()
    if not url:
        break
    youtube_urls.append(url)

# Fetch every queued URL into its own numbered MP3; a failing URL is reported
# and the loop moves on to the next one.
for i, url in enumerate(youtube_urls):
    try:
        output_file = f'audio_{i}.mp3'
        download_audio(url, output_file)
    except yt_dlp.utils.DownloadError as e:
        print(f"Skipping invalid URL: {url}")
        print(f"Error: {str(e)}")
    else:
        # Only reached when download_audio returned without raising
        print(f"Audio downloaded: {output_file}")



Saving pdt.xlsx to pdt (2).xlsx
Enter additional YouTube URLs (press Enter without typing a URL to finish):
URL: 
[youtube] Extracting URL: https://youtu.be/0k3uQVMbfOo?si=au9LuWNMv-jzikuo
[youtube] 0k3uQVMbfOo: Downloading webpage
[youtube] 0k3uQVMbfOo: Downloading ios player API JSON
[youtube] 0k3uQVMbfOo: Downloading web creator player API JSON
[youtube] 0k3uQVMbfOo: Downloading m3u8 information
[info] 0k3uQVMbfOo: Downloading 1 format(s): 251
[download] Destination: audio_0.mp3
[download] 100% of  916.02KiB in 00:00:00 at 5.43MiB/s   
[ExtractAudio] Destination: audio_0.mp3.mp3
Deleting original file audio_0.mp3 (pass -k to keep)
Audio downloaded: audio_0.mp3
[youtube] Extracting URL: https://youtu.be/vYdd5Zr2ZwM?si=YZzGHIIZfsW8G-en
[youtube] vYdd5Zr2ZwM: Downloading webpage
[youtube] vYdd5Zr2ZwM: Downloading ios player API JSON
[youtube] vYdd5Zr2ZwM: Downloading web creator player API JSON
[youtube] vYdd5Zr2ZwM: Downloading m3u8 information
[info] vYdd5Zr2ZwM: Downloading 1 format

In [None]:
import os  # needed here: this cell runs before the notebook's main import cell

# Placeholder destination -- replace with a real directory before running.
output_directory = '/path/to/save/audio'
# NOTE: `i` is whatever index the download loop in the first cell finished on,
# so this cell only works after that loop has run at least once.
output_file = os.path.join(output_directory, f'audio_{i}.mp3')

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
from tqdm import tqdm

def extract_features(audio_file, max_duration=1800):
    """Compute a flat dictionary of acoustic features for one audio file.

    Args:
        audio_file: Path of the audio file to analyse.
        max_duration: Maximum number of seconds read from the start of the file.

    Returns:
        A dict with the keys ``sampling_rate``, ``duration``, ``pitch_mean``,
        ``pitch_std``, ``pitch_range``, ``pitch_quartiles``, ``rms_energy``,
        ``tempo``, ``speech_rate``, ``zcr``, ``pause_duration``, ``num_pauses``,
        ``pause_rate``, five ``spectral_*`` means and ``mfcc_1`` .. ``mfcc_13``;
        or ``None`` when the file cannot be loaded or holds no samples.

    Notes:
        Pauses are the silent gaps *between* the non-silent intervals reported
        by ``librosa.effects.split``; ``pause_duration`` is their mean length in
        seconds and ``pause_rate`` is pauses per second of audio.
    """
    try:
        y, sr = librosa.load(audio_file, duration=max_duration, sr=None)  # sr=None preserves the original sampling rate
    except Exception as e:
        print(f"Error loading {audio_file}: {str(e)}")
        return None

    duration = librosa.get_duration(y=y, sr=sr)
    if duration <= 0:
        # Nothing to analyse; also avoids dividing by zero for pause_rate below.
        print(f"Skipping {audio_file}: no audio samples decoded")
        return None

    features = {}
    features['sampling_rate'] = sr
    features['duration'] = duration

    # Pitch features. The audio was loaded at its native rate, so that rate must
    # be passed on; otherwise pyin falls back to its own default sampling rate
    # and the estimated frequencies are scaled incorrectly.
    f0, voiced_flag, _ = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )
    f0 = f0[voiced_flag]  # keep voiced frames only
    if len(f0) > 0:
        features['pitch_mean'] = np.mean(f0)
        features['pitch_std'] = np.std(f0)
        features['pitch_range'] = np.max(f0) - np.min(f0)
        features['pitch_quartiles'] = np.percentile(f0, [25, 50, 75]).tolist()
    else:
        features['pitch_mean'] = features['pitch_std'] = features['pitch_range'] = 0
        features['pitch_quartiles'] = [0, 0, 0]

    # Energy features
    features['rms_energy'] = np.mean(librosa.feature.rms(y=y))

    # Rhythm features. beat_track may hand the tempo back as a one-element array;
    # normalise it to a plain float so the column stays numeric.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])
    features['tempo'] = tempo
    features['speech_rate'] = tempo / 60  # beats per second

    # Voice quality features
    features['zcr'] = np.mean(librosa.feature.zero_crossing_rate(y=y))

    # Pause features. effects.split yields the NON-silent (start, end) sample
    # intervals, so the pauses are the gaps between consecutive intervals.
    sounding = np.asarray(librosa.effects.split(y, top_db=20)).reshape(-1, 2)
    gap_lengths = sounding[1:, 0] - sounding[:-1, 1]
    gap_lengths = gap_lengths[gap_lengths > 0]
    features['pause_duration'] = float(np.mean(gap_lengths)) / sr if gap_lengths.size else 0
    features['num_pauses'] = int(gap_lengths.size)
    features['pause_rate'] = features['num_pauses'] / duration

    # Spectral features (each averaged over all frames)
    features['spectral_centroid'] = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    features['spectral_bandwidth'] = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    features['spectral_contrast'] = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))
    features['spectral_flatness'] = np.mean(librosa.feature.spectral_flatness(y=y))
    features['spectral_rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))

    # MFCC features: one mean per coefficient, numbered from 1
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    for index, coefficient in enumerate(mfccs, start=1):
        features[f'mfcc_{index}'] = np.mean(coefficient)

    return features

def is_audio_file(filename):
    """Return True when ``filename`` ends in a known audio extension (case-insensitive)."""
    lowered = filename.lower()
    # str.endswith accepts a tuple and matches if any suffix fits
    return lowered.endswith(('.mp3', '.wav', '.ogg', '.flac', '.m4a'))

def process_audio_files(audio_directory):
    """Extract features for every audio file directly inside ``audio_directory``.

    Entries that are not audio files, and files whose extraction returns None,
    are left out. Each remaining feature dict gains a ``filename`` key, and the
    collected dicts are returned as one DataFrame (one row per file).
    """
    rows = []
    # tqdm wraps the full directory listing, so the progress bar counts every entry
    audio_names = (name for name in tqdm(os.listdir(audio_directory)) if is_audio_file(name))

    for name in audio_names:
        extracted = extract_features(os.path.join(audio_directory, name))
        if extracted is None:
            continue
        extracted['filename'] = name
        rows.append(extracted)

    return pd.DataFrame(rows)

# Input: every audio file in the notebook's working directory.
# Output: a single CSV written next to them.
csv_filename = 'audio_features.csv'
audio_directory = os.getcwd()

# Run the extraction over the whole directory
print("Extracting features from audio files...")
features_df = process_audio_files(audio_directory)

# Preview the top of the resulting table
print("\nFirst few rows of extracted features:", features_df.head(), sep="\n")

# Persist the full table, leaving out the index column
features_df.to_csv(csv_filename, index=False)
print(f"\nFeatures saved to {csv_filename}")

# Visualize some features: one histogram per (column, title) pair on a 2x3 grid
histogram_specs = [
    ('sampling_rate', 'Distribution of Sampling Rates'),
    ('duration', 'Distribution of Audio Durations'),
    ('pitch_mean', 'Distribution of Mean Pitch'),
    ('rms_energy', 'Distribution of RMS Energy'),
    ('tempo', 'Distribution of Tempo'),
    ('spectral_centroid', 'Distribution of Spectral Centroid'),
]

if features_df.empty:
    # Without any extracted rows the feature columns do not exist, and indexing
    # them would raise KeyError.
    print("No audio features were extracted; skipping histograms.")
else:
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    for ax, (column, title) in zip(axes.flat, histogram_specs):
        # Missing values cannot be binned, so leave them out of the histogram
        ax.hist(features_df[column].dropna(), bins=20)
        ax.set_title(title)
        ax.set_xlabel(column)
        ax.set_ylabel('Number of files')
    fig.tight_layout()
    plt.show()

# Correlation heatmap of features (numeric columns only)
numeric_features = features_df.select_dtypes(include=[np.number])
correlation_matrix = numeric_features.corr()

if correlation_matrix.empty:
    print("No numeric features available; skipping correlation heatmap.")
else:
    fig, ax = plt.subplots(figsize=(15, 12))
    # Pin the colour scale to the full correlation range so the diverging
    # colormap is centred on zero and colours are comparable between runs.
    image = ax.imshow(correlation_matrix, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
    fig.colorbar(image, ax=ax)
    ax.set_title('Correlation Heatmap of Audio Features')

    tick_positions = list(range(len(numeric_features.columns)))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(numeric_features.columns, rotation=90)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(numeric_features.columns)

    fig.tight_layout()
    plt.show()

print("\nFeature extraction and visualization complete!")

Extracting features from audio files...


  9%|▉         | 3/33 [18:04<2:41:50, 323.67s/it]