<a href="https://colab.research.google.com/github/Ravitejaa249/Music-Genre-Classification/blob/main/preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Attach Google Drive to the Colab VM; the audio dataset used by the cells
# below is read from /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn


In [None]:
import librosa
import librosa.display
import IPython.display as ipd
import warnings
# Suppress every Python warning for the rest of the session. This keeps the
# cell outputs short, but it also hides deprecation notices from the audio
# libraries, so re-enable warnings when debugging unexpected results.
warnings.filterwarnings('ignore')

In [None]:
import os

# Sanity check: report how many audio clips each genre folder contains.
dataset_path = '/content/drive/MyDrive/genres_original'

genres = os.listdir(dataset_path)
for genre in genres:
    genre_path = os.path.join(dataset_path, genre)
    # Stray files sitting next to the genre folders are not genres.
    if not os.path.isdir(genre_path):
        continue
    num_files = sum(
        1 for entry in os.listdir(genre_path)
        if entry.endswith(('.wav', '.mp3'))
    )
    print(f"Genre: {genre} - Number of audio files: {num_files}")


Genre: blues - Number of audio files: 100
Genre: classical - Number of audio files: 100
Genre: reggae - Number of audio files: 100
Genre: metal - Number of audio files: 100
Genre: country - Number of audio files: 100
Genre: jazz - Number of audio files: 100
Genre: pop - Number of audio files: 100
Genre: hiphop - Number of audio files: 100
Genre: disco - Number of audio files: 100
Genre: rock - Number of audio files: 100


In [None]:
!pip install librosa
!pip install numpy scipy
!pip install matplotlib
!pip install scikit-learn
!pip install tensorflow
!pip install torch

  Attempting uninstall: nvidia-cusolver-cu12
    Found existing installation: nvidia-cusolver-cu12 11.6.3.83
    Uninstalling nvidia-cusolver-cu12-11.6.3.83:
      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83
Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127


In [None]:
import os
import librosa
import numpy as np
import pandas as pd

def extract_features(audio_path, label=None, sr=22050):
    """Summarise one audio file as a flat dict of scalar descriptors.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load with ``librosa.load``.
    label : optional
        Class label stored under the ``"label"`` key; the key is omitted
        when ``label`` is None.
    sr : int, default 22050
        Sampling rate passed to ``librosa.load``.

    Returns
    -------
    dict
        Keys, in insertion order: ``filename``, ``length`` (number of
        samples), ``<name>_mean`` / ``<name>_var`` for chroma_stft, rms,
        spectral_centroid, spectral_bandwidth, rolloff, zero_crossing_rate,
        harmony and perceptr, then ``tempo``, then ``mfcc{1..20}_mean`` /
        ``mfcc{1..20}_var`` and finally ``label`` (if given).
    """
    y, sr = librosa.load(audio_path, sr=sr)

    # Harmonic / percussive separation yields two time-domain signals that are
    # summarised with the same mean/variance statistics as the frame features.
    y_harm, y_perc = librosa.effects.hpss(y)

    # (output key prefix, values to summarise), listed in output column order.
    summarised = (
        ("chroma_stft", librosa.feature.chroma_stft(y=y, sr=sr)),
        ("rms", librosa.feature.rms(y=y)),
        ("spectral_centroid", librosa.feature.spectral_centroid(y=y, sr=sr)),
        ("spectral_bandwidth", librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        ("rolloff", librosa.feature.spectral_rolloff(y=y, sr=sr)),
        ("zero_crossing_rate", librosa.feature.zero_crossing_rate(y)),
        ("harmony", y_harm),
        ("perceptr", y_perc),
    )

    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version the tempo is a scalar or a 1-element
    # array; an array would end up in the CSV as the string "[123.04]", so
    # always store a plain float.
    tempo = float(np.atleast_1d(tempo)[0])

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc_means = np.mean(mfcc, axis=1)
    mfcc_vars = np.var(mfcc, axis=1)

    features_dict = {
        "filename": os.path.basename(audio_path),
        "length": len(y),
    }
    for name, values in summarised:
        features_dict[f"{name}_mean"] = np.mean(values)
        features_dict[f"{name}_var"] = np.var(values)
    features_dict["tempo"] = tempo

    # MFCC coefficients are numbered from 1 in the column names.
    for i, (coef_mean, coef_var) in enumerate(zip(mfcc_means, mfcc_vars), start=1):
        features_dict[f"mfcc{i}_mean"] = coef_mean
        features_dict[f"mfcc{i}_var"] = coef_var

    if label is not None:
        features_dict["label"] = label

    return features_dict


def process_audio_folder(base_dir, csv_output="data_features.csv", sr=22050):
    """Extract features for every ``.wav`` file under ``base_dir`` and save them.

    ``base_dir`` is expected to contain one sub-directory per class; the
    directory name is used as the label of every ``.wav`` file (matched
    case-insensitively) inside it. Files that fail to process are reported
    with ``print`` and skipped so that one bad file does not abort the run.

    Parameters
    ----------
    base_dir : str
        Root directory holding the per-label sub-directories.
    csv_output : str, default "data_features.csv"
        Path of the CSV file to write (without the index column).
    sr : int, default 22050
        Sampling rate forwarded to ``extract_features``.

    Returns
    -------
    pandas.DataFrame
        The table that was written to ``csv_output``, one row per file.
    """
    feature_rows = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the CSV reproducible between runs and machines.
    for genre in sorted(os.listdir(base_dir)):
        genre_path = os.path.join(base_dir, genre)
        if not os.path.isdir(genre_path):
            continue
        for filename in sorted(os.listdir(genre_path)):
            if not filename.lower().endswith(".wav"):
                continue
            audio_path = os.path.join(genre_path, filename)
            try:
                feature_rows.append(extract_features(audio_path, label=genre, sr=sr))
            except Exception as e:
                # Best effort: report the failing file and keep going.
                print(f"Error processing {audio_path}: {e}")

    df = pd.DataFrame(feature_rows)
    df.to_csv(csv_output, index=False)
    print(f"Features saved to {csv_output}")
    return df


if __name__ == "__main__":
    # Build the whole-clip feature table for the dataset stored on Drive and
    # write it to the current working directory.
    base_dir = '/content/drive/MyDrive/genres_original'
    csv_path = "features_30_sec.csv"

    process_audio_folder(base_dir=base_dir, csv_output=csv_path, sr=22050)


In [None]:
import os
import librosa
import numpy as np
import pandas as pd

def calc_tempo(audio_full_path, sr=22050):
    """Estimate the tempo of one audio file from its onset-strength envelope.

    Parameters
    ----------
    audio_full_path : str
        Path of the audio file to load with ``librosa.load``.
    sr : int, default 22050
        Sampling rate passed to ``librosa.load``.

    Returns
    -------
    float
        The first tempo estimate, or ``0.0`` when the estimator returns
        nothing or any step raises (the error is printed, not re-raised).
    """
    try:
        y, sr = librosa.load(audio_full_path, sr=sr)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)

        # The estimator lives in librosa.feature in newer releases;
        # librosa.beat.tempo is deprecated. Without this lookup a missing
        # attribute would be swallowed by the except below and every file
        # would silently get a tempo of 0.0.
        tempo_fn = getattr(librosa.feature, "tempo", None)
        if tempo_fn is None:
            tempo_fn = librosa.beat.tempo

        tempo_arr = np.atleast_1d(tempo_fn(onset_envelope=onset_env, sr=sr))
        tempo = float(tempo_arr[0]) if tempo_arr.size > 0 else 0.0
    except Exception as e:
        print(f"Error calculating tempo for {audio_full_path}: {e}")
        tempo = 0.0
    return tempo

def update_tempo_in_csv(csv_input, csv_output, base_dir, sr=22050):
    """Recompute the ``tempo`` column of a feature CSV and save the result.

    Each row's audio file is located at ``base_dir/<label>/<filename>`` and
    passed to ``calc_tempo``. Any existing ``tempo`` column is discarded
    first, so the recomputed column ends up as the last column of the output.

    Parameters
    ----------
    csv_input : str
        CSV to read; must provide ``label`` and ``filename`` columns.
    csv_output : str
        Path of the CSV to write (without the index column).
    base_dir : str
        Root directory holding the per-label audio sub-directories.
    sr : int, default 22050
        Sampling rate forwarded to ``calc_tempo``.
    """
    # errors='ignore' makes the drop a no-op when there is no tempo column yet.
    table = pd.read_csv(csv_input).drop(columns=['tempo'], errors='ignore')

    table['tempo'] = table.apply(
        lambda record: calc_tempo(
            os.path.join(base_dir, record['label'], record['filename']),
            sr=sr,
        ),
        axis=1,
    )

    table.to_csv(csv_output, index=False)
    print(f"Updated CSV saved to {csv_output}")

if __name__ == "__main__":
    # The extraction cell above writes its table to "features_30_sec.csv" in
    # the working directory; "data_features.csv" is only the unused default of
    # process_audio_folder and is never created, so reading it fails on a
    # fresh runtime.
    csv_input_path = "features_30_sec.csv"         # The CSV file you originally produced
    csv_output_path = "data_features_updated.csv"  # The CSV file to save with updated tempo values
    base_audio_dir = '/content/drive/MyDrive/genres_original'  # Base folder for audio files

    update_tempo_in_csv(csv_input_path, csv_output_path, base_audio_dir, sr=22050)


In [None]:
# Load the 30-second feature table stored on Drive and preview its first rows.
features_30_sec_path = '/content/drive/MyDrive/features_30_sec.csv'
data = pd.read_csv(features_30_sec_path)
data.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [None]:
import os
import glob
import numpy as np
import librosa
import librosa.display
from sklearn.preprocessing import minmax_scale

def extract_features_segment(segment, sr, hop_length=512):
    """Compute the descriptors of one audio segment.

    Parameters
    ----------
    segment : np.ndarray
        Audio samples of the segment (passed as ``y`` to librosa).
    sr : int
        Sampling rate of ``segment``.
    hop_length : int, default 512
        Hop length used for every frame-based feature, including the onset
        envelope and the tempo estimate derived from it.

    Returns
    -------
    dict
        ``length`` (number of samples); time-averaged ``rms``, ``centroid``,
        ``bandwidth``, ``rolloff`` and ``zcr``; ``chroma_mean`` (per chroma
        bin); ``mfcc_mean`` and ``mfcc_var`` (20 coefficients each); and
        ``tempo`` (first estimate, or 0.0 when the estimator returns nothing).
    """
    features = {}

    features['length'] = len(segment)
    rms = librosa.feature.rms(y=segment, hop_length=hop_length)
    features['rms'] = np.mean(rms)

    centroid = librosa.feature.spectral_centroid(y=segment, sr=sr, hop_length=hop_length)
    features['centroid'] = np.mean(centroid)

    bandwidth = librosa.feature.spectral_bandwidth(y=segment, sr=sr, hop_length=hop_length)
    features['bandwidth'] = np.mean(bandwidth)

    rolloff = librosa.feature.spectral_rolloff(y=segment, sr=sr, hop_length=hop_length, roll_percent=0.85)
    features['rolloff'] = np.mean(rolloff)

    zcr = librosa.feature.zero_crossing_rate(y=segment, hop_length=hop_length)
    features['zcr'] = np.mean(zcr)

    chroma = librosa.feature.chroma_stft(y=segment, sr=sr, hop_length=hop_length)
    features['chroma_mean'] = np.mean(chroma, axis=1)  # one value per chroma bin

    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20, hop_length=hop_length)
    features['mfcc_mean'] = np.mean(mfcc, axis=1)  # 20 features
    features['mfcc_var'] = np.var(mfcc, axis=1)    # 20 features

    onset_env = librosa.onset.onset_strength(y=segment, sr=sr, hop_length=hop_length)

    # The estimator lives in librosa.feature in newer releases;
    # librosa.beat.tempo is deprecated, so use it only as a fallback.
    tempo_fn = getattr(librosa.feature, "tempo", None)
    if tempo_fn is None:
        tempo_fn = librosa.beat.tempo

    # The tempo estimator converts envelope frames to time using hop_length,
    # so it must be given the same hop as the onset envelope; otherwise the
    # BPM is scaled incorrectly whenever hop_length differs from the default.
    tempo = np.atleast_1d(tempo_fn(onset_envelope=onset_env, sr=sr, hop_length=hop_length))
    features['tempo'] = tempo[0] if tempo.size > 0 else 0.0

    return features

def extract_features_file(file_path, segment_duration=3, sr_expected=None, hop_length=512,
                          clip_duration=30):
    """Split one audio file into equal segments and build a feature matrix.

    The signal is loaded, trimmed with ``librosa.effects.trim``, then
    zero-padded or truncated to exactly ``clip_duration`` seconds. It is cut
    into consecutive, non-overlapping segments of ``segment_duration``
    seconds, and each segment is described by a 59-value vector: length, rms,
    centroid, bandwidth, rolloff, zcr, 12 chroma means, 20 MFCC means,
    20 MFCC variances, tempo.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    segment_duration : float, default 3
        Length of each segment in seconds.
    sr_expected : int or None, default None
        Sampling rate passed to ``librosa.load`` (None is forwarded as-is).
    hop_length : int, default 512
        Hop length forwarded to ``extract_features_segment``.
    clip_duration : float, default 30
        Length in seconds the signal is padded / truncated to.

    Returns
    -------
    (np.ndarray, sr)
        Array with one row per segment and 59 columns, and the sampling rate
        returned by ``librosa.load``. With the defaults there are 10 rows.

    Raises
    ------
    ValueError
        If ``segment_duration`` is not positive or is longer than the clip.
    AssertionError
        If a segment does not yield exactly 59 feature values.
    """
    y, sr = librosa.load(file_path, sr=sr_expected)

    y, _ = librosa.effects.trim(y)

    # Force every clip to the same length so each file yields the same
    # number of segments.
    total_samples = int(clip_duration * sr)
    if len(y) < total_samples:
        y = np.pad(y, (0, total_samples - len(y)), mode='constant')
    else:
        y = y[:total_samples]

    samples_per_segment = int(segment_duration * sr)
    if not 0 < samples_per_segment <= total_samples:
        raise ValueError(
            f"segment_duration must be positive and at most {clip_duration} seconds, "
            f"got {segment_duration}"
        )

    # Derive the segment count from the durations instead of hard-coding 10,
    # which only matched the default 3-second segments of a 30-second clip.
    num_segments = total_samples // samples_per_segment

    feature_vectors = []

    for i in range(num_segments):
        start = i * samples_per_segment
        segment = y[start:start + samples_per_segment]

        feats = extract_features_segment(segment, sr, hop_length=hop_length)

        # Flatten the per-segment dict in a fixed column order.
        vector = [
            feats['length'],
            feats['rms'],
            feats['centroid'],
            feats['bandwidth'],
            feats['rolloff'],
            feats['zcr'],
            *feats['chroma_mean'],
            *feats['mfcc_mean'],
            *feats['mfcc_var'],
            feats['tempo'],
        ]
        assert len(vector) == 59, f"Expected 59 features, got {len(vector)}"
        feature_vectors.append(vector)

    return np.array(feature_vectors), sr

# Build the segment-level dataset: one 59-value row per 3-second segment,
# labelled with the name of the genre folder the file came from.
gtzan_path = '/content/drive/MyDrive/genres_original'

# Every sub-directory of the dataset root is treated as one genre.
genres = [
    entry for entry in os.listdir(gtzan_path)
    if os.path.isdir(os.path.join(gtzan_path, entry))
]

all_features = []
all_labels = []

for genre in genres:
    genre_dir = os.path.join(gtzan_path, genre)

    for file_path in glob.glob(os.path.join(genre_dir, "*.wav")):
        print(f"Processing file: {file_path}")
        try:
            features_segments, sr = extract_features_file(file_path)
            # One label entry per segment row keeps features and labels aligned.
            all_features.extend(features_segments)
            all_labels.extend([genre] * len(features_segments))
        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Error processing {file_path}: {e}")

X = np.array(all_features)
y_labels = np.array(all_labels)

print("Extracted feature matrix shape:", X.shape)
print("Extracted labels shape:", y_labels.shape)


In [None]:
import pandas as pd

# Persist the segment-level feature matrix: columns f1..f59 hold the values in
# the order produced by extract_features_file, followed by the genre label.
feature_columns = [f'f{index}' for index in range(1, 60)]

df_features = pd.DataFrame(X, columns=feature_columns).assign(genre=y_labels)

csv_filename = 'features_3_sec.csv'
df_features.to_csv(csv_filename, index=False)

print(f"Saved features to {csv_filename}")
