In [45]:
import matplotlib.pyplot as plt
import librosa as lb
import numpy as np
import pandas as pd
import yaml
import os

# Read the project settings from the YAML config file
with open("../config.yaml") as file:
    config = yaml.safe_load(file)

# Audio data directory and the CSV file that sits inside it
dirPath = config["paths"]["audioData"]
csvPath = "/".join((dirPath, "dataset.csv"))

# Preprocessing settings: number of MFC coefficients and the hop length
numOfMFCCoeffs, hopLength = (
    config["preprocessing"]["numOfMFCCoeffs"],
    config["preprocessing"]["hopLength"],
)

In [46]:
# Read the audio dataset CSV and preview its first ten rows
df = pd.read_csv(filepath_or_buffer=csvPath)
df.head(n=10)

Unnamed: 0,audio_file_path,class
0,/home/g0d/Downloads/UrbanSound8K/fold5/100032-...,dog_bark
1,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
2,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
3,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
4,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
5,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
6,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
7,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
8,/home/g0d/Downloads/UrbanSound8K/fold5/100263-...,children_playing
9,/home/g0d/Downloads/UrbanSound8K/fold10/100648...,car_horn


# 📂 Dataset Creation  

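The existing `dataset.csv` in the `audioData` directory cannot be used as-is, because it lists audio file paths rather than spectrogram image paths. We only use it as the source of file names and class labels.  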

🔄 Instead, we will **create a new `dataset.csv`** in the `imageData` directory, which will contain:  

📌 **Columns:**  
- 🖼️ **image_file_path** – Path to the spectrogram image  
- 🎭 **class** – Corresponding class  

In [47]:
# Important paths ;)
dirPath = config["paths"]["imageData"]
# os.path.join inserts the separator, so the CSV path is valid whether or not
# dirPath ends with one (plain string concatenation silently glued the names together).
csvPath = os.path.join(dirPath, "dataset.csv")

# Build every row first and construct the DataFrame once; assigning rows one at
# a time with dataset.loc[i] re-allocates the frame on every iteration.
# NOTE: `df` must still be the audio dataset (columns "audio_file_path" and "class").
records = [
    {
        # <imageData>/<class>/<audio file stem>_Spectrogram.png
        "image_file_path": os.path.join(
            dirPath,
            className,
            os.path.basename(audioFilePath).split(".")[0] + "_Spectrogram.png",
        ),
        "class": className,
    }
    for audioFilePath, className in zip(df["audio_file_path"], df["class"])
]

# Passing `columns` keeps both headers even when the source dataset is empty
dataset = pd.DataFrame(records, columns=["image_file_path", "class"])

In [48]:
# Write the image dataset to dataset.csv in the image directory, without the index column
dataset.to_csv(path_or_buf=os.path.join(dirPath, "dataset.csv"), index=False)

In [49]:
# Read the freshly written CSV back as a sanity check ;)
# Note: this rebinds `df`, which previously held the audio dataset.
df = pd.read_csv(filepath_or_buffer=os.path.join(dirPath, "dataset.csv"))
df.head(n=10)

Unnamed: 0,image_file_path,class
0,/home/g0d/Desktop/audio-vision/melSpectrograms...,dog_bark
1,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
2,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
3,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
4,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
5,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
6,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
7,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
8,/home/g0d/Desktop/audio-vision/melSpectrograms...,children_playing
9,/home/g0d/Desktop/audio-vision/melSpectrograms...,car_horn


In [50]:
# Show the full image path of the first row (the table preview truncates it)
df.loc[0, "image_file_path"]

'/home/g0d/Desktop/audio-vision/melSpectrograms/UrbanSound8K/dog_bark/100032-3-0-0_Spectrogram.png'