## Initialization

In [1]:
%%capture
# Install the Kaggle CLI and register the API token.
# kaggle.json must already be present in the current working directory.
# %pip (rather than !pip) installs into the environment the running kernel uses.
%pip install kaggle
# -p keeps a re-run from failing when the directory already exists.
!mkdir -p /root/.kaggle
!cp kaggle.json /root/.kaggle/
# Restrict the token to the current user.
!chmod 600 /root/.kaggle/kaggle.json

In [2]:
%%capture
# Download the dataset archive from Kaggle into /content.
# -p makes the download location explicit so it always matches the unzip path below.
!kaggle datasets download -d rayonegautam/charanet -p /content

# Extract the archive next to it.
# -o overwrites files left by a previous run; without it unzip stops at an
# interactive "replace?" prompt, which %%capture would hide.
!unzip -o /content/charanet.zip -d /content/

In [3]:
# Remove unnecessary files (downloaded archive, API token copy, Colab sample data).
# -f keeps a re-run from reporting errors once the targets are already gone.
!rm -rf /content/charanet.zip /content/kaggle.json /content/sample_data

## Modules & Libraries

In [4]:
import os
import pandas as pd
import librosa
import numpy as np

In [5]:
# Root of the extracted dataset, followed by the folder of each split below it.
dataset_path = '/content/charaNet'
training_folder = dataset_path + '/train'
validation_folder = dataset_path + '/val'
testing_folder = dataset_path + '/test'

## **Preprocessing**

In [6]:
def extract_mfcc_features(file_path, n_mfcc=13):
    """
    Load one audio file and compute its MFCC matrix.

    Parameters
    ----------
    file_path : path of the audio file, handed to ``librosa.load``.
    n_mfcc : number of coefficients requested from ``librosa.feature.mfcc``
        (default 13).

    Returns
    -------
    The MFCC array returned by librosa, transposed, so that iterating over
    the result yields one coefficient vector at a time (the CSV writer
    consumes it row by row).
    """
    # sr=None asks librosa to keep the file's own sampling rate instead of resampling.
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    return coefficients.transpose()

In [7]:

def preprocess_and_save_features(dataset_path, output_csv):
    """
    Extract MFCC features for every audio file of a dataset split and save them to a CSV.

    ``dataset_path`` is expected to contain one sub-directory per class, each
    holding that class's audio files. Every row returned by
    ``extract_mfcc_features`` becomes one CSV row with two columns:
    ``class`` (the sub-directory name) and ``mfcc`` (the coefficient vector
    converted to a Python list, which the CSV stores in its string form).

    Directory entries are visited in sorted order so repeated runs produce the
    same CSV. Entries that are not class directories (stray files in the split
    root) or not regular files (nested folders inside a class) are skipped
    instead of aborting the run.

    Parameters
    ----------
    dataset_path : directory of one split (e.g. the training folder).
    output_csv : path of the CSV file to write.

    Returns
    -------
    None
    """
    rows = []

    for class_label in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_label)
        # Only directories are classes; a stray file here would make listdir raise.
        if not os.path.isdir(class_path):
            continue

        for audio_file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, audio_file)
            # Nested folders cannot be loaded as audio.
            if not os.path.isfile(file_path):
                continue

            mfccs = extract_mfcc_features(file_path)
            # One CSV row per MFCC vector; tolist() turns the array into a plain list.
            rows.extend((class_label, mfcc.tolist()) for mfcc in mfccs)

    # With no rows this still writes a header-only CSV.
    pd.DataFrame(rows, columns=['class', 'mfcc']).to_csv(output_csv, index=False)

In [11]:
# Attach Google Drive to the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Drive folder that receives a copy of each generated feature CSV.
# Run this only after Drive has been mounted at /content/drive.
csv_path = "/content/drive/MyDrive/Models/FeatherFind"

# Make sure the destination exists as a directory. If it is missing,
# `cp <file> $csv_path` creates a regular *file* with that name and every
# later copy overwrites the previous one.
!mkdir -p "$csv_path"

# Create empty placeholders for the local feature files.
!touch /content/train_feat.csv /content/val_feat.csv /content/test_feat.csv

**Generate and save training set features**

In [10]:
# Build the training-split feature table locally, then copy it to csv_path.
output_csv = '/content/train_feat.csv'
preprocess_and_save_features(dataset_path=training_folder, output_csv=output_csv)

!cp {output_csv} {csv_path}

**Generate and save validation set features**

In [13]:
# Build the validation-split feature table locally, then copy it to csv_path.
output_csv = '/content/val_feat.csv'
preprocess_and_save_features(dataset_path=validation_folder, output_csv=output_csv)

!cp {output_csv} {csv_path}

**Generate and save test set features**

In [14]:
# Build the test-split feature table locally, then copy it to csv_path.
output_csv = '/content/test_feat.csv'
preprocess_and_save_features(dataset_path=testing_folder, output_csv=output_csv)

!cp {output_csv} {csv_path}