## Initialization

In [None]:
%%capture
# give permissions to kaggle API
! pip install kaggle
! mkdir /root/.kaggle
! cp kaggle.json /root/.kaggle
! chmod 600 /root/.kaggle/kaggle.json

In [None]:
%%capture
# download dataset from kaggle to colab
! kaggle datasets download -d rayonegautam/charanet

# extract the content of the zipped file
!unzip /content/charanet.zip -d /content/

In [None]:
# remove uncessary files
! rm -r /content/charanet.zip
! rm -r /content/kaggle.json
! rm -r /content/sample_data

## Modules & Libraries

In [None]:
import os
import math
import json
import pandas as pd
import librosa
import numpy as np

In [None]:
# Root of the extracted dataset and its train / validation / test split folders.
dataset_path = '/content/charaNet'
training_folder = f'{dataset_path}/train'
validation_folder = f'{dataset_path}/val'
testing_folder = f'{dataset_path}/test'

# Audio settings used during feature extraction.
SAMPLE_RATE = 22050  # sampling rate passed to the audio loader
DURATION = 10  # seconds of audio considered per track
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE  # number of samples in DURATION seconds

## **Preprocessing**

In [None]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=1):
    """Extract MFCC features from the audio files under ``dataset_path`` and save them as JSON.

    Every directory below ``dataset_path`` is treated as one class; its name is stored in
    ``mapping`` and the position of that name in ``mapping`` is the integer label of the
    features extracted from the files it contains. Directories and files are visited in
    sorted order, so label indices do not depend on filesystem ordering and stay
    consistent across splits that contain the same class folders.

    Only the first ``SAMPLES_PER_TRACK`` samples of each file are used. They are divided
    into ``num_segments`` equal segments, and a segment is stored only if it yields the
    expected number of MFCC frames; segments of shorter recordings are skipped silently.

    Parameters
    ----------
    dataset_path : str or os.PathLike
        Root folder containing one sub-directory per class.
    json_path : str or os.PathLike
        File the resulting dictionary is written to.
    n_mfcc : int
        Number of MFCC coefficients, forwarded to ``librosa.feature.mfcc``.
    n_fft : int
        FFT window size, forwarded to ``librosa.feature.mfcc``.
    hop_length : int
        Hop length in samples, forwarded to ``librosa.feature.mfcc``.
    num_segments : int
        Number of segments each track is divided into (must be >= 1).

    Raises
    ------
    ValueError
        If ``num_segments`` is smaller than 1.

    Notes
    -----
    The JSON object has the keys ``'mapping'`` (class names), ``'mfcc'`` (one
    frames-by-coefficients nested list per stored segment) and ``'labels'`` (index into
    ``'mapping'`` for each stored segment).
    """
    if num_segments < 1:
        raise ValueError(f'num_segments must be >= 1, got {num_segments}')

    # os.walk yields plain strings; normalise the root the same way so it can be
    # compared by value (an identity check only works by accident).
    root = os.fspath(dataset_path)

    # dictionary to store data
    data = {
        'mapping': [],
        'mfcc': [],
        'labels': []
    }

    samples_per_seg = SAMPLES_PER_TRACK // num_segments

    # expected number of mfcc vectors per segment:
    # round the number to higher integer
    mfcc_vectors = math.ceil(samples_per_seg / hop_length)

    # loop through all class folders
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(root)):

        # Sorting in place makes the (top-down) traversal order deterministic.
        dirnames.sort()

        # The root itself is not a class folder.
        if dirpath == root:
            continue

        # save the semantic label; the label is its position in the mapping, which
        # stays correct even if the walk index and the mapping index diverge
        semantic_label = os.path.basename(dirpath)
        label = len(data['mapping'])
        data['mapping'].append(semantic_label)
        print(f'Processing {i}: {semantic_label}')

        # process files for a specific class
        for file in sorted(filenames):
            file_path = os.path.join(dirpath, file)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            # process segments extracting mfcc and storing data
            for curr_seg in range(num_segments):
                start_sample = samples_per_seg * curr_seg
                finish_sample = start_sample + samples_per_seg

                mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length)

                mfcc = mfcc.T  # one row per frame, to make it easier to work with

                # store mfcc for segment if it has the expected length
                if len(mfcc) == mfcc_vectors:
                    data['mfcc'].append(mfcc.tolist())
                    data['labels'].append(label)

    with open(json_path, 'w') as fp:
        json.dump(data, fp, indent=4)

In [None]:
# Mount Google Drive at /content/drive; the feature files are copied to a folder
# below this mount point later on. `google.colab` is only available on Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!touch /content/train_data.json
!touch /content/val_data.json
!touch /content/test_data.json

json_path = "/content/drive/MyDrive/Models/FeatherFind"

**Generate and save training set features**

In [None]:
output_path = '/content/train_data.json'
save_mfcc(training_folder, output_path)

!cp /content/train_data.json $json_path

Processing: White-throated Bushchat
Processing: Long-tailed Duck
Processing: Swamp Francolin
Processing: Jerdon_s Babbler
Processing: Grey-sided Thrush
Processing: Egyptian Vulture
Processing: Grey-crowned Prinia
Processing: Black Kite
Processing: Slender-billed Babbler
Processing: Spotted Dove
Processing: Himalayan Monal
Processing: Large-billed Crow
Processing: Grey Treepie
Processing: Red-billed Blue Magpie
Processing: Cheer Pheasant
Processing: Spiny Babbler
Processing: House Sparrow
Processing: Great Slaty Woodpecker
Processing: Common Cuckoo
Processing: Rufous-necked Hornbill
Processing: Wood Snipe
Processing: Pallas_s Fish Eagle
Processing: Bristled Grassbird
Processing: Common Wood Pigeon
Processing: House Crow
Processing: Rufous Treepie
Processing: Asian Koel
Processing: Black-necked crane
Processing: Indian Spotted Eagle
Processing: Sarus Crane
Processing: Greater Spotted Eagle
Processing: Rustic Bunting
Processing: Eastern Imperial Eagle
Processing: Rose-ringed Parakeet
Proc

**Generate and save validation set features**

In [None]:
output_path = '/content/val_data.json'
save_mfcc(validation_folder, output_path)

!cp /content/val_data.json $json_path

Processing 2: White-throated Bushchat
Processing 3: Long-tailed Duck
Processing 4: Swamp Francolin
Processing 5: Jerdon_s Babbler
Processing 6: Grey-sided Thrush
Processing 7: Egyptian Vulture
Processing 8: Grey-crowned Prinia
Processing 9: Black Kite
Processing 10: Slender-billed Babbler
Processing 11: Spotted Dove
Processing 12: Himalayan Monal
Processing 13: Large-billed Crow
Processing 14: Grey Treepie
Processing 15: Red-billed Blue Magpie
Processing 16: Cheer Pheasant
Processing 17: Spiny Babbler
Processing 18: House Sparrow
Processing 19: Great Slaty Woodpecker
Processing 20: Common Cuckoo
Processing 21: Rufous-necked Hornbill
Processing 22: Wood Snipe
Processing 23: Pallas_s Fish Eagle
Processing 24: Bristled Grassbird
Processing 25: Common Wood Pigeon
Processing 26: House Crow
Processing 27: Rufous Treepie
Processing 28: Asian Koel
Processing 29: Black-necked crane
Processing 30: Indian Spotted Eagle
Processing 31: Sarus Crane
Processing 32: Greater Spotted Eagle
Processing 33:

**Generate and save test set features**

In [None]:
output_path = '/content/test_data.json'
save_mfcc(testing_folder, output_path)

!cp /content/test_data.json $json_path

Processing 1: White-throated Bushchat
Processing 2: Long-tailed Duck
Processing 3: Swamp Francolin
Processing 4: Jerdon_s Babbler
Processing 5: Grey-sided Thrush
Processing 6: Egyptian Vulture
Processing 7: Grey-crowned Prinia
Processing 8: Black Kite
Processing 9: Slender-billed Babbler
Processing 10: Spotted Dove
Processing 11: Himalayan Monal
Processing 12: Large-billed Crow
Processing 13: Grey Treepie
Processing 14: Red-billed Blue Magpie
Processing 15: Cheer Pheasant
Processing 16: Spiny Babbler
Processing 17: House Sparrow
Processing 18: Great Slaty Woodpecker
Processing 19: Common Cuckoo
Processing 20: Rufous-necked Hornbill
Processing 21: Wood Snipe
Processing 22: Pallas_s Fish Eagle
Processing 23: Bristled Grassbird
Processing 24: Common Wood Pigeon
Processing 25: House Crow
Processing 26: Rufous Treepie
Processing 27: Asian Koel
Processing 28: Black-necked crane
Processing 29: Indian Spotted Eagle
Processing 30: Sarus Crane
Processing 31: Greater Spotted Eagle
Processing 32: 