In [None]:
pip install sounddevice librosa tensorflow

In [6]:
import os
from statistics import mode

import librosa
import numpy as np
import sounddevice as sd
from tensorflow.keras.models import load_model

# Loading pre-trained model for raga detection.
# The location can be overridden with the RAGA_MODEL_PATH environment variable so
# the notebook is not tied to a single machine; when the variable is unset the
# original hard-coded path is used.
MODEL_PATH = os.environ.get(
    'RAGA_MODEL_PATH',
    '/Users/tishabhavsar/Library/Trained_model.keras',
)

model = load_model(MODEL_PATH)

# Function to preprocess and extract features from live voice input
def process_audio(input_data, sample_rate):
    """Turn a block of audio samples into a single ``(1, N)`` feature row.

    Five librosa features are computed from ``input_data`` -- RMS energy,
    chroma STFT, spectral centroid, spectral bandwidth and 13 MFCCs -- then
    flattened and joined in that order. Only the first 23 values of the joined
    vector are kept (so ``N`` is at most 23).

    NOTE(review): because the arrays are flattened before truncation, the
    values that survive are the leading entries of the RMS and chroma arrays,
    not one summary value per feature. Confirm this matches how the model's
    training features were built.

    Args:
        input_data: 1-D array of audio samples.
        sample_rate: Sampling rate passed to the librosa feature extractors.

    Returns:
        A 2-D array with one row holding the truncated feature vector.
    """
    feature_arrays = [
        librosa.feature.rms(y=input_data)[0],
        librosa.feature.chroma_stft(y=input_data, sr=sample_rate),
        librosa.feature.spectral_centroid(y=input_data, sr=sample_rate)[0],
        librosa.feature.spectral_bandwidth(y=input_data, sr=sample_rate)[0],
        librosa.feature.mfcc(y=input_data, sr=sample_rate, n_mfcc=13),
    ]

    # axis=None flattens every array before joining them end to end.
    flat_features = np.concatenate(feature_arrays, axis=None)

    # Keep the leading 23 values and present them as a one-row batch.
    return flat_features[:23].reshape((1, -1))

# Function for model inference
def predict_raga(input_features):
    """Run the module-level ``model`` on ``input_features``.

    Returns whatever ``model.predict`` returns, unchanged.
    """
    return model.predict(input_features)

# List to store predicted raga indices
predicted_indices = []

# Callback function to handle incoming audio stream
def callback(indata, frames, time, status):
    """Classify one incoming audio block and record the predicted class index.

    This function is passed as ``callback=`` to ``sd.InputStream``; the
    parameter list follows that call site. ``frames`` and ``time`` are accepted
    but not used.

    Args:
        indata: Array with the captured samples; it is flattened to 1-D before
            feature extraction.
        frames: Unused.
        time: Unused.
        status: Stream status; printed when truthy.

    Side effects:
        Appends the arg-max class index (as a plain ``int``) to the
        module-level ``predicted_indices`` list.
    """
    if status:
        print(status)

    # Processing the incoming audio data
    processed_data = process_audio(indata.flatten(), sample_rate)

    # Take the input shape from the model itself rather than from
    # ``model.layers[0].input_shape``: the per-layer attribute is not available
    # on Keras 3 layers and is a list for a functional model's InputLayer,
    # while ``model.input_shape`` is ``(batch, ...)`` in both Keras 2 and 3.
    expected_input_shape = tuple(model.input_shape[1:])

    # Reshaping the processed data to match the expected input shape
    processed_data = processed_data.reshape((1,) + expected_input_shape)

    # Performing model inference
    prediction = predict_raga(processed_data)

    # Store a built-in int (not a NumPy scalar) so later consumers such as
    # ``statistics.mode`` and dict lookups deal with plain Python values.
    predicted_indices.append(int(np.argmax(prediction)))

# Setting the sample rate and duration for audio input
sample_rate = 44100  # Adjust based on your model's requirements
duration = 17  # Length of the microphone capture, in seconds

# Capturing audio from the microphone; `callback` runs for every audio block
# delivered while the stream is open.
with sd.InputStream(callback=callback, channels=1, samplerate=sample_rate):
    print(f"Listening for {duration} seconds...")
    sd.sleep(int(duration * 1000))  # sd.sleep takes milliseconds
    print("Finished listening.")

# `mode` raises StatisticsError on an empty list, which hides the real problem
# (no audio block was classified), so fail with an explicit message instead.
if not predicted_indices:
    raise RuntimeError(
        "No predictions were collected while listening; check the microphone "
        "and any errors reported by the audio callback."
    )

# Find the most common predicted raga index
predicted_class_index = mode(predicted_indices)

# Print the most common predicted raga index
print("Most Common Predicted Raga Index:", predicted_class_index)

# Raga names listed in class-index order; position i is the name for index i.
# NOTE(review): "Hamsadhwani" appears at both index 5 and index 15 -- confirm
# against the labels used for training whether one of them should differ.
_RAGA_NAMES = (
    "Kapi",              # 0
    "Kamas",             # 1
    "Charukesi",         # 2
    "Bhairavi",          # 3
    "JaganMohini",       # 4
    "Hamsadhwani",       # 5
    "Kalyani",           # 6
    "Harikambhoj",       # 7
    "Ahir Bhairav",      # 8
    "Darbari Kanada",    # 9
    "Kamboji",           # 10
    "Todi",              # 11
    "Janaranjani",       # 12
    "Saveri",            # 13
    "Shankarabhairavi",  # 14
    "Hamsadhwani",       # 15
    "Amritavarshini",    # 16
    "Bilahari",          # 17
    "Nata",              # 18
    "Anandabhairavi",    # 19
    "Karaharapriya",     # 20
    "Varali",            # 21
    "Gaula",             # 22
    "Sindhubhairavi",    # 23
    "Hindolam",          # 24
    "Kanada",            # 25
    "Kedaram",           # 26
    "Sahana",            # 27
    "Mohanam",           # 28
    "Ranjani",           # 29
    "Mayamlavagaula",    # 30
    "Sri Ranjali",       # 31
)

# Integer class index -> raga name.
raga_mapping = dict(enumerate(_RAGA_NAMES))

# Print the predicted raga name
print(f"The predicted raga is: {raga_mapping[predicted_class_index]}")

# Map each class index to a "first-last" range of row numbers in the dataset.
# NOTE(review): row_mapping is only referenced by the commented-out prints below.
# NOTE(review): two pairs of adjacent ranges share an endpoint
# ("702-802" / "802-814" and "1088-1100" / "1100-1113") -- confirm against the
# dataset whether the overlap is intended.
row_mapping = {
        0: "1-101",
        1: "102-201",
        2: "202-301",
        3: "302-401",
        4: "402-501",
        5: "502-601",
        6: "602-701",
        7: "702-802",
        8: "802-814",
        9: "815-828",
        10: "829-841",
        11: "842-854",
        12: "855-866",
        13: "867-879",
        14: "880-892",
        15: "893-905",
        16: "906-918",
        17: "919-932",
        18: "933-945",
        19: "946-958",
        20: "959-971",
        21: "972-984",
        22: "985-997",
        23: "998-1010",
        24: "1011-1022",
        25: "1023-1035",
        26: "1036-1048",
        27: "1049-1061",
        28: "1062-1074",
        29: "1075-1087",
        30: "1088-1100",
        31: "1100-1113"
    }

# Disabled: print the dataset row range and raga name for the predicted index.
#print(f"The row number is: {row_mapping[predicted_class_index]}")
#print(f"The corresponding raga is: {raga_mapping[predicted_class_index]}")





Listening for 17 seconds...
Finished listening.
Most Common Predicted Raga Index: 8
The predicted raga is: Ahir Bhairav


In [None]:

pip install kaggle

In [None]:
# Shell out to the Kaggle CLI to retrieve the output of the kernel
# "satishpb/raaga-identification" into the directory given by -p.
# NOTE(review): the destination is a machine-specific absolute path; adjust it
# before running on another machine.
!kaggle kernels output satishpb/raaga-identification -p /Users/tishabhavsar/Downloads
