In [1]:
import numpy as np
import soundfile as sf


def to_generate_Ar_Av(sa_freq, duration, notes, sr=22050):
    """Synthesize a sequence of swaras as back-to-back sine tones.

    Args:
        sa_freq (float): Frequency of the tonic Sa in Hz.
        duration (float): Length of every note in seconds.
        notes (iterable of str): Swara names to render, in playing order.
            Valid names are 'Sa', 'Re', 'Ga', 'Pa', 'Ni' and 'S'
            ('S' is Sa one octave up, ratio 2.0).
        sr (int): Sample rate in Hz. Defaults to 22050, the value that was
            previously hard-coded here; pass the same rate to whatever
            writes the audio to disk.

    Returns:
        numpy.ndarray: 1-D float array holding ``int(sr * duration)`` samples
        per note, each note a sine of amplitude 0.5. Empty if ``notes`` is
        empty.

    Raises:
        KeyError: If a note name is not in the ratio table.
        ValueError: If ``duration`` is negative.
    """
    ratios = {
        'Sa': 1.0,
        'Re': 9/8,    # R2
        'Ga': 5/4,    # Ga
        'Pa': 3/2,    # Pa
        'Ni': 15/8,   # Ni
        'S': 2.0,     # upper-octave Sa
    }

    if duration < 0:
        raise ValueError(f"duration must be non-negative, got {duration}")

    notes = list(notes)
    unknown = [name for name in notes if name not in ratios]
    if unknown:
        raise KeyError(
            f"Unknown swara name(s) {unknown}; expected one of {list(ratios)}"
        )

    if not notes:
        return np.array([])

    # The time axis is identical for every note, so build it once.
    # Sample instants are k / sr so the rendered pitch stays exact even when
    # sr * duration is not a whole number of samples.
    n_samples = int(sr * duration)
    t = np.arange(n_samples) / sr

    # Build all tones first and join them in a single concatenate call
    # instead of re-allocating the growing buffer for every note.
    tones = []
    for name in notes:
        f = sa_freq * ratios[name]
        tones.append(0.5 * np.sin(2 * np.pi * f * t))
    return np.concatenate(tones)

# Render the ascending and descending scale as one audio file.
sa_freq = 261.63  # reference Sa frequency in Hz
duration = 0.8  # length of each note in seconds
sr = 22050  # sample rate
# Six swaras going up to the upper 'S', then the same six coming back down.
notes = ["Sa","Re","Ga","Pa","Ni","S","S","Ni","Pa","Ga","Re","Sa"]
audio = to_generate_Ar_Av(sa_freq, duration, notes)
# Written at `sr`, which should agree with the rate the tones were generated at.
sf.write("hamsadhwani.wav", audio, sr)
print("Audio file saved as hamsadhwani.wav")



Audio file saved as hamsadhwani.wav


Ārohanam: S R₂ G₃ P N₃ S \
Avarohanam: S N₃ P G₃ R₂ S

In [2]:
import numpy as np
import librosa
import soundfile as sf

def predict_swaras(audio_path, sa_freq, dur):
    """
    Predicts swaras (Sa, Re, Ga, ...) from an audio file given reference Sa frequency.

    The audio is cut into consecutive segments of ``dur`` seconds, the pitch
    of each segment is estimated with pYIN, and the segment is labelled with
    the swara whose frequency is nearest (in Hz) to the median pitch.
    Trailing samples that do not fill a whole segment are ignored.

    Args:
        audio_path (str): Path to the audio file (.wav).
        sa_freq (float): Reference Sa frequency in Hz.
        dur (float): Length of each note/segment in seconds.

    Returns:
        list: Sequence of swara names, one per ``dur``-second segment.
            A segment with no voiced frames is reported as "Unknown".

    Raises:
        ValueError: If ``dur`` is too small to cover a single sample.
    """

    # Swara ratios (Just Intonation, one common mapping)
    ratios = {
        'Sa': 1.0,
        'Re': 9/8,    # R2
        'Ga': 5/4,    # Ga
        'Pa': 3/2,    # Pa
        'Ni': 15/8,   # Ni
        'S': 2.0,     # upper-octave Sa
    }

    # Compute swara frequencies
    swara_freqs = {s: sa_freq * r for s, r in ratios.items()}

    # Load audio at its native sample rate
    y, sr = librosa.load(audio_path, sr=None)

    # Segment length comes from the `dur` argument. Using the parameter
    # (not a notebook-level global) keeps the function usable on a fresh
    # kernel and makes the argument actually take effect.
    segment_len = int(dur * sr)
    if segment_len <= 0:
        raise ValueError(
            f"dur={dur} s is shorter than one sample at sr={sr} Hz"
        )
    n_segments = len(y) // segment_len

    swaras_predicted = []

    for i in range(n_segments):
        segment = y[i*segment_len : (i+1)*segment_len]

        # Estimate pitch (fundamental frequency); the search range spans
        # from an octave below Sa to three times Sa.
        f0, _, _ = librosa.pyin(segment,
                                fmin=sa_freq/2, fmax=sa_freq*3,
                                sr=sr, frame_length=2048)
        # pyin marks unvoiced frames with NaN; keep only voiced estimates.
        f0 = f0[~np.isnan(f0)]
        if len(f0) == 0:
            swaras_predicted.append("Unknown")
            continue
        freq_est = np.median(f0)  # representative frequency of this note

        # Find closest swara
        closest_swara = min(swara_freqs.keys(),
                            key=lambda s: abs(swara_freqs[s] - freq_est))

        swaras_predicted.append(closest_swara)

    return swaras_predicted

# Run the predictor on the generated scale file.
dur = 0.8  # per-note length in seconds handed to predict_swaras
sa_freq = 261.63  # reference Sa frequency in Hz
result = predict_swaras("hamsadhwani.wav", sa_freq, dur)
print(result)

['Sa', 'Re', 'Ga', 'Pa', 'Ni', 'S', 'S', 'Ni', 'Pa', 'Ga', 'Re', 'Sa']


In [3]:
import numpy as np
import soundfile as sf


def to_generate_Ar_Av(sa_freq, duration, notes, sr=22050):
    """Render a list of swara names as a single sine-tone audio signal.

    Args:
        sa_freq (float): Frequency of the tonic Sa in Hz.
        duration (float): Length of every note in seconds.
        notes (iterable of str): Swara names in playing order. Valid names
            are 'Sa', 'Re', 'Ga', 'Pa', 'Ni' and 'S' ('S' is Sa one octave
            up, ratio 2.0).
        sr (int): Sample rate in Hz. Defaults to 22050, the value that was
            previously fixed inside this function; use the same rate when
            writing the result to a file.

    Returns:
        numpy.ndarray: 1-D float array with ``int(sr * duration)`` samples
        per note, each a sine wave of amplitude 0.5. Empty if ``notes`` is
        empty.

    Raises:
        KeyError: If a note name is not in the ratio table.
        ValueError: If ``duration`` is negative.
    """
    ratios = {
        'Sa': 1.0,
        'Re': 9/8,    # R2
        'Ga': 5/4,    # Ga
        'Pa': 3/2,    # Pa
        'Ni': 15/8,   # Ni
        'S': 2.0,     # upper-octave Sa
    }

    if duration < 0:
        raise ValueError(f"duration must be non-negative, got {duration}")

    notes = list(notes)
    unknown = [name for name in notes if name not in ratios]
    if unknown:
        raise KeyError(
            f"Unknown swara name(s) {unknown}; expected one of {list(ratios)}"
        )

    if not notes:
        return np.array([])

    # One shared time axis for all notes; instants are k / sr so the pitch
    # is exact even if sr * duration is not a whole number of samples.
    n_samples = int(sr * duration)
    t = np.arange(n_samples) / sr

    # Collect the tones and join them once rather than growing the array
    # inside the loop.
    tones = []
    for name in notes:
        f = sa_freq * ratios[name]
        tones.append(0.5 * np.sin(2 * np.pi * f * t))
    return np.concatenate(tones)

# Render a longer 21-note phrase built from the same swaras.
sa_freq = 261.63  # reference Sa frequency in Hz
duration = 0.8  # length of each note in seconds
sr = 22050  # sample rate
# Note sequence to synthesize, in playing order.
hamch = [
    "Pa", "Ni", "Pa", "Ga", "Re", "Sa",
    "Sa", "Re", "Ga", "Pa", "Ni", "Pa",
    "Ga", "Re", "Sa", "Pa", "Ni", "Pa",
    "Ga", "Re", "Sa"
]

audio = to_generate_Ar_Av(sa_freq, duration, hamch)
# Written at `sr`, which should agree with the rate the tones were generated at.
sf.write("hamsadhwani_ch.wav", audio, sr)
print("Audio file saved as hamsadhwani_ch.wav")



Audio file saved as hamsadhwani_ch.wav


In [4]:
import numpy as np
import librosa
import soundfile as sf

def predict_swaras(audio_path, sa_freq, dur):
    """
    Predicts swaras (Sa, Re, Ga, ...) from an audio file given reference Sa frequency.

    The recording is split into consecutive ``dur``-second segments. Each
    segment's pitch is estimated with pYIN and mapped to the swara whose
    frequency lies closest (in Hz) to the median estimate. Samples left
    over after the last full segment are ignored.

    Args:
        audio_path (str): Path to the audio file (.wav).
        sa_freq (float): Reference Sa frequency in Hz.
        dur (float): Length of each note/segment in seconds.

    Returns:
        list: Sequence of swara names, one per ``dur``-second segment.
            A segment with no voiced frames is reported as "Unknown".

    Raises:
        ValueError: If ``dur`` is too small to cover a single sample.
    """

    # Swara ratios (Just Intonation, one common mapping)
    ratios = {
        'Sa': 1.0,
        'Re': 9/8,    # R2
        'Ga': 5/4,    # Ga
        'Pa': 3/2,    # Pa
        'Ni': 15/8,   # Ni
        'S': 2.0,     # upper-octave Sa
    }

    # Compute swara frequencies
    swara_freqs = {s: sa_freq * r for s, r in ratios.items()}

    # Load audio at its native sample rate
    y, sr = librosa.load(audio_path, sr=None)

    # Derive the segment length from the `dur` argument rather than from a
    # notebook-level variable, so the function works on a fresh kernel and
    # honours what the caller passed.
    segment_len = int(dur * sr)
    if segment_len <= 0:
        raise ValueError(
            f"dur={dur} s is shorter than one sample at sr={sr} Hz"
        )
    n_segments = len(y) // segment_len

    swaras_predicted = []

    for i in range(n_segments):
        segment = y[i*segment_len : (i+1)*segment_len]

        # Estimate pitch (fundamental frequency); the search range spans
        # from an octave below Sa to three times Sa.
        f0, _, _ = librosa.pyin(segment,
                                fmin=sa_freq/2, fmax=sa_freq*3,
                                sr=sr, frame_length=2048)
        # pyin marks unvoiced frames with NaN; keep only voiced estimates.
        f0 = f0[~np.isnan(f0)]
        if len(f0) == 0:
            swaras_predicted.append("Unknown")
            continue
        freq_est = np.median(f0)  # representative frequency of this note

        # Find closest swara
        closest_swara = min(swara_freqs.keys(),
                            key=lambda s: abs(swara_freqs[s] - freq_est))

        swaras_predicted.append(closest_swara)

    return swaras_predicted

# Run the predictor on the longer generated phrase.
dur = 0.8  # per-note length in seconds handed to predict_swaras
sa_freq = 261.63  # reference Sa frequency in Hz
result = predict_swaras("hamsadhwani_ch.wav", sa_freq, dur)
print(result)

['Pa', 'Ni', 'Pa', 'Ga', 'Re', 'Sa', 'Sa', 'Re', 'Ga', 'Pa', 'Ni', 'Pa', 'Ga', 'Re', 'Sa', 'Pa', 'Ni', 'Pa', 'Ga', 'Re', 'Sa']
