In [61]:
import numpy as np
import librosa
import scipy
import matplotlib.pyplot as plt

In [60]:
# Ploting chromagram
def chromagram(y, sr, title):
    # Chord Analysis (Chromagram)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', sr=sr)
    plt.colorbar(label='Energy')
    plt.title(title)
    plt.tight_layout()
    # plt.show()
    return chroma

In [64]:
def melody_level_score(path, offset=0):
    melody_level = 0
    y, sr = librosa.load(path)
    length = librosa.get_duration(y = y, sr = sr)
    if length > 30:
        y, sr = librosa.load(path, duration = 10, offset = offset)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    std = np.std(chroma, axis = 0)
    ###### first part, count number with large standard deviation
    compare_val = len(std[std > 0.25]) / len(std)
    # print("count comparison, ", compare_val)
    if (compare_val >= 0.85):
        melody_level += 3
    elif compare_val >=0.7:
        melody_level += 2
    elif compare_val >= 0.3:
        melody_level += 1
    else:
        melody_level += 0
    # print(each_pitch_tuned)
    ######### pitch tuning starts ##########################
    # assume male / female voice
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr, fmin=100, fmax=500)
    # get indexes of the maximum value in each time slice        
    max_indexes = np.argmax(magnitudes, axis=0)
    # get the pitches of the max indexes per time slice and compare it to the frequency table
    pitches = pitches[max_indexes, range(magnitudes.shape[1])]        
    # or use the built in function to estimate the tuning of whole series
    pitches = librosa.pitch_tuning(pitches)
    if abs(pitches) <= 0.10:
        melody_level += 3
    elif abs(pitches) <= 0.20:
        melody_level += 2
    elif abs(pitches) <= 0.30:
        melody_level += 1
    ############### pitch tuning part ends ########################
    for i in range(len(np.array(chroma))):
        for j in range(len(chroma[i])):
            if chroma[i][j] <= 0.35:
                chroma[i][j] = 0
            else:
                chroma[i][j] = 1
    # print(chroma.shape)
    sumds = np.sum(chroma, axis = 0)
    count = sumds[sumds > (len(chroma) * 0.1)]
    #### score count #######
    # print("Percentage: ", (count / (len(chroma) * len(chroma[0]) - 0.08333)) / 0.08333)
    if len(count) < (len(chroma) * len(chroma[0]) * 0.09):
        melody_level += 3
    elif len(count) < (len(chroma) * len(chroma[0]) * 0.12):
        melody_level += 2
    elif len(count) < (len(chroma) * len(chroma[0]) * 0.15):
        melody_level += 1

    # print(melody_level)
    # if melody_level >= 7:
    #     return 1
    # else:
    #     return 0
    return melody_level
    # print(pitches)

In [84]:
def checking_audio_melody(path):
    y, sr = librosa.load(path)
    duration = librosa.get_duration(y = y, sr = sr)
    if duration >= 30:
        list = []
        for i in range(int(duration // 10)):
            list.append(melody_level_score(path, offset = i * 10))
        # print(np.sum(list).mean() / (9 * len(list)))
        if ((np.sum(list).mean() / (9 * len(list))) >= 0.7):
            return True, np.sum(list).mean(), 9 * len(list)
        else:
            return False, np.sum(list).mean(), 9 * len(list)
        # print("sum: ", np.sum(list).mean(), " total: ", 9 * len(list))
    else:
        result = melody_level_score(path)
        if result < 7:
            return False, result, 9
        return True, result, 9
        # print(result, "total: 9")

In [85]:
# a list of examples, including one recording audio, one downloaded song, and librosa example files
# './Noise.wav', librosa.ex('libri1'), librosa.ex('libri2'), librosa.ex('libri3'), librosa.ex('robin'),librosa.ex('humpback') are NOT music
examples = ['./Noise.wav', './Pull Me Up.wav', librosa.ex('libri1'), librosa.ex('libri2'), librosa.ex('libri3'), librosa.ex('fishin'), 
    librosa.ex('brahms'), librosa.ex('choice'), librosa.ex('robin'), librosa.ex('brahms'), librosa.ex('humpback'), librosa.ex('nutcracker'), 
    librosa.ex('pistachio'), librosa.ex('sweetwaltz'), librosa.ex('trumpet'), librosa.ex('vibeace')]
melody_result = []
for ex in examples:
    melody, score, total_score = checking_audio_melody(ex)
    melody_result.append(melody)
    print(ex[ex.rfind('\\'):], ":", score, "out of", total_score, melody)
print(melody_result)

v : 35.0 out of 90 False
v : 162.0 out of 198 True
\5703-47212-0000.ogg : 4 out of 9 False
\3436-172162-0000.ogg : 5 out of 9 False
\198-209-0000.ogg : 5 out of 9 False
\Karissa_Hobbs_-_Let's_Go_Fishin'.ogg : 112.0 out of 117 True
\Hungarian_Dance_number_5_-_Allegro_in_F_sharp_minor_(string_orchestra).ogg : 34.0 out of 36 True
\admiralbob77_-_Choice_-_Drum-bass.ogg : 7 out of 9 True
\456440__inspectorj__bird-whistling-robin-single-13.ogg : 6 out of 9 False
\Hungarian_Dance_number_5_-_Allegro_in_F_sharp_minor_(string_orchestra).ogg : 34.0 out of 36 True


  pitches = librosa.pitch_tuning(pitches)


\glacier-bay-humpback.ogg : 26.0 out of 54 False
\Kevin_MacLeod_-_P_I_Tchaikovsky_Dance_of_the_Sugar_Plum_Fairy.ogg : 99.0 out of 99 True
\442789__lena-orsa__happy-music-pistachio-ice-cream-ragtime.ogg : 60.0 out of 63 True
\147793__setuniman__sweet-waltz-0i-22mi.ogg : 36.0 out of 36 True
\sorohanro_-_solo-trumpet-06.ogg : 7 out of 9 True
\Kevin_MacLeod_-_Vibe_Ace.ogg : 50.0 out of 54 True
[False, True, False, False, False, True, True, True, False, True, False, True, True, True, True, True]


In [None]:
# examples = ['./New Recording 7.wav', './Pull Me Up.wav', librosa.ex('libri1'), librosa.ex('libri2'), librosa.ex('libri3'), librosa.ex('fishin'), 
#     librosa.ex('brahms'), librosa.ex('choice'), librosa.ex('robin'), librosa.ex('brahms'), librosa.ex('humpback'), librosa.ex('nutcracker'), 
#     librosa.ex('pistachio'), librosa.ex('sweetwaltz'), librosa.ex('trumpet'), librosa.ex('vibeace')]
# for ex in examples:
#     y, sr = librosa.load(ex)
#     chroma = librosa.feature.chroma_stft(y = y, sr = sr)
    

In [10]:
# not usable right now
def main_check(path):
    # check duration, should be 1:30 - 5:30
    y, sr = librosa.load(path)
    duration = librosa.get_duration(y = y, sr = sr)
    if duration < 90 or duration > 330:
        return False
    # stereo check, posiibly not working
    # if (y.shape[0] == 1 or y.shape[0].all() == y.shape[1].all()):
    #     return False
    # music check
    count = 0
    if checking_audio_melody(path):
        count += 1
    
    