In [None]:
import glob
import numpy as np
import librosa
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor

In [None]:
class analyze_music:
    """Load one audio file and expose spectral features computed with librosa.

    The file is decoded once in ``__init__`` (mono, at the rate given by ``sr``)
    and its magnitude STFT is cached in ``self.S`` so that the spectral
    features can share it.

    Class attributes (analysis settings, names follow librosa's parameters):
        sr: sampling rate requested from ``librosa.load``.
        n_fft: FFT window size of the cached STFT.
        frame_length: samples per frame for the zero-crossing rate.
        hop_length: samples between successive zero-crossing-rate frames.
        roll_percent: energy fraction for the spectral roll-off
            (0.85 or 0.95 are the usual choices).
    """

    # Required settings: sampling rate and window sizes.
    sr = 44100
    n_fft = 2048
    frame_length = 65536  # number of samples used to compute one feature value
    hop_length = 1638  # number of samples to shift between feature values
    # Fraction of spectral energy used for the roll-off frequency.
    roll_percent = 0.85

    def __init__(self, music_path):
        """Decode ``music_path`` and cache its waveform and magnitude STFT."""
        self.music_path = music_path
        # Keep the effective rate on the instance; writing it back onto the
        # class would let one file change the settings of every other instance.
        self.y, self.sr = librosa.load(self.music_path, sr=self.sr, mono=True, dtype=np.float64)
        self.S = np.abs(librosa.stft(self.y, n_fft=self.n_fft))

    # Features and results.
    def title(self):
        """Return the file-name part of ``music_path``."""
        # Accept both "/" and "\\" as separators: splitting on "/" alone
        # returns the whole path for backslash-separated Windows paths.
        return self.music_path.replace("\\", "/").split("/")[-1]

    def zero_crossings_rate(self):
        """Return the per-frame zero-crossing rate (an indicator of noisiness)."""
        zcr = librosa.feature.zero_crossing_rate(
            self.y,
            pad=False,
            frame_length=self.frame_length,
            hop_length=self.hop_length,
        )[0]
        return zcr

    def spectral_centroid(self):
        """Return the per-frame spectral centroid (brightness of the timbre)."""
        # The cached spectrogram is supplied, so librosa derives the frames
        # from S. sr must be passed explicitly: librosa's default of 22050
        # would halve every centroid for audio loaded at 44100.
        sc = librosa.feature.spectral_centroid(S=self.S, sr=self.sr, n_fft=self.n_fft)[0]
        return sc

    def spectral_rolloff(self):
        """Return the per-frame spectral roll-off frequency."""
        sro = librosa.feature.spectral_rolloff(
            S=self.S,
            sr=self.sr,
            n_fft=self.n_fft,
            roll_percent=self.roll_percent,
        )[0]
        return sro

    def mfccs(self):
        """Return the Mel-frequency cepstral coefficients of the waveform."""
        # librosa's ``S`` argument for mfcc is a log-power Mel spectrogram, so
        # the cached linear magnitude STFT must not be passed here; let librosa
        # build the Mel spectrogram from the waveform instead.
        mfccs = librosa.feature.mfcc(y=self.y, sr=self.sr)
        return mfccs

In [None]:
def collect_music(music_path):
    """Compute spectral centroid and roll-off for every file and pool the frames.

    Args:
        music_path: sequence of audio file paths; each one is handed to
            ``analyze_music``.

    Returns:
        Tuple ``(sc_row, sro_row)`` of 1-D arrays holding the spectral-centroid
        and spectral-roll-off values of all files, concatenated in input
        order. Both are empty when ``music_path`` is empty.

    MFCCs, zero-crossing rate and per-song CSV export are not collected here.
    """
    # Seed each list with an empty float array so an empty input still yields
    # empty float arrays, matching np.append-style accumulation.
    sc_parts = [np.array([])]
    sro_parts = [np.array([])]
    for path in music_path:
        a = analyze_music(path)
        sc_parts.append(np.ravel(a.spectral_centroid()))
        sro_parts.append(np.ravel(a.spectral_rolloff()))
    # A single concatenation avoids re-copying the growing array for each file.
    sc_row = np.concatenate(sc_parts)
    sro_row = np.concatenate(sro_parts)
    return sc_row, sro_row

In [None]:
# Extract features from 200 songs.
music_path = glob.glob(r"曲のパス")
# Sample without replacement so no song is drawn twice; np.random.choice
# raises ValueError when the glob matched fewer than 200 files.
random_music = np.random.choice(music_path, 200, replace=False)
print(len(random_music))

# Record which songs were drawn, one path per line. The context manager
# closes the file so the list is flushed to disk when the cell finishes.
with open(r'.\music_rand.txt', 'w', encoding='utf-8_sig') as f:
    for path in random_music:
        f.write(path + '\n')

In [None]:
# collect_music is submitted as one single task, so one worker is all the pool
# can ever use; result() blocks until every song has been processed.
with ThreadPoolExecutor(max_workers=1) as executor:
    feature = executor.submit(collect_music, random_music)
    sc_row, sro_row = feature.result()

In [None]:
# Render both pooled feature arrays as Python-list text.
sc_row_list = str(sc_row.tolist())
sro_row_list = str(sro_row.tolist())
# Only the roll-off text is written to disk; one write() call produces the
# same file content as emitting the string character by character.
with open(r'.\sro_row_list.txt', 'w', encoding='utf-8_sig') as f:
    f.write(sro_row_list)

In [None]:
# Distribution of the pooled spectral centroids. The range starts at 1, so
# values below 1 (e.g. zeros) are left out of the histogram.
plt.hist(sc_row, bins=1000, range=(1, sc_row.max()))
plt.title("Spectral centroid (all frames)")
plt.xlabel("Spectral centroid [Hz]")
plt.ylabel("Frame count")
plt.show()

# Same view for the pooled spectral roll-off values.
plt.hist(sro_row, bins=1000, range=(1, sro_row.max()))
plt.title("Spectral roll-off (all frames)")
plt.xlabel("Spectral roll-off [Hz]")
plt.ylabel("Frame count")
plt.show()


In [None]:
# Relation between spectral centroid and roll-off, one point per frame.
plt.scatter(sc_row, sro_row)
plt.xlabel("Spectral centroid [Hz]")
plt.ylabel("Spectral roll-off [Hz]")
plt.show()

In [None]:
def filtered(filtered, lower=5, upper=95):
    """Keep only the values strictly between two percentiles of the data.

    Args:
        filtered: array-like of numbers (the parameter name is kept so that
            existing keyword callers continue to work).
        lower: percentile at or below which values are dropped (default 5).
        upper: percentile at or above which values are dropped (default 95).

    Returns:
        NumPy array with the values ``v`` satisfying
        ``percentile(lower) < v < percentile(upper)``; empty for empty input.
    """
    values = np.asarray(filtered)
    if values.size == 0:
        # There is no percentile to compute for empty input.
        return values
    bottom, top = np.percentile(values, [lower, upper])
    # Strict comparisons: values equal to either percentile are excluded.
    return values[(values > bottom) & (values < top)]


In [None]:
# Trim both pooled feature arrays with the percentile filter, centroid first.
sc_filtered, sro_filtered = map(filtered, (sc_row, sro_row))

In [None]:
print(sc_filtered.min(), sc_filtered.max())
print(sro_filtered.min(), sro_filtered.max())

# Bin over the filtered data's own extent; using the unfiltered maximum as the
# upper edge would leave the top bins empty and coarsen the occupied ones.
plt.hist(sc_filtered, bins=100, range=(sc_filtered.min(), sc_filtered.max()))
plt.title("Spectral centroid (percentile-filtered)")
plt.xlabel("Spectral centroid [Hz]")
plt.ylabel("Frame count")
plt.show()

plt.hist(sro_filtered, bins=100, range=(sro_filtered.min(), sro_filtered.max()))
plt.title("Spectral roll-off (percentile-filtered)")
plt.xlabel("Spectral roll-off [Hz]")
plt.ylabel("Frame count")
plt.show()
