In [2]:
# Dataset creation: cut the WAV files into fixed-size clips and compute their per-window spectra.

!pip install scipy

# Number of audio samples in one FFT window.
window_size = 1024
# Number of consecutive windows that make up one clip.
window_num_per_clip = 40
# Number of audio samples in one clip (a whole number of windows).
clip_size = window_size * window_num_per_clip

from scipy.io.wavfile import read
import glob
import os
import numpy as np

def standardization(x):
    """Log-compress ``x`` and rescale it so that its elements sum to one.

    Every element is mapped to ``log(x + 1)`` divided by the log of the
    largest finite float32 value, and the result is then divided by its own
    total.

    Args:
        x: NumPy array (or scalar) of values; the caller passes FFT magnitudes.

    Returns:
        The rescaled values. When the log-compressed values sum to exactly
        zero they are returned unchanged, which avoids a division by zero.
    """
    log_ceiling = np.log(np.finfo(np.float32).max)
    compressed = np.log(x + 1) / log_ceiling
    total = np.sum(compressed)
    return compressed if total == 0 else compressed / total

# Load the cached clip matrix if it exists; otherwise build it from the WAV
# files and cache it. `clips` ends up with one row per clip and `clip_size`
# samples per row.
if os.path.isfile("../data/arrays/clips.npy"):
    print("clips.npy exists.")
    clips = np.load("../data/arrays/clips.npy")
else:
    # Sorted so that the clip order (and therefore the cache) is reproducible.
    files = sorted(glob.glob("../data/wav44100/*"))
    # For a quick test, restrict the run to a single file, e.g.
    # files = ["../data/wav44100/3DEmbodimentFromLines.wav"]

    # The empty int16 seed keeps the result 2-D (and int16 for int16 input)
    # even when no file contributes a clip.
    clip_chunks = [np.zeros((0, clip_size), dtype=np.int16)]
    for file in files:
        raw_data = read(file)[1]
        # Whole clips only: the trailing partial clip is discarded, but a file
        # whose length is an exact multiple of clip_size keeps its last clip.
        n_clips = len(raw_data) // clip_size
        # NOTE(review): this reshape assumes mono (1-D) sample data;
        # multi-channel files raise here, as they did in the original
        # stacking -- confirm the inputs are mono.
        clip_chunks.append(raw_data[:n_clips * clip_size].reshape(n_clips, clip_size))

    # Stack once at the end instead of re-copying the matrix for every file.
    clips = np.vstack(clip_chunks)
    os.makedirs("../data/arrays", exist_ok=True)
    np.save("../data/arrays/clips", clips)

print("clips.shape: " + str(clips.shape))
    
# Load the cached spectra if they exist; otherwise compute them and cache
# them. `fft` has shape (number of clips, window_num_per_clip, window_size)
# and holds, for every window of every clip, the magnitude spectrum passed
# through standardization().
if os.path.isfile("../data/arrays/fft_log.npy"):
    print("fft_log.npy exists.")
    fft = np.load("../data/arrays/fft_log.npy")
else:
    # Preallocate the result: growing it with np.vstack would re-copy the
    # entire tensor for every clip.
    fft = np.zeros((len(clips), window_num_per_clip, window_size))
    for num, clip in enumerate(clips):
        # Report progress periodically rather than once per clip.
        if num % 1000 == 0:
            print("fft progress: clip " + str(num))

        # Split the clip into its consecutive, non-overlapping windows and
        # transform all of them in one call (one FFT per row).
        windows = clip.reshape(window_num_per_clip, window_size)
        spectra = np.abs(np.fft.fft(windows, axis=-1))
        # standardization() normalises by the total of its argument, so it is
        # applied to each window separately.
        for window_index, spectrum in enumerate(spectra):
            fft[num, window_index] = standardization(spectrum)

    np.save("../data/arrays/fft_log", fft)

print(fft.shape)
print(type(fft[0][0][0]))
print(clips.shape)

print(fft[0][0])

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
clips.npy exists.
clips.shape: (28576, 40960)
fft_log.npy exists.


KeyboardInterrupt: 

In [None]:
# Music output: assemble clips into a new track and write it as a WAV file.

!pip install scikit-learn

import sklearn
from sklearn.metrics.pairwise import cosine_similarity
from scipy.io import wavfile

# Drop clips whose first-window spectrum sums to less than the threshold,
# using one mask for both arrays so they stay row-aligned.
# NOTE(review): standardization() as defined in this notebook rescales every
# non-silent window to sum to 1, so a threshold of 50 would drop every clip
# unless the cached fft_log.npy was produced with a different scaling --
# confirm which scaling the cache uses.
silence_threshold = 50
is_silent = np.sum(fft[:, 0, :], axis=1) < silence_threshold
fft_non_zero = fft[~is_silent]
clips_non_zero = clips[~is_silent]

if len(fft_non_zero) == 0:
    raise ValueError(
        "no clips left after silence filtering; "
        "check the threshold against how the spectra were normalised"
    )

first_index = np.random.randint(0, len(fft_non_zero))
current_index = first_index

# Opening spectrum of every candidate clip; loop-invariant.
first_windows = fft_non_zero[:, 0, :]
# Pick among the best matches, but never index past the number of candidates.
candidate_pool = min(100, len(fft_non_zero))
# Seed with an empty int16 array so the concatenated track keeps the same
# dtype promotion as appending to an int16 buffer.
selected_clips = [np.zeros((0, ), dtype=np.int16)]

# NOTE(review): the randomly chosen starting clip only seeds the chain and is
# not itself written to the output -- confirm that this is intended.
for i in range(10):
    # Closing spectrum of the current clip, matched against every opening one.
    nextFFT = fft_non_zero[current_index, -1, :]

    # One vectorised call: (1, d) against (n, d) gives a (1, n) matrix.
    sim = cosine_similarity(nextFFT[np.newaxis, :], first_windows)[0]

    # Most similar first; the next clip is drawn at random from the top of
    # this ranking (the previous ascending order drew from the least similar).
    sorted_indices = np.argsort(sim)[::-1]
    current_index = sorted_indices[np.random.randint(0, candidate_pool)]

    selected_clips.append(clips_non_zero[current_index])

out = np.hstack(selected_clips)

os.makedirs("../data/out", exist_ok=True)
wavfile.write("../data/out/fft_rank_1.wav", 44100, out)