<a href="https://colab.research.google.com/github/TakayukiNJ/vanpireBach/blob/main/vanpireBach.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the libraries (based on the Hello Magenta notebook): https://colab.research.google.com/notebooks/magenta/hello_magenta/hello_magenta.ipynb
# System packages first (installed with apt-get), then the Python packages.
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
!pip install -qU pyfluidsynth pretty_midi
!pip install -qU magenta

In [None]:
# Import the libraries (GANSynth is an AI model that changes the timbre) https://colab.research.google.com/notebooks/magenta/gansynth/gansynth_demo.ipynb
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
import tensorflow.compat.v1 as tf
import librosa

import magenta.music as mm
from magenta.models.gansynth.lib import flags as lib_flags
from magenta.models.gansynth.lib import generate_util as gu
from magenta.models.gansynth.lib import model as lib_model
from magenta.models.gansynth.lib import util
from note_seq.notebook_utils import colab_play as play

import note_seq

In [None]:
# Settings for audio generation
BATCH_SIZE = 16  # number of items processed at once (passed as the model's batch_size_schedule)
SR = 16000  # sampling rate used for the spectrogram and for playback

# Audio processing functions
def load_midi(midi_path, min_pitch=36, max_pitch=84):
  """Read a MIDI file and extract its notes as parallel arrays.

  Args:
    midi_path: Path to the MIDI file; expanded with `util.expand_path`.
    min_pitch: Lowest pitch (inclusive) kept in the returned arrays.
    max_pitch: Highest pitch (inclusive) kept in the returned arrays.

  Returns:
    A tuple `(ns, notes)`: `ns` is the unfiltered sequence returned by
    `note_seq.midi_file_to_sequence_proto`, and `notes` is a dict with keys
    'pitches', 'velocities', 'start_times' and 'end_times', each restricted to
    the notes whose pitch lies in [min_pitch, max_pitch].
  """
  sequence = note_seq.midi_file_to_sequence_proto(util.expand_path(midi_path))

  # One array per note attribute, all in the order of `sequence.notes`.
  columns = {
      'pitches': np.array([note.pitch for note in sequence.notes]),
      'velocities': np.array([note.velocity for note in sequence.notes]),
      'start_times': np.array([note.start_time for note in sequence.notes]),
      'end_times': np.array([note.end_time for note in sequence.notes]),
  }

  # Boolean mask selecting the notes inside the requested pitch range.
  in_range = (columns['pitches'] >= min_pitch) & (columns['pitches'] <= max_pitch)
  filtered = {key: values[in_range] for key, values in columns.items()}
  return sequence, filtered

def get_envelope(t_note_length, t_attack=0.010, t_release=0.3, sr=16000):
  """Create an attack sustain release amplitude envelope.

  The envelope lasts for the (clamped) note length plus the release time; the
  attack ramp lies inside the note and does not extend it.

  Args:
    t_note_length: Note duration in seconds; clamped to the range [0, 3].
    t_attack: Duration in seconds of the linear 0 -> 1 ramp at the start.
    t_release: Duration in seconds of the linear 1 -> 0 ramp after the note.
    sr: Integer, sample rate.

  Returns:
    1-D float array of length `int(sr * note_length) + int(sr * t_release)`.
  """
  # Cap long notes at 3 seconds and treat negative durations as zero so the
  # sample counts below can never become negative.
  t_note_length = min(max(t_note_length, 0.0), 3.0)
  i_attack = int(sr * t_attack)
  i_sustain = int(sr * t_note_length)
  i_release = int(sr * t_release)
  i_tot = i_sustain + i_release  # attack envelope doesn't add to sound length
  envelope = np.ones(i_tot)
  # Linear attack. When the whole envelope is shorter than the attack, only the
  # beginning of the ramp fits; truncating keeps the slope unchanged and avoids
  # a shape mismatch in the assignment.
  n_attack = min(i_attack, i_tot)
  envelope[:n_attack] = np.linspace(0.0, 1.0, i_attack)[:n_attack]
  # Linear release (written last, so it wins wherever it overlaps the attack).
  envelope[i_sustain:i_tot] = np.linspace(1.0, 0.0, i_release)
  return envelope

def combine_notes(audio_notes, start_times, end_times, velocities, sr=16000):
  """Combine audio from multiple notes into a single audio clip.

  Each note is shaped by the envelope from `get_envelope`, peak-normalized,
  scaled by its velocity and summed into the output buffer at its start time.
  The mix is then peak-normalized and halved.

  Args:
    audio_notes: Array of audio [n_notes, audio_samples].
    start_times: Array of note starts in seconds [n_notes].
    end_times: Array of note ends in seconds [n_notes].
    velocities: Array of note velocities [n_notes]; each is divided by 127.0
      to obtain the gain of its note.
    sr: Integer, sample rate.

  Returns:
    audio_clip: Array of combined audio clip [audio_samples]
  """
  clip_length = end_times.max() + 3.0
  # The buffer covers a whole number of seconds (the fraction is truncated).
  audio_clip = np.zeros(int(clip_length) * sr)

  for note_audio, t_start, t_end, vel in zip(
      audio_notes, start_times, end_times, velocities):
    # Generate an amplitude envelope at the caller's sample rate so that its
    # length in samples matches the indices computed from `sr` below.
    envelope = get_envelope(t_end - t_start, sr=sr)
    # Use only as many samples as both the note and the envelope provide.
    note_audio = note_audio[:len(envelope)]
    audio_note = note_audio * envelope[:len(note_audio)]
    # Normalize by the absolute peak; a silent (or empty) note is left as is
    # instead of being divided by zero, which would fill the clip with NaNs.
    peak = np.abs(audio_note).max() if audio_note.size else 0.0
    if peak > 0:
      audio_note = audio_note / peak
    audio_note = audio_note * (vel / 127.0)
    # Add to clip buffer
    clip_start = int(t_start * sr)
    clip_end = clip_start + len(audio_note)
    audio_clip[clip_start:clip_end] += audio_note

  # Normalize the mix to a peak magnitude of 0.5 (skipped for pure silence).
  clip_peak = np.abs(audio_clip).max() if audio_clip.size else 0.0
  if clip_peak > 0:
    audio_clip /= clip_peak
  audio_clip /= 2.0
  return audio_clip

# Plotting tools
def specplot(audio_clip):
  """Draw a constant-Q power spectrogram of `audio_clip` with matplotlib.

  The frequency axis starts at MIDI pitch 36 and covers enough whole octaves
  to reach twice the frequency of MIDI pitch 84, at 36 bins per octave. The
  module-level `SR` is used as the sample rate. Axis ticks are hidden.

  Args:
    audio_clip: Audio samples passed to `librosa.cqt`.
  """
  bins_per_octave = 36
  lowest_hz = librosa.midi_to_hz(36)
  highest_hz = 2 * librosa.midi_to_hz(84)

  # Whole octaves needed to cover [lowest_hz, highest_hz].
  n_octaves = int(np.ceil(np.log2(highest_hz) - np.log2(lowest_hz)))
  total_bins = int(bins_per_octave * n_octaves)

  cqt = librosa.cqt(
      audio_clip,
      sr=SR,
      hop_length=2048,
      fmin=lowest_hz,
      n_bins=total_bins,
      bins_per_octave=bins_per_octave)

  # Power in dB; the small offset keeps log10 finite for silent bins.
  power_db = 10 * np.log10(np.abs(cqt)**2 + 1e-6)

  # Flip vertically so low frequencies are at the bottom, and drop the two
  # frames at each end of the time axis.
  plt.matshow(power_db[::-1, 2:-2], aspect='auto', cmap=plt.cm.magma)
  plt.xticks([])
  plt.yticks([])

In [None]:
# Load the pretrained model
tf.disable_v2_behavior()  # run TensorFlow 1.x code under TensorFlow 2
tf.reset_default_graph()  # reset the TensorFlow graph

# Checkpoint location and the flags handed to the model loader.
model_dir = "gs://magentadata/models/gansynth/acoustic_only"
flags = lib_flags.Flags({
    "batch_size_schedule": [BATCH_SIZE],
    "tfds_data_dir": "gs://tfds-data/datasets",
})
model = lib_model.Model.load_from_path(model_dir, flags)

In [None]:
# Load the file (download the "Vampire" MIDI file and upload it to Colab first) https://otoiro.co.jp/special/
midi_path = "/content/Vampire_Melo_164BPM.mid"
# ns is the full sequence; notes holds the per-note arrays filtered by pitch range.
ns, notes = load_midi(midi_path)

# Visualize and play the loaded sequence.
note_seq.plot_sequence(ns)
note_seq.play_sequence(ns, synth=note_seq.fluidsynth) 

In [None]:
# Change the timbre https://youtu.be/cjNFTLG4EUQ
seconds_per_instrument = 5  # interval (in seconds) at which the instrument changes

# Latent vectors that change slowly and randomly over time.
# The timeline spans the latest end time over all notes, the same quantity
# combine_notes uses for the clip length. The last list entry is not
# necessarily the note that ends last.
z_instruments, t_instruments = gu.get_random_instruments(  # latent vectors and their times
    model,
    notes["end_times"].max(),
    secs_per_instrument=seconds_per_instrument)

# Get the latent vector for each note
z_notes = gu.get_z_notes(notes["start_times"], z_instruments, t_instruments)

# Generate the audio for each note
audio_notes = model.generate_samples_from_z(z_notes, notes["pitches"])

# Combine the notes into a single audio clip
audio = combine_notes(
    audio_notes,
    notes["start_times"],
    notes["end_times"],
    notes["velocities"]
    )

specplot(audio)  # show the spectrogram
play(audio, sample_rate=SR)

In [None]:
# Convert the audio to WAV data and download it
file_name = "atashiVanpire.wav"
gu.save_wav(audio, file_name)
files.download(file_name)

In [None]:
# Check that the contents of the variable ns look usable for the next step
print(ns)

In [None]:
# Create a Bach-style piece with a Magenta RNN model https://github.com/magenta/magenta/tree/main/magenta/models/polyphony_rnn
# NOTE(review): the original comment and the link above refer to Polyphony RNN,
# but the code below imports Performance RNN and loads the
# "performance_with_dynamics" bundle. Confirm which model is intended.
from magenta.models.performance_rnn import performance_sequence_generator
from magenta.models.shared import sequence_generator_bundle

# Initialize the model
note_seq.notebook_utils.download_bundle("performance_with_dynamics.mag", "/models/")  # download the bundle (.mag file)
bundle = sequence_generator_bundle.read_bundle_file("/models/performance_with_dynamics.mag")  # read the bundle
generator_map = performance_sequence_generator.get_generator_map()
performance_rnn = generator_map["performance_with_dynamics"](checkpoint=None, bundle=bundle)  # configure the generator
performance_rnn.initialize()  # initialize

In [None]:
from note_seq.protobuf import generator_pb2

total_time = 360 # length of the piece (seconds)
temperature = 1 # constant that sets the "degree of randomness" of the piece

base_end_time = max(note.end_time for note in ns.notes)  # end time of the base piece

# Generator settings
# NOTE(review): the section below runs from base_end_time to total_time, so it
# presumably requires base_end_time < total_time. Confirm for longer inputs.
generator_options = generator_pb2.GeneratorOptions()  # generator options
generator_options.args["temperature"].float_value = temperature  # degree of randomness
generator_options.generate_sections.add(
    start_time=base_end_time,  # time at which composition starts
    end_time=total_time)  # time at which composition ends

# Generate the piece
gen_seq = performance_rnn.generate(ns, generator_options)

note_seq.plot_sequence(gen_seq)  # visualize the NoteSequence
note_seq.play_sequence(gen_seq, synth=note_seq.fluidsynth)  # play the NoteSequence