# 1 方法定义

## 1.1 MIDI 可视化代码

In [None]:
import moviepy.editor as me
import mido
import numpy as np
from scipykit.mtp_initializer import disp, mfigure, get_cmap
import matplotlib as mtp
import matplotlib.style as mtps
import matplotlib.pyplot as plt
import re
# Optional backend switches, left disabled:
# mtp.use('agg')
# %matplotlib inline
# Select matplotlib's 'fast' style sheet.
mtps.use('fast')
# Font family used for text drawn inside matplotlib figures.
plt.rcParams['font.family'] = 'Sarasa Mono SC'
# Folder prepended to the font file name in the moviepy TextClip calls below.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
font_folder = r'C:/Users/Gray/AppData/Local/Microsoft/Windows/Fonts/'
# Disable matplotlib's interactive mode.
plt.ioff()

In [None]:
# 脚本文案处理

class Script():
    """Empty placeholder so that ``Paragraph`` can name ``Script`` as its base.

    The name ``Script`` is rebound by the full class defined further down in
    this file; ``Paragraph`` keeps inheriting from this placeholder.
    """
    pass


class Paragraph(Script):
    """One annotated paragraph of the script file.

    The header has the form ``location/instruments[/names[/midi_selector]]``:

    * ``location``      - ``start~end`` bar range during which the text shows.
    * ``instruments``   - ``;``-separated track names.
    * ``names``         - optional ``;``-separated display names.
    * ``midi_selector`` - optional ``start~end`` bar range to take notes from.

    Attributes:
        selected_period: ``[start, end]`` of ``location``, minus ``start_bar``.
        selected_channels: Entries of ``instruments``.
        selected_names: Entries of ``names``, or ``None`` when absent/empty.
        midi_period: ``[start, end]`` of ``midi_selector`` minus ``start_bar``;
            the very same list object as ``selected_period`` when the selector
            is absent or empty.
        text: The paragraph body.
    """

    def __init__(self,para_data:tuple[str,...],start_bar=0):
        """Parse one ``(header, body, ...)`` record.

        Args:
            para_data: Indexable record; item 0 is the header line, item 1 the
                body text. Further items are ignored.
            start_bar: Value subtracted from every bar number in the header.

        Raises:
            ValueError: If the header has fewer than two ``/`` fields or a bar
                number is not a valid float.
        """
        # Pad with two Nones so both optional trailing fields may be left out.
        location,instruments,names,midi_selector,*_ = para_data[0].split('/')+[None,None]

        def parse_bar_range(spec:str) -> list[float]:
            # 'a~b' -> [a-start_bar, b-start_bar]
            return [float(bound)-start_bar for bound in spec.split('~')]

        selected_period = parse_bar_range(location)
        selected_channels = instruments.split(';')
        # A missing field (None) and an empty field ('') both mean "not given".
        selected_names = names.split(';') if names else None
        midi_period = parse_bar_range(midi_selector) if midi_selector else selected_period

        self.selected_period = selected_period
        self.selected_channels = selected_channels
        self.selected_names = selected_names
        self.midi_period = midi_period
        self.text = para_data[1]
        # Echo the parsed record so the script can be checked while it loads.
        print(selected_period,selected_channels,selected_names,midi_period)
        print(self.text)

class Script():
    """Parsed contents of a script text file.

    Expected layout of the file (sections are terminated by a blank line):

    * ``#METAFILE`` followed by one comma-separated line holding
      ``steps_per_beat, beats_per_bar, beats_per_minute, Begin_Time,
      Start_Bar, Count_Num``.
    * ``#TITLE`` followed by the title text.
    * ``#SAYING`` followed by the saying text.
    * Any number of paragraphs: a ``# <header>`` line followed by the body.

    Attributes:
        steps_per_beat, beats_per_bar, beats_per_minute, Count_Num: ints read
            from the ``#METAFILE`` line.
        Begin_Time, Start_Bar: floats read from the ``#METAFILE`` line.
        title, saying: Text of the respective sections.
        arr_para: One ``Paragraph`` per ``# <header>`` section, in file order.
    """

    def __init__(self,file_path:str):
        """Read and parse ``file_path`` (UTF-8).

        Raises:
            ValueError: If the ``#METAFILE``, ``#TITLE`` or ``#SAYING`` section
                is missing, or a ``#METAFILE`` field is not a valid number.
            IndexError: If the ``#METAFILE`` line has fewer than six fields.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            data = f.read()
        # Sections end at a blank line. Appending one keeps the final section
        # from being dropped when the file ends without a trailing blank line.
        data += '\n\n'

        def section(pattern:str,label:str) -> str:
            # First capture group of `pattern`, or a readable error.
            found = re.search(pattern,data)
            if found is None:
                raise ValueError(f'{file_path}: section {label} is missing or malformed')
            return found.group(1)

        metafile = section(r'#METAFILE\n(.+?)\n','#METAFILE').split(',')
        self.steps_per_beat = int(metafile[0])
        self.beats_per_bar = int(metafile[1])
        self.beats_per_minute = int(metafile[2])
        self.Begin_Time = float(metafile[3])
        self.Start_Bar = float(metafile[4])
        self.Count_Num = int(metafile[5])

        # [\s\S] matches any character including newlines, without the
        # per-character alternation and extra capture group of (.|\n).
        self.title = section(r'#TITLE\n([\s\S]+?)\n\n','#TITLE')
        self.saying = section(r'#SAYING\n([\s\S]+?)\n\n','#SAYING')

        self.arr_para:list[Paragraph] = []
        # Each match is a (header, body) tuple.
        for record in re.findall(r'# +(.+?)\n([\s\S]+?)\n\n',data):
            self.arr_para.append(Paragraph(record,self.Start_Bar))

In [None]:
# 打包选取部分midi可视化功能

class MidiVisualizer():

    class Note():
        pass
    
    def __init__(self,midifile:mido.MidiFile,script:Script,steps_per_beat=4,beats_per_bar=4,beats_per_minute=120):
        self.mid = midifile
        self.steps_per_beat = script.steps_per_beat
        self.beats_per_bar = script.beats_per_bar
        self.beats_per_minute = script.beats_per_minute
        self.timebase = self.mid.ticks_per_beat
        self.Begin_Time = script.Begin_Time
        self.Start_Bar = script.Start_Bar
        self.Count_Num = script.Count_Num

        self.arr_keyboard_white = (np.arange(2,9).reshape(-1,1)@np.ones(7).reshape(1,-1)*12+np.array([0,2,4,5,7,9,11])).reshape(-1)
        self.arr_keyboard_black = (np.arange(2,9).reshape(-1,1)@np.ones(5).reshape(1,-1)*12+np.array([1,3,6,8,10])).reshape(-1)
        self.arr_keyboard_white_block_1 = (np.arange(2,9).reshape(-1,1)@np.ones(1).reshape(1,-1)*12+np.array([0.25])).reshape(-1)
        self.arr_keyboard_white_block_2 = (np.arange(2,9).reshape(-1,1)@np.ones(1).reshape(1,-1)*12+np.array([2])).reshape(-1)
        self.arr_keyboard_white_block_3 = (np.arange(2,9).reshape(-1,1)@np.ones(1).reshape(1,-1)*12+np.array([8])).reshape(-1)

        self.color_bg = (49/256,56/256,62/256)
        self.color_black_key = (0.23,0.23,0.23)
        self.color_white_key = (0.9,0.9,0.9)
        self.color_white_c = (0.75,0.75,0.75)
        self.color_white_roll = (0.5,0.5,0.5,0.1) # (0.5,0.5,0.5,0.1)
        self.color_black_roll = (0.2,0.2,0.2,0.0) # (0.2,0.2,0.2,0.1)
        self.color_edge = (0.5,0.5,0.5)
        self.color_note = (0.9,0.9,0.9)
        self.arr_color_note = [(0.9,0.9,0.9),(0.75,0.75,0.75),(0.8,0.8,0.8),(0.75,0.75,0.75)]
        self.color_note_edge = (0.6,0.6,0.6,0.5)
        self.color_line = (0.1,0.5,0.7)

    def getBarTime(self,t_tick:int) -> float:
        return t_tick/(self.timebase/4*self.steps_per_beat*self.beats_per_bar)
    
    def getTrueTime(self,t_bar:float) ->float:
        return t_bar*self.beats_per_bar/self.beats_per_minute*60
    
    def getMovTime(self,t_bar:float) ->float:
        return t_bar*self.beats_per_bar/self.beats_per_minute*60+self.Begin_Time

    def maskNote(self,mtrack:list[Note],channel_names:list[str]) -> dict[str,np.ndarray]:
        arr_note = np.zeros((len(mtrack),5),dtype='float')
        for i,note in enumerate(mtrack):
            for n,name in enumerate(channel_names):
                if note.name == name:
                    arr_note[i] = [note.pitch,note.duration,note.start,note.velocity,n]
        return arr_note

    def generateMidiVC_fast(
            self,
            selected_channels:list[str],
            selected_period:list[float],
            midi_period:list[float]|None=None,
            channel_names:list[str]|None=None,
            selected_pitch:list[int]|None=None,
            dpi:int = 600,
            omit_skills:bool=True
        ) -> me.VideoClip:
        """Render a piano-roll clip whose only moving part is the playhead.

        Keyboard, grid and note bars are drawn once; each video frame only
        moves a vertical line and rasterises the figure again.

        Args:
            selected_channels: Track names. A MIDI track is used when its
                first message is a ``track_name`` meta message with that name.
            selected_period: ``[start_bar, end_bar]``; its start is the bar
                position of the playhead at ``t = 0``.
            midi_period: Optional ``[start_bar, end_bar]`` from which the notes
                and the x axis are taken instead of ``selected_period``.
            channel_names: Legend labels, one per entry of
                ``selected_channels``; defaults to ``selected_channels``.
            selected_pitch: Optional ``[low, high]`` y limits; derived from
                the collected notes when omitted.
            dpi: Resolution of the 6.4 x 1.8 inch figure.
            omit_skills: When true, ``note_on`` messages below pitch 40 are
                ignored (presumably key-switch notes - TODO confirm; the
                non-fast variant uses 36 as the threshold).

        Returns:
            A ``VideoClip`` yielding RGB frames. Its duration is the length of
            the period the notes were taken from.
        """
        # NOTE(review): the playhead starts at selected_period[0] even when
        # the x axis shows midi_period - confirm this is intended.
        time_offset = selected_period[0]
        if midi_period is not None:
            selected_period = midi_period
        if channel_names is None:
            channel_names = selected_channels

        mtrack = []
        for channel,track_name in enumerate(selected_channels):
            for mid_track in self.mid.tracks:
                if not mid_track:
                    continue  # an empty track has no name message to compare
                head = mid_track[0]
                if head.type != 'track_name' or head.name != track_name:
                    continue
                # Matching track found.
                note_dic = {}  # pitch -> Note that has started but not ended
                t_tick = 0
                for msg in mid_track:
                    t_tick += msg.time  # message times are deltas; accumulate
                    t_bar = self.getBarTime(t_tick)
                    if not (selected_period[0]<=t_bar<=selected_period[1]):
                        continue
                    # A note_on with velocity 0 is the MIDI convention for
                    # "note off" and must end the note, not start a new one.
                    starts_note = msg.type == 'note_on' and msg.velocity > 0
                    ends_note = msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0)
                    if starts_note:
                        if (not omit_skills) or msg.note>=40:
                            note_dic[msg.note] = Note(msg.note,msg.velocity,t_bar,channel=channel)
                    elif ends_note and msg.note in note_dic:
                        note = note_dic.pop(msg.note)
                        note.name = channel_names[channel]
                        note.setEnd(t_bar)
                        mtrack.append(note)

        if len(mtrack) > 0:
            arr_note = self.maskNote(mtrack,channel_names)
        else:
            # Placeholder row (pitch 60, zero length) keeps the limits finite.
            arr_note = np.array([[60,0,0,0,0]])

        duration = self.getTrueTime(selected_period[1]-selected_period[0])
        print(f'selected_period = {selected_period} | duration = {duration:.2f}')

        xlim = selected_period
        if selected_pitch is None:
            # At least 4 semitones of margin and at least 10 around the mean.
            ylim = [
                min([arr_note[:,0].min()-4,arr_note[:,0].mean()-10]),
                max([arr_note[:,0].max()+4,arr_note[:,0].mean()+10])
            ]
        else:
            ylim = selected_pitch

        # Drawing: static content first, the moving playhead afterwards.
        plt.close()
        fig = plt.figure(figsize=(6.4,1.8),dpi=dpi,
                    facecolor=self.color_bg
                    )
        ax_bg = fig.add_axes([0,0.1,1,0.9])       # lanes + keyboard strip
        ax_fg = fig.add_axes([0.05,0.1,0.95,0.9]) # notes, grid, playhead

        ax_bg.barh(self.arr_keyboard_white,width=1,height=1,color=self.color_white_roll)
        ax_bg.barh(self.arr_keyboard_black,width=1,height=1,color=self.color_black_roll)
        ax_bg.barh(self.arr_keyboard_white_block_2,width=0.05,height=5,color=self.color_white_key,ec=self.color_edge,lw=0.2)
        ax_bg.barh(self.arr_keyboard_white_block_3,width=0.05,height=7,color=self.color_white_key,ec=self.color_edge,lw=0.2)
        ax_bg.barh(self.arr_keyboard_white_block_1,width=0.05,height=1.5,color=self.color_white_c,ec=self.color_edge,lw=0.2)
        ax_bg.barh(self.arr_keyboard_black,width=0.03,height=1,color=self.color_black_key)
        for c in self.arr_keyboard_white_block_1:
            if ylim[0]<c<ylim[1]:
                # Label each visible C key; the font shrinks as the range grows.
                ax_bg.text(x=0.048,y=c,s=f'C{c//12:.0f}',fontsize=110/(ylim[1]-ylim[0]),va='center',ha='right')
        ax_bg.set_xticks([])
        ax_bg.set_yticks([])
        ax_bg.set_xlim([0,1])
        ax_bg.set_ylim(ylim)
        ax_bg.axis('off')

        ax_fg.set_facecolor((0,0,0,0))
        for n,name in enumerate(channel_names):
            channel_note = arr_note[arr_note[:,4]==n]
            ax_fg.barh(
                channel_note[:,0],width=channel_note[:,1],height=1,left=channel_note[:,2],
                # Wrap around so more channels than colours does not fail.
                color=self.arr_color_note[n%len(self.arr_color_note)],ec=self.color_note_edge,
                lw=0.4,zorder=3,label=name
            )
        ax_fg.set_xlim(xlim)
        ax_fg.set_xticks(np.arange(np.floor(xlim[0]),np.floor(xlim[1])))
        ax_fg.set_xticks(np.arange(np.floor(xlim[0]),np.floor(xlim[1])+0.5,1/self.beats_per_bar),minor=True)
        ax_fg.xaxis.set_tick_params(which='both',colors=self.color_white_key,direction='in')
        ax_fg.set_yticks([])
        ax_fg.set_ylim(ylim)
        ax_fg.grid(which='minor',ls=':',lw=0.2)
        ax_fg.grid(which='major',ls='-',lw=0.1)
        for spine in ax_fg.spines.values():
            spine.set_visible(False)

        art_line, = ax_fg.plot([],[],color=self.color_line)
        def makeFrame(t):
            # Seconds since clip start -> bar position of the playhead.
            t_bar = t/60*self.beats_per_minute/self.beats_per_bar+time_offset
            art_line.set_data([t_bar,t_bar],ylim)
            fig.canvas.draw()
            frame = np.array(fig.canvas.buffer_rgba())
            return frame[:,:,:3]  # drop the alpha channel
        mov = me.VideoClip(makeFrame,duration=duration)
        return mov

    def generateMidiVC(
            self,
            selected_channels:list[str],
            selected_period:list[float],
            channel_names:list[str]|None=None,
            selected_pitch:list[int]|None=None,
            dpi:int = 600,
            omit_skills:bool=True
        ) -> me.VideoClip:
        """Render a piano-roll clip, redrawing the whole figure every frame.

        Args:
            selected_channels: Track names. A MIDI track is used when its
                first message is a ``track_name`` meta message with that name.
            selected_period: ``[start_bar, end_bar]`` to show.
            channel_names: Names stored on the collected notes, one per entry
                of ``selected_channels``; defaults to ``selected_channels``.
            selected_pitch: Optional ``[low, high]`` y limits; derived from
                the collected notes when omitted.
            dpi: Resolution of the 6.4 x 1.8 inch figure.
            omit_skills: When true, ``note_on`` messages below pitch 36 are
                ignored (presumably key-switch notes - TODO confirm; the fast
                variant uses 40 as the threshold).

        Returns:
            A ``VideoClip`` yielding RGB frames for the length of
            ``selected_period``.
        """
        # Every note gets a name so that maskNote can always be given the
        # list of names it requires.
        names = selected_channels if channel_names is None else channel_names

        mtrack = []
        for channel,track_name in enumerate(selected_channels):
            for mid_track in self.mid.tracks:
                if not mid_track:
                    continue  # an empty track has no name message to compare
                head = mid_track[0]
                if head.type != 'track_name' or head.name != track_name:
                    continue
                # Matching track found.
                note_dic = {}  # pitch -> Note that has started but not ended
                t_tick = 0
                for msg in mid_track:
                    t_tick += msg.time  # message times are deltas; accumulate
                    t_bar = self.getBarTime(t_tick)
                    if not (selected_period[0]<=t_bar<=selected_period[1]):
                        continue
                    # A note_on with velocity 0 is the MIDI convention for
                    # "note off" and must end the note, not start a new one.
                    starts_note = msg.type == 'note_on' and msg.velocity > 0
                    ends_note = msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0)
                    if starts_note:
                        if (not omit_skills) or msg.note>=36:
                            note_dic[msg.note] = Note(msg.note,msg.velocity,t_bar,channel=channel)
                    elif ends_note and msg.note in note_dic:
                        note = note_dic.pop(msg.note)
                        note.name = names[channel]
                        note.setEnd(t_bar)
                        mtrack.append(note)

        if len(mtrack) > 0:
            arr_note = self.maskNote(mtrack,names)
        else:
            # Placeholder row (pitch 60, zero length) keeps the limits finite.
            arr_note = np.array([[60,0,0,0,0]])

        # Drawing.
        plt.close()
        fig = plt.figure(figsize=(6.4,1.8),dpi=dpi,
                    facecolor=self.color_bg
                    )
        ax_bg = fig.add_axes([0,0.1,1,0.9])       # lanes + keyboard strip
        ax_fg = fig.add_axes([0.05,0.1,0.95,0.9]) # notes, grid, playhead

        duration = self.getTrueTime(selected_period[1]-selected_period[0])
        print(duration)

        # The limits do not depend on the frame time, so compute them once.
        xlim = selected_period
        if selected_pitch is None:
            # At least 4 semitones of margin and at least 10 around the mean.
            ylim = [
                min([arr_note[:,0].min()-4,arr_note[:,0].mean()-10]),
                max([arr_note[:,0].max()+4,arr_note[:,0].mean()+10])
            ]
        else:
            ylim = selected_pitch

        def makeFrame(t):
            ax_bg.barh(self.arr_keyboard_white,width=1,height=1,color=self.color_white_roll)
            ax_bg.barh(self.arr_keyboard_black,width=1,height=1,color=self.color_black_roll)
            ax_bg.barh(self.arr_keyboard_white_block_2,width=0.05,height=5,color=self.color_white_key,ec=self.color_edge,lw=0.2)
            ax_bg.barh(self.arr_keyboard_white_block_3,width=0.05,height=7,color=self.color_white_key,ec=self.color_edge,lw=0.2)
            ax_bg.barh(self.arr_keyboard_white_block_1,width=0.05,height=1.5,color=self.color_white_c,ec=self.color_edge,lw=0.2)
            ax_bg.barh(self.arr_keyboard_black,width=0.03,height=1,color=self.color_black_key)
            for c in self.arr_keyboard_white_block_1:
                if ylim[0]<c<ylim[1]:
                    # Label each visible C key.
                    ax_bg.text(x=0.048,y=c,s=f'C{c//12:.0f}',fontsize=110/(ylim[1]-ylim[0]),va='center',ha='right')
            ax_bg.set_xticks([])
            ax_bg.set_yticks([])
            ax_bg.set_xlim([0,1])
            ax_bg.set_ylim(ylim)
            ax_bg.axis('off')

            ax_fg.set_facecolor((0,0,0,0))
            ax_fg.barh(arr_note[:,0],width=arr_note[:,1],height=1,left=arr_note[:,2],color=self.color_note,ec=self.color_note_edge,lw=0.2,zorder=3)
            ax_fg.set_xlim(xlim)
            ax_fg.set_xticks(np.arange(np.floor(xlim[0]),np.floor(xlim[1])))
            ax_fg.set_xticks(np.arange(np.floor(xlim[0]),np.floor(xlim[1])+0.5,1/self.beats_per_bar),minor=True)
            ax_fg.xaxis.set_tick_params(which='both',colors=self.color_white_key,direction='in')
            ax_fg.set_yticks([])
            ax_fg.set_ylim(ylim)
            ax_fg.grid(which='minor',ls=':',lw=0.2)
            ax_fg.grid(which='major',ls='-',lw=0.1)
            for spine in ax_fg.spines.values():
                spine.set_visible(False)

            # Seconds since clip start -> bar position of the playhead.
            t_bar = t/60*self.beats_per_minute/self.beats_per_bar+xlim[0]
            ax_fg.plot([t_bar,t_bar],ylim,color=self.color_line)

            fig.canvas.draw()
            frame = np.array(fig.canvas.buffer_rgba())

            # Empty both axes so the next frame starts from a blank figure.
            ax_bg.clear()
            ax_fg.clear()
            return frame[:,:,:3]  # drop the alpha channel
        mov = me.VideoClip(makeFrame,duration=duration)
        return mov

    def showFrame(self,clip:me.VideoClip,t_bar:float=0,figsize=(6.4,3.2),Begin_Time:float|None=None,dpi:int=600):
        """Display one frame of ``clip`` in the notebook and return it.

        Args:
            clip: Clip to sample.
            t_bar: Bar position of the frame.
            figsize: Size in inches of the figure used for display.
            Begin_Time: Offset added to the converted time; defaults to
                ``self.Begin_Time``.
            dpi: Resolution of the display figure.

        Returns:
            The frame returned by ``clip.get_frame``.
        """
        # Identity test: only a missing argument selects the default.
        if Begin_Time is None:
            Begin_Time = self.Begin_Time
        t_time = self.getTrueTime(t_bar)+Begin_Time
        plt.close()
        fig = plt.figure(figsize=figsize,dpi=dpi)
        fig.set_facecolor((1,1,1,0))
        ax = fig.add_axes([0,0,1,1])  # axes fill the whole figure
        frame = clip.get_frame(t_time)
        ax.imshow(frame)
        ax.axis('off')
        # `display` is not imported in this file; presumably the IPython
        # builtin available inside a notebook.
        display(fig)
        return frame

class Note(MidiVisualizer):
    """One MIDI note with its timing expressed in bars.

    The class derives from ``MidiVisualizer`` but does not run that class's
    ``__init__``, so ``steps_per_beat`` and ``beats_per_bar`` are not inherited
    as instance attributes. They are therefore stored here; step-based
    durations previously failed with ``AttributeError``.
    """

    def __init__(self,pitch:int=60,velocity:int=100,start:float=0,duration:None|float=None,channel:int=0,
                 steps_per_beat:int=4,beats_per_bar:int=4):
        """Create a note.

        Args:
            pitch: MIDI pitch number.
            velocity: MIDI velocity.
            start: Start position in bars.
            duration: Optional length in steps; converted to bars. When
                omitted, ``duration`` and ``end`` stay ``None`` until
                ``setDuration`` or ``setEnd`` is called.
            channel: Index of the channel the note belongs to.
            steps_per_beat: Steps per beat used to convert step counts;
                the default equals the one in ``MidiVisualizer.__init__``.
            beats_per_bar: Beats per bar used to convert step counts;
                the default equals the one in ``MidiVisualizer.__init__``.
        """
        self.steps_per_beat = steps_per_beat
        self.beats_per_bar = beats_per_bar
        self.pitch = pitch
        self.velocity = velocity
        self.start = start
        if duration is not None:
            self.setDuration(duration)
        else:
            self.duration = None
            self.end = None
        self.channel = channel
        self.name = None

    def setDuration(self,duration:float):
        """Set the length from a step count and update ``end`` accordingly."""
        self.duration = duration/(self.steps_per_beat*self.beats_per_bar)
        self.end = self.start+self.duration

    def setEnd(self,end:float):
        """Set the end position in bars and derive ``duration`` from it."""
        self.end = end
        self.duration = (end-self.start)

# 2 正式编辑

In [None]:
# Output video resolution in pixels: frame height and frame width.
h = 1080
w = 2160

In [None]:
# Load the audio file and build the background image clip.

ac_au = me.AudioFileClip('./音频.mp3')
# Background frame: every pixel of an h x w image is set to RGB (49, 56, 62).
vc_bg = me.ImageClip(
    np.ones((h,w,3))*np.array([49,56,62]),
)

In [None]:
# Parse the script text file and open the MIDI file.
script = Script('./脚本.txt')
mid = mido.MidiFile('./音频.mid')
# Echo the grid settings read from the script's #METAFILE line.
print(script.steps_per_beat,script.beats_per_bar,script.beats_per_minute)
mv = MidiVisualizer(mid,script)


In [None]:
# Notebook output: duration of the loaded audio clip.
ac_au.duration

In [None]:
# Background layer: the plain image carrying the soundtrack. The audio starts
# after Begin_Time, and the clip lasts for Begin_Time plus the audio length.
delayed_audio = ac_au.set_start(mv.Begin_Time)
total_length = ac_au.duration+mv.Begin_Time
arr_clip_bg = [vc_bg.set_audio(delayed_audio).set_duration(total_length)]
# Variant without audio, left disabled:
# arr_clip_bg = [
#     vc_bg.set_duration(248+mv.Begin_Time),
# ]

In [None]:
# Inspect the background clip's duration and the Begin_Time offset.
print(arr_clip_bg[0].duration)
mv.Begin_Time

In [None]:
# Title card shown at the beginning: title, saying and author name.

intro_font = font_folder+'sarasa-mono-sc-regular.ttf'

tc_title = me.TextClip(script.title,color='#EAEAEA',font=intro_font,fontsize=96,align='center')
tc_saying = me.TextClip(script.saying,color='#C1C1C1',font=intro_font,fontsize=48,align='center')
tc_name = me.TextClip('—— Gray Frezicical',color='#C1C1C1',font=intro_font,fontsize=48,align='center')

# The title lasts until the end of the first bar of the video (Begin_Time
# included); the saying and the name last until bar position (4-Count_Num)/4.
# NOTE(review): the constant 4 presumably stands for beats per bar - confirm
# for scripts whose beats_per_bar is not 4.
title_until = mv.getMovTime(4/4)
intro_until = mv.getMovTime((4-mv.Count_Num)/4)

arr_clip_Begin = [
    tc_title.set_position(('center',0.3),relative=True).set_duration(title_until),
    tc_saying.set_position(('center',0.67),relative=True).set_duration(intro_until),
    tc_name.set_position((0.65,0.8),relative=True).set_duration(intro_until),
]

In [None]:
# Count-in overlay: a dark disc with one digit per count drawn on top of it.

count_font = font_folder+'sarasa-mono-sc-regular.ttf'

tc_circle = me.TextClip('●',color='#222222',font=count_font,fontsize=400,align='center')

# The disc appears at bar position (4-Count_Num)/4 and lasts Count_Num/4 bars.
disc = tc_circle.set_position(('center',0.4),relative=True)
disc = disc.set_duration(mv.getTrueTime(mv.Count_Num/4))
disc = disc.set_start(mv.getMovTime((4-mv.Count_Num)/4))
arr_clip_Count = [disc]

# Digits run from Count_Num down to 1; digit i lasts a quarter of a bar and
# starts at bar position (4-i)/4.
for i in range(mv.Count_Num,0,-1):
    tc_count_down = me.TextClip(f'{i}',color='#CCCCCC',font=count_font,fontsize=106,align='center')
    digit = tc_count_down.set_position(('center',0.566),relative=True)
    digit = digit.set_duration(mv.getTrueTime(1/4)).set_start(mv.getMovTime((4-i)/4))
    arr_clip_Count.append(digit)

In [None]:
# One annotation text per script paragraph, plus a piano-roll clip unless the
# paragraph's first channel is the literal 'omit'.
arr_clip_Para = []

for para in script.arr_para:
    # Both clips of a paragraph share the same start time and length.
    para_start = mv.getMovTime(para.selected_period[0])
    para_length = mv.getTrueTime(para.selected_period[1]-para.selected_period[0])

    if para.selected_channels[0] != 'omit':
        vc_mv = mv.generateMidiVC_fast(
            para.selected_channels,
            para.selected_period,
            para.midi_period,
            para.selected_names,
            omit_skills=False,
            dpi=282 # 600*0.47
        )
        roll = vc_mv.set_position((0.095,0.47),relative=True)
        arr_clip_Para.append(roll.set_duration(para_length).set_start(para_start))

    tc_anno = me.TextClip(
        para.text,
        color = '#EAEAEA',
        font = font_folder+'sarasa-mono-sc-regular.ttf',
        fontsize = 64,
        align='West',
    )
    caption = tc_anno.set_position((0.115,0.13),relative=True)
    arr_clip_Para.append(caption.set_duration(para_length).set_start(para_start))

In [None]:
# Stack all layers; a script whose title is the literal 'omit' gets neither
# the title card nor the count-in.
intro_layers = arr_clip_Begin+arr_clip_Count if script.title != 'omit' else []
arr_clip = arr_clip_bg+intro_layers+arr_clip_Para
mov = me.CompositeVideoClip(arr_clip,size=(w,h))
# mv.showFrame(mov,t_bar=23,Begin_Time=1,dpi=510)
pass

In [None]:
# Preview the composed frame at bar 1 (showFrame adds mv.Begin_Time by default).
mv.showFrame(mov,t_bar=1,dpi=510)
# Presumably here so the notebook does not echo the returned frame array.
pass

In [None]:
# In-notebook preview variants, left disabled:
# # display(mov.subclip(mv.getMovTime(0),mv.getMovTime(4)).ipython_display(fps=4,threads=8))
# display(mov.subclip(mv.getMovTime(62.5),mv.getMovTime(68)).ipython_display(fps=4,threads=8))
# Export the video from its start up to bar 68 at 30 fps.
# NOTE(review): the end bar 68 is hard-coded - presumably the length of this
# particular piece; adjust for other material.
mov.subclip(0,mv.getMovTime(68)).write_videofile('./华尔兹管弦.mp4',audio_bitrate='192k',fps=30,threads=8)
pass