In [1]:
import json
BLANK_DATA_FILE = "./Data/JSON/BlankData.json"

def GetData():
    with open(BLANK_DATA_FILE) as fp:
        data = json.load(fp)
    return data

In [2]:
# 制作索引
def GetIndex(data):
    index = {}
    for val in data:
        index[(int(val['songid']), val['id'])] = val
    return index

In [3]:
def CheckBlankExist(index, musicId, blankId):
    return index.get((musicId, blankId)) is not None

In [4]:
def GetLength(index, songId, blankId):
    if not CheckBlankExist(index, songId, blankId):
        # 第一段的前一段和最后一段的后一段长度视为 1 (反正就是很短)
        return 1
    else:
        starttime = index[(songId, blankId)]['starttime']
        endtime   = index[(songId, blankId)]['endtime']
        return max(endtime - starttime, 0)

In [5]:
def GetEnd(index, songId, blankId):
    if not CheckBlankExist(index, songId, blankId):
        # 第一段的前一段和最后一段的后一段长度视为 1 (反正就是很短)
        return 1
    else:
        # starttime = index[(songId, blankId)]['starttime']
        endtime   = index[(songId, blankId)]['endtime']
        return endtime

In [6]:
# 计算歌曲总长度
MUSIC_LENGTH_CACHE = {}
def GetTotalTimeForSong(index, songId):
    global MUSIC_LENGTH_CACHE

    # 未在缓存中，重新计算音乐总长度
    if MUSIC_LENGTH_CACHE.get(songId) is None:
        blankId = 0
        ans = 0
        while CheckBlankExist(index,songId, blankId):
            ans     = max(ans, GetEnd(index, songId, blankId))
            blankId += 1
        MUSIC_LENGTH_CACHE[songId] = ans
        assert ans > 0, "ERROR WHEN CALCULATE THE LENGTH OF SONG_ID: %d" % songId
    
    return MUSIC_LENGTH_CACHE[songId]

In [7]:
def GetFeatures(index, val):
    songId  = int(val['songid'])
    blankId = val['id']

    import math
    # 片段长度用对数长度衡量

    totalTime = GetTotalTimeForSong(index, songId)
    
    lastLen  = math.log(GetLength(index, songId, blankId - 1))
    beginPos = val['starttime'] / totalTime
    nowLen   = math.log(GetLength(index, songId, blankId + 0))
    endnPos = val['endtime'] / totalTime
    nextLen  = math.log(GetLength(index, songId, blankId + 1))

    isShort = (GetLength(index, songId, blankId + 0) < 100)
    return [lastLen, beginPos, nowLen, endnPos, nextLen], isShort

In [8]:
def GetDataXy(data, deleteEnd = True, SelectAll = True, musicIdMin = 3, musicIdMax = 26):
    X_all = []
    y_all = []

    # 制作索引
    index = GetIndex(data)

    for musicId in range(musicIdMin, musicIdMax + 1):
        blankId = 0
        while CheckBlankExist(index, musicId, blankId):
            val = index[(musicId, blankId)]
            blankId += 1

            # 不要将最后一列加入到训练集合中
            if not deleteEnd or CheckBlankExist(index, musicId, blankId):
                x_now, isShort = GetFeatures(index, val)
                if not isShort or SelectAll:
                    X_all.append(x_now)
                    y_all.append(val['cut'])

    import numpy as np
    return np.array(X_all), np.array(y_all)

In [9]:
def GetSvm():
    data = GetData()
    X, y = GetDataXy(data, deleteEnd = True)

    from sklearn.svm import SVC
    svm = SVC(kernel='rbf', C=30, gamma=0.1).fit(X, y)

    return svm

In [10]:
def GetSolveName(songid: str):
    if type(songid) == int:
        songid = "%04d" % songid

    assert len(songid) == 4
    return "./Data/SOLVE/%s.solve.json" % songid

In [11]:
import math
def GetSongFeatureBySongId(songId):
    solveFileName = GetSolveName(songId)

    with open(solveFileName) as fp:
        solveData = json.load(fp)

    totalTime = 0
    X = []
    for blank in solveData:
        totalTime = max(totalTime, blank['endtime'])

        beginPos = blank['starttime']
        nowLen   = math.log(max(blank['endtime'] - blank['starttime'], 1))
        endPos = blank['endtime']

        dataNow = [ 0, beginPos, nowLen, endPos, 0] # beginPos 和 endPos 需要后期缩放
        X.append(dataNow) # lastLen， nextLen 需要一会填充

    assert totalTime > 0

    for i in range(len(X)):
        X[i][0] = 0 if i == len(X) - 1 else X[i+1][2]
        X[i][1] /= totalTime
        X[i][3] /= totalTime
        X[i][4] = 0 if i == 0 else X[i-1][2]

    import numpy as np
    return np.array(X)


In [12]:
def GetSongBlanksBySongId(songId):
    solveFileName = GetSolveName(songId)

    with open(solveFileName) as fp:
        solveData = json.load(fp)
    
    return solveData

In [13]:
def GetTimeFromPos(pos):
    pos = int(pos + 0.5)
    
    minute = pos // 60
    second = pos % 60

    return "%02d:%02d" % (minute, second)

In [14]:
def FitSong(songId: int):
    print("FIT SONG %d ..." % songId)
    svm = GetSvm()

    X = GetSongFeatureBySongId(songId)
    blanks = GetSongBlanksBySongId(songId)

    # 预测是否在右端点处分段
    y = svm.predict(X)
    assert len(y) == len(blanks)

    for i in range(len(y)):
        if y[i] == 1:
            pos = max(blanks[i]['endtime'] / 200 - 0.5, 0)
            tnow = GetTimeFromPos(pos)
            print(tnow)
    print("DONE.")

In [17]:
FitSong(24)

FIT SONG 24 ...
00:01
00:13
01:05
01:32
01:33
01:45
02:54
04:04
04:17
DONE.
