In [24]:
import json
# Default location of the JSON file describing the blank segments.
BLANK_DATA_FILE = "./Data/JSON/BlankData.json"

def GetData(path = None):
    """Load the blank-segment records from a JSON file.

    Parameters
    ----------
    path : str or path-like, optional
        File to read. When omitted, the current value of ``BLANK_DATA_FILE``
        is used (looked up at call time, so reassigning the constant in a
        later cell is still honoured).

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    if path is None:
        path = BLANK_DATA_FILE
    # Pin the encoding: without it open() falls back to the platform locale,
    # which makes decoding of any non-ASCII text machine-dependent.
    with open(path, encoding="utf-8") as fp:
        return json.load(fp)

In [25]:
def GetIndex(data):
    """Build a lookup table over the segment records.

    Each record is keyed by the tuple ``(int(record['songid']), record['id'])``.
    If two records produce the same key, the later one in ``data`` wins.
    """
    return {
        (int(record['songid']), record['id']): record
        for record in data
    }

In [26]:
def CheckBlankExist(index, musicId, blankId):
    """Return True when ``index`` holds a non-None entry for ``(musicId, blankId)``."""
    entry = index.get((musicId, blankId))
    return entry is not None

In [27]:
def GetLength(index, songId, blankId):
    """Return the duration (``endtime - starttime``) of one segment.

    A segment that is not present in ``index`` yields 1: the segment before
    the first one and the segment after the last one are treated as having
    length 1 (very short either way). Negative durations are clamped to 0.
    """
    segment = index.get((songId, blankId))
    if segment is None:
        return 1

    duration = segment['endtime'] - segment['starttime']
    return max(duration, 0)

In [28]:
# Cache of per-song total lengths, keyed by song id.
MUSIC_LENGTH_CACHE = {}
def GetTotalTimeForSong(index, songId):
    """Return the summed length of all consecutive segments of a song.

    Segments are walked from id 0 upwards until the first missing id, and
    their ``GetLength`` values are added up. The result is memoised in
    ``MUSIC_LENGTH_CACHE``.

    Note that the cache is keyed by ``songId`` only, so calling this with a
    different ``index`` for an already-cached song returns the cached value.

    Raises
    ------
    AssertionError
        If the computed total is not positive (e.g. the song has no segments).
    """
    cached = MUSIC_LENGTH_CACHE.get(songId)
    if cached is not None:
        return cached

    # Not cached yet: recompute the total length of the song.
    total   = 0
    blankId = 0
    while CheckBlankExist(index, songId, blankId):
        total   += GetLength(index, songId, blankId)
        blankId += 1

    # Validate BEFORE caching. Storing first would leave a 0 in the cache
    # after a failed call, and since 0 is not None every later call would
    # silently return that invalid total instead of failing again.
    assert total > 0, "ERROR WHEN CALCULATE THE LENGTH OF SONG_ID: %d" % songId
    MUSIC_LENGTH_CACHE[songId] = total

    return total

In [29]:
def GetFeatures(index, val):
    """Build the feature vector for one segment record.

    Parameters
    ----------
    index : dict
        Lookup table as produced by ``GetIndex``.
    val : dict
        The segment record; ``songid``, ``id``, ``starttime`` and ``endtime``
        are read from it.

    Returns
    -------
    (list, bool)
        ``[lastLen, beginPos, nowLen, endPos, nextLen]`` and an ``isShort``
        flag. The ``*Len`` entries are natural logs of the lengths of the
        previous, current and next segment; ``beginPos`` / ``endPos`` are the
        record's start and end time divided by the song's total length.
        ``isShort`` is True when the current segment's length is below 100.
    """
    import math

    songId  = int(val['songid'])
    blankId = val['id']

    def _logLength(length):
        # Segment lengths are measured on a log scale. GetLength clamps
        # durations to 0 and math.log(0) raises ValueError, so a degenerate
        # segment is treated like the placeholder length 1 (log == 0).
        return math.log(length) if length > 0 else 0.0

    # Look each quantity up once instead of recomputing it per feature.
    prevLength = GetLength(index, songId, blankId - 1)
    nowLength  = GetLength(index, songId, blankId + 0)
    nextLength = GetLength(index, songId, blankId + 1)
    totalTime  = GetTotalTimeForSong(index, songId)

    features = [
        _logLength(prevLength),
        val['starttime'] / totalTime,
        _logLength(nowLength),
        val['endtime'] / totalTime,
        _logLength(nextLength),
    ]
    isShort = (nowLength < 100)
    return features, isShort

In [30]:
def GetDataXy(data, deleteEnd = True, SelectAll = True, musicIdMin = 3, musicIdMax = 26):
    """Assemble the feature matrix and label vector from the segment records.

    Parameters
    ----------
    data : iterable of dict
        Segment records; indexed with ``GetIndex``.
    deleteEnd : bool
        When true, the last segment of every song is left out.
    SelectAll : bool
        When false, segments that ``GetFeatures`` flags as short are left out.
    musicIdMin, musicIdMax : int
        Inclusive range of song ids to walk.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature rows and the matching ``val['cut']`` labels.
    """
    samples = []
    labels  = []

    # Build the lookup table once.
    lookup = GetIndex(data)

    for musicId in range(musicIdMin, musicIdMax + 1):
        blankId = 0
        while CheckBlankExist(lookup, musicId, blankId):
            current = lookup[(musicId, blankId)]
            blankId += 1  # now points at the following segment

            # Keep the final segment of a song out of the training set.
            if deleteEnd and not CheckBlankExist(lookup, musicId, blankId):
                continue

            x_now, isShort = GetFeatures(lookup, current)
            if isShort and not SelectAll:
                continue

            samples.append(x_now)
            labels.append(current['cut'])

    import numpy as np
    return np.array(samples), np.array(labels)

In [19]:
def GetSvm():
    """Train and return an RBF-kernel SVC on the blank-segment data.

    The data is loaded with ``GetData`` and converted with ``GetDataXy``
    (last segment of each song excluded). The classifier uses C=30 and
    gamma=0.1.
    """
    features, labels = GetDataXy(GetData(), deleteEnd = True)

    from sklearn.svm import SVC
    classifier = SVC(kernel='rbf', C=30, gamma=0.1)
    classifier.fit(features, labels)

    return classifier

In [None]:
def FitSong(songId):
    svm = GetSvm()

    