In [1]:
import glob
import json
import math
import os
import re
import requests

# third-party libraries
import pandas as pd

pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', -1)  # or 199

  pd.set_option('display.max_colwidth', -1)  # or 199


In [2]:
folders = r'D:\Desktop\music_score'
filenames = glob.glob(f'{folders}\\*\\*')
filenames

['D:\\Desktop\\music_score\\0026_01\\easy.txt',
 'D:\\Desktop\\music_score\\0026_01\\expert.txt',
 'D:\\Desktop\\music_score\\0026_01\\hard.txt',
 'D:\\Desktop\\music_score\\0026_01\\master.txt',
 'D:\\Desktop\\music_score\\0026_01\\normal.txt',
 'D:\\Desktop\\music_score\\0049_01\\easy #10.txt',
 'D:\\Desktop\\music_score\\0049_01\\expert #6.txt',
 'D:\\Desktop\\music_score\\0049_01\\hard #9.txt',
 'D:\\Desktop\\music_score\\0049_01\\master #11.txt',
 'D:\\Desktop\\music_score\\0049_01\\normal #7.txt',
 'D:\\Desktop\\music_score\\0066_01\\easy #17.txt',
 'D:\\Desktop\\music_score\\0066_01\\expert #12.txt',
 'D:\\Desktop\\music_score\\0066_01\\hard #14.txt',
 'D:\\Desktop\\music_score\\0066_01\\master #15.txt',
 'D:\\Desktop\\music_score\\0066_01\\normal #13.txt']

In [3]:
def is_valid_note(line):
    if not line:
        return False
    for c in line:
        if c.isdigit() or c in '#:abcdef':
            pass
        else:
            return False
    return True

In [4]:
def parse_notes(line):
    notes = []
    
    part_a, part_b = line[1:].split(':')
    
    # part_a parser
    measure = int(part_a[:3])
    note_class = int(part_a[3])
    start_pos = int(part_a[4], 16)
    if len(part_a) == 6:
        # the notes during holding period
        hold_id = int(part_a[5])
    else:
        hold_id = -1
    
    # part_b parser
    note_scaling = len(part_b) // 2
    for i in range(note_scaling):
        note_property, note_width = int(part_b[i*2]), int(part_b[i*2+1], 16)
        if note_property == note_width == 0:
            continue
        notes.append({
            'measure': measure,
            'note_class': note_class,
            'start_pos': start_pos,
            'property': note_property,
            'width': note_width,
            'scaling': note_scaling,
            'timestamp': i,
            'hold_id': hold_id
        })

    return notes

In [5]:
def filename_to_notes(filename):

    note_lines = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if is_valid_note(line):
                note_lines.append(line)

    notes = []
    for line in note_lines:
        notes.extend(parse_notes(line))
    return notes

In [6]:
def preprocessing(df):
    df['real_timestamp'] = df['timestamp'] / df['scaling']
    df = df.drop_duplicates(['measure', 'note_class', 'start_pos', 'property', 'width', 'hold_id', 'real_timestamp'])
    return df

In [7]:
def discard_skill_notes(df):
    # if start_pos == 0 and class == 1 and property == 4 and width == 1,
    # or say, #xxx10:41 in the *.sus file, it's a skill note
    skill_note_indices = (df['note_class'] == 1) & \
                         (df['start_pos'] == 0) & \
                         (df['property'] == 4) & \
                         (df['width'] == 1)

    # extract skill notes, there should be six skill notes
    df_skill = df[skill_note_indices]
#     assert len(df_skill) == 6, len(df_skill)
    if len(df_skill) != 6:
        print(f'Warning: the number of skill notes of this score is {len(df_skill)}')

    # filter out skill notes
    df = df[~skill_note_indices]
    return df

In [8]:
def discard_fever_indicator_notes(df):
    # if start_pos == 15 and class == 1 and property in [1, 2] and width == 1,
    # or say, #xxx1f:11 or #xxx1f:21 in the *.sus file, it's a fever indicator note.
    # #xxx1f:11 is start point of fever period, #xxx1f:21 is end point of fever period
    fever_indicator_indices = (df['note_class'] == 1) & \
                              (df['start_pos'] == 15) & \
                              ((df['property'] == 1) | (df['property'] == 2)) & \
                              (df['width'] == 1)

    # extract skill notes, there should be six skill notes
    df_fever_indicator = df[fever_indicator_indices]
#     assert len(df_fever_indicator) == 2, len(df_fever_indicator)
    if len(df_fever_indicator) != 2:
        print(f'Warning: the number of fever notes of this score is {len(df_fever_indicator)}')

    # filter out fever indicator note
    df = df[~fever_indicator_indices]
    return df

In [9]:
def discard_air_notes(df):
    return df.query('note_class != 5')

In [10]:
def parse_difficulty(difficulty):
    symbols = ['easy', 'normal', 'hard', 'expert', 'master']
    for symbol in symbols:
        if difficulty.startswith(symbol):
            return symbol
    return None

In [11]:
def get_holding_notes(df_hold, df_not_hold):
    
    df_hold_start_end_pts = df_hold.query('property == 1 or property == 2')
    df_hold_diamond_pts   = df_hold.query('property == 3')
    diamond_notes = len(df_hold_diamond_pts)
    
    # to check if start points and end points of holding notes are critcal or not
    # critical lookup table
    df_not_hold_lookup = df_not_hold.query('property == 2')[['measure', 'start_pos', 'width', 'real_timestamp']]
    df_not_hold_lookup['is_critical'] = 1

    # join two dataframes
    df_hold_start_end_pts = df_hold_start_end_pts.merge(
        df_not_hold_lookup,
        left_on=['measure', 'real_timestamp', 'start_pos', 'width'],
        right_on=['measure', 'real_timestamp', 'start_pos', 'width'],
        how='left'
    ).fillna(0).sort_values(['measure', 'real_timestamp', 'hold_id', 'property'])

    # calculate holding_combo and critical_holding
    tmp_holding_period_start = {}
    holding_notes = 0
    critical_holding_endpoints = 0
    normal_holding_endpoints = 0
    for row_id, row in df_hold_start_end_pts.iterrows():

        # if it is start of the period, property will be 1, otherwise 2
        is_start = row['property'] == 1
        if is_start:
            # if holding period starts, record the start timestamp and is_critical
            tmp_holding_period_start[row['hold_id']] = (row['measure'], row['real_timestamp'], row['is_critical'])
        else:
            # if holding period ends, find the matched start point
            start_measure, start_timestamp, start_iscritical = tmp_holding_period_start[row['hold_id']]
            end_measure, end_timestamp, end_iscritical = row['measure'], row['real_timestamp'], row['is_critical']

            # calculate how many eighth note during period
            start_universe_timestamp = (start_measure + start_timestamp) * 8
            end_universe_timestamp = (end_measure + end_timestamp) * 8
            start_count = math.ceil(start_universe_timestamp)
            start_count += int(start_universe_timestamp == start_count)
            end_count = math.floor(end_universe_timestamp)
            end_count -= int(end_universe_timestamp == end_count)
            holding_notes += end_count - start_count + 1

            # handle the critical notes in the holding period
            if start_iscritical:
                # if start point is critical => though the end point will not be critical in the score, 
                # the game will view both start point and end point as two critical notes
                critical_holding_endpoints += 2

            elif end_iscritical:
                # if end point is critical => only end point is critical notes
                critical_holding_endpoints += 1
                normal_holding_endpoints += 1
            else:
                normal_holding_endpoints += 2

            tmp_holding_period_start[row['hold_id']] = []
    
    return {
        'diamonds': diamond_notes,
        'holding_eighth': holding_notes,
        'critical_endpoints': critical_holding_endpoints,
        'normal_endpoints': normal_holding_endpoints
    }

In [12]:
def get_not_hold_notes(df_hold, df_not_hold):
    # to check if basic notes have appeared in holding endpoints or not
    df_hold_lookup = df_hold[['measure', 'start_pos', 'width', 'real_timestamp']]
    df_hold_lookup['is_hold'] = 1

    df_real_not_hold = df_not_hold.merge(
        df_hold_lookup,
        left_on=['measure', 'real_timestamp', 'start_pos', 'width'],
        right_on=['measure', 'real_timestamp', 'start_pos', 'width'],
        how='left'
    ).fillna(0).query('is_hold != 1').sort_values(['measure', 'real_timestamp', 'start_pos', 'width'])

    critical_basic_notes = len(df_real_not_hold.query('property == 2'))
    normal_basic_notes = len(df_real_not_hold.query('property == 1'))
    return {
        'critical_basic_notes': critical_basic_notes,
        'normal_basic_notes': normal_basic_notes
    }

In [13]:
note_nums = []
for filename in filenames:
    # parse song_id and difficulty from filename
    subfolder, difficulty = os.path.split(filename)
    subfolder, song_id = os.path.split(subfolder)
    
    if not re.match('[0-9]{4}_[0-9]{2}', song_id):
        continue
    song_id = int(song_id[:-3])
    difficulty = parse_difficulty(difficulty)
    if not difficulty:
        continue
    print(f'Now Calculating song {song_id:04} : {difficulty}')
    
    # parse notes from file
    notes = filename_to_notes(filename)
    df = pd.DataFrame(notes)

    # preprocessing
    df = preprocessing(df)
    df = discard_skill_notes(df)
    df = discard_fever_indicator_notes(df)
    df = discard_air_notes(df)

    # separate hold part and not holding part
    df_hold = df.query('note_class == 3')
    df_not_hold = df.query('note_class == 1')

    # calculate separately
    note_num = {'song_id': song_id, 'difficulty': difficulty}
    holding_notes_dict = get_holding_notes(df_hold, df_not_hold)
    not_hold_notes_dict = get_not_hold_notes(df_hold, df_not_hold)
    total_notes = sum(holding_notes_dict.values()) + sum(not_hold_notes_dict.values())
    
    note_num.update(holding_notes_dict)
    note_num.update(not_hold_notes_dict)
    note_num['total_notes'] = total_notes
    note_nums.append(note_num)

note_nums = pd.DataFrame(note_nums)

Now Calculating song 0026 : easy
Now Calculating song 0026 : expert
Now Calculating song 0026 : hard


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_hold_lookup['is_hold'] = 1


Now Calculating song 0026 : master
Now Calculating song 0026 : normal
Now Calculating song 0049 : easy
Now Calculating song 0049 : expert
Now Calculating song 0049 : hard
Now Calculating song 0049 : master
Now Calculating song 0049 : normal
Now Calculating song 0066 : easy
Now Calculating song 0066 : expert
Now Calculating song 0066 : hard
Now Calculating song 0066 : master
Now Calculating song 0066 : normal


In [14]:
note_nums = note_nums.sort_values(['song_id', 'total_notes']).reset_index(drop=True)
note_nums.to_csv('note_nums_detailed.csv', index=False)
note_nums

Unnamed: 0,song_id,difficulty,diamonds,holding_eighth,critical_endpoints,normal_endpoints,critical_basic_notes,normal_basic_notes,total_notes
0,26,easy,0,70,0,20,10,48,148
1,26,normal,16,113,0,62,26,154,371
2,26,hard,47,134,10,66,35,228,520
3,26,expert,102,153,26,166,44,334,825
4,26,master,108,105,16,246,49,428,952
5,49,easy,0,193,0,54,2,127,376
6,49,normal,25,255,0,132,6,333,751
7,49,hard,100,246,1,115,45,609,1116
8,49,expert,41,284,9,249,45,874,1502
9,49,master,9,203,10,264,55,1125,1666


In [15]:
note_multiple_nums = note_nums.copy()
note_multiple_nums['critical'] = note_multiple_nums['critical_endpoints'] + note_multiple_nums['critical_basic_notes']
note_multiple_nums['normal'] = note_multiple_nums['normal_endpoints'] + note_multiple_nums['normal_basic_notes']
note_multiple_nums['holding'] = note_multiple_nums['holding_eighth'] + note_multiple_nums['diamonds']
note_multiple_nums = note_multiple_nums.drop(columns=['diamonds', 'holding_eighth', 'critical_endpoints', 'normal_endpoints', 'critical_basic_notes', 'normal_basic_notes'])
note_multiple_nums.to_csv('note_nums_aggregated.csv', index=False)
note_multiple_nums

Unnamed: 0,song_id,difficulty,total_notes,critical,normal,holding
0,26,easy,148,10,68,70
1,26,normal,371,26,216,129
2,26,hard,520,45,294,181
3,26,expert,825,70,500,255
4,26,master,952,65,674,213
5,49,easy,376,2,181,193
6,49,normal,751,6,465,280
7,49,hard,1116,46,724,346
8,49,expert,1502,54,1123,325
9,49,master,1666,65,1389,212


In [16]:
correct_max_combo = requests.get('https://raw.githubusercontent.com/Sekai-World/sekai-master-db-diff/master/musicDifficulties.json')
correct_max_combo = pd.DataFrame(json.loads(correct_max_combo.text))
correct_max_combo = correct_max_combo[['musicId', 'musicDifficulty', 'noteCount', 'playLevel']]
# correct_max_combo

In [17]:
check_correctness = note_nums.merge(correct_max_combo, left_on=['song_id', 'difficulty'], right_on=['musicId', 'musicDifficulty'], how='left')
check_correctness = check_correctness.drop(columns=['musicId', 'musicDifficulty'])
# check_correctness

In [18]:
check_correctness.query('total_notes != noteCount')

Unnamed: 0,song_id,difficulty,diamonds,holding_eighth,critical_endpoints,normal_endpoints,critical_basic_notes,normal_basic_notes,total_notes,noteCount,playLevel


In [42]:
score_per_note = pd.Series([3444, 1152, 794, 519, 428, 1617, 670, 407, 285])

In [43]:
guess = note_multiple_nums.copy()
guess['weight'] = guess['critical'] * 2 + guess['normal'] * 1 + guess['holding'] * 0.1
guess = guess.head(9)
guess['score_per_note'] = score_per_note
guess['party_power'] = 80989
guess['multiplication'] = guess['weight'] * guess['score_per_note'] / guess['party_power']
guess

Unnamed: 0,song_id,difficulty,total_notes,critical,normal,holding,weight,score_per_note,party_power,multiplication
0,26,easy,148,10,68,70,95.0,3444,80989,4.039808
1,26,normal,371,26,216,129,280.9,1152,80989,3.995565
2,26,hard,520,45,294,181,402.1,794,80989,3.942108
3,26,expert,825,70,500,255,665.5,519,80989,4.264709
4,26,master,952,65,674,213,825.3,428,80989,4.361437
5,49,easy,376,2,181,193,204.3,1617,80989,4.078987
6,49,normal,751,6,465,280,505.0,670,80989,4.177728
7,49,hard,1116,46,724,346,850.6,407,80989,4.274583
8,49,expert,1502,54,1123,325,1263.5,285,80989,4.446252


In [41]:
note_multiple_nums

Unnamed: 0,song_id,difficulty,total_notes,critical,normal,holding
0,26,easy,148,10,68,70
1,26,normal,371,26,216,129
2,26,hard,520,45,294,181
3,26,expert,825,70,500,255
4,26,master,952,65,674,213
5,49,easy,376,2,181,193
6,49,normal,751,6,465,280
7,49,hard,1116,46,724,346
8,49,expert,1502,54,1123,325
9,49,master,1666,65,1389,212
