In [1]:
import pandas as pd
import json
import re

# Read pid-performance matching file 
pid_file = 'pidperf.csv'
pidinfo = pd.read_csv(pid_file)


In [2]:
# pid-performance table check
pidinfo

Unnamed: 0,pid,filename
0,11,MIDI-Unprocessed_02_R3_2008_01-03_ORIG_MID--AU...
1,12,MIDI-Unprocessed_09_R3_2008_01-07_ORIG_MID--AU...
2,13,MIDI-Unprocessed_12_R3_2008_01-04_ORIG_MID--AU...
3,14,MIDI-Unprocessed_R2_D2-19-21-22_mid--AUDIO-fro...
4,15,MIDI-Unprocessed_R2_D2-12-13-15_mid--AUDIO-fro...
...,...,...
63,82,MIDI-Unprocessed_R2_D2-19-21-22_mid--AUDIO-fro...
64,83,MIDI-Unprocessed_12_R3_2008_01-04_ORIG_MID--AU...
65,84,MIDI-Unprocessed_17_R3_2011_MID--AUDIO_R3-D6_0...
66,85,MIDI-Unprocessed_075_PIANO075_MID--AUDIO-split...


In [12]:
# 퍼포먼스 매칭, pid - filename

def get_filename_from_pid(pid):
    if pid in pidinfo['pid'].values:
        filename = pidinfo.loc[pidinfo['pid'] == pid, 'filename'].values[0]
        return filename
    else:
        return "No filename data"

In [4]:
#item - feature mapping
#feature: 1~19 / new_feature: item number
#back means option is opposite

feature_mapping = {
    'feature': [1, 9, 11, 2, 3, 7, 6, 8, 10, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19],
    'new_feature': [11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
    'back': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
}

df2 = pd.DataFrame(feature_mapping)

In [5]:
def get_feature(item):
    subset = df2[df2['new_feature'] == item]

    feature_value = subset.iloc[0]['feature']
    back_value = subset.iloc[0]['back']
    return int(feature_value), int(back_value)


# 예시: item이 14인 경우
item = 22
feature, back = get_feature(item)
print(f"Item {item}의 feature 값은 {feature}, back 값은 {back}입니다.")

Item 22의 feature 값은 4, back 값은 0입니다.


In [36]:
def generate_JSON(df, outputname):

    # 수정된 annotation 중 가장 최근 annotation만 남기기 (item, pid 기준)
    indices_to_delete = set()

    # Iterate over each unique combination of 'item' and 'pid'
    for (annotation_id, pid), group in df.groupby(['annotation_id', 'pid']):
        # Iterate over each pair of rows in the group
        for i in range(len(group)):
            if len(group) == 1 : 

            else:

                for j in range(i + 1, len(group)):
                    row1 = group.iloc[i]
                    row2 = group.iloc[j]

                    # Check for overlapping range
                    if not (row1['end_measure'] < row2['start_measure'] or row1['start_measure'] > row2['end_measure']):
                        indices_to_delete.add(row1.name)

    df = df.drop(indices_to_delete)

    # JSON 구조를 생성합니다.
    output = []
    count =0

    inner_list = [0] * 5  # 예시로 [0, 0, 0]으로 초기화
    outer_list = [inner_list[:] for _ in range(20)]

    for pid in df['pid'].unique():

        pname = get_filename_from_pid(pid)

        subset2 = df[df['pid'] == pid]
        if (subset2['item'].isin(df2['new_feature'])).any():
            file_data = {"performance_id": pname, "features": []}

            # 이전 19개 label로 filter
            for item in subset2['item'].unique():
                if item in df2['new_feature'].values:

                    subset = subset2[subset2['item'] == item]
                    # check
                    #print(subset)

                    if len(subset) > 0:

                        feature, back = get_feature(item)

                        feature_data = {"feature_type": int(feature), "intervals": []}

                        for _, row in subset.iterrows():
                            # check
                            #print(row['start_measure'],row['end_measure'],row['observation'])
                            if(back==1): 
                                value = 8-int(row['observation'])
                            else:
                                value = int(row['observation'])

                            interval = {
                                "interval": [int(row['start_measure'])-1, int(row['end_measure'])-1],
                                "value": value
                            }
                            value_id = value
                            outer_list[feature][4] += int(row['end_measure']) -int(row['start_measure'])
                            outer_list[feature][3] += 1 #count
                            if value_id > 4:
                                outer_list[feature][0] += 1
                            elif value_id < 4:
                                outer_list[feature][2] += 1
                            else:
                                outer_list[feature][1] += 1

                            feature_data["intervals"].append(interval)
                        file_data["features"].append(feature_data)

            output.append(file_data)
            count += 1


    # JSON 객체를 생성합니다.
    json_data = {"output": output}
    # JSON 파일로 저장합니다.
    with open(outputname, 'w') as json_file:
        json.dump(json_data, json_file, indent=4)

    # 최종 결과만 출력
    print("JSON 파일이 성공적으로 생성되었습니다.")
    print("feature별 annotation 통계 (index 1부터) - high/neutral/low/count/measure")
    print(outer_list)


IndentationError: expected an indented block (<ipython-input-36-5a0b2cff8ad3>, line 12)

In [37]:

# CSV 파일을 읽어옵니다.
csv_file = 'beethoven2.csv'
df = pd.read_csv(csv_file)



In [38]:
# Table 확인
df

Unnamed: 0,annotation_item_id,annotation_id,item,level,pid,suggestion,observation,start_measure,end_measure
0,11131,357,14,2,61,5,3,22,24
1,11163,358,14,2,61,6,2,14,16
2,11165,358,16,2,61,2,6,14,16
3,11223,360,10,2,61,5,3,57,72
4,11256,361,11,2,61,2,5,145,152
...,...,...,...,...,...,...,...,...,...
332,16666,530,13,2,85,6,7,215,224
333,16677,530,24,2,85,5,3,215,224
334,16679,530,26,2,85,5,3,215,224
335,16680,530,27,2,85,6,7,215,224


In [39]:
# 퍼포먼스 개수
len(sorted(df['pid'].unique()))

24

In [40]:
generate_JSON(df, 'beethoven2.json')

357
360
366
367
370
373
374
377
381
386
387
388
394
403
422
423
429
432
433
435
436
459
477
479
493
498
499
500
507
508
511
515
519
     annotation_item_id  annotation_id  item  level  pid  suggestion  \
0                 11131            357    14      2   61           5   
2                 11165            358    16      2   61           2   
3                 11223            360    10      2   61           5   
6                 11277            361    32      2   61           5   
8                 11303            362    26      2   61           5   
..                  ...            ...   ...    ...  ...         ...   
324               16536            526    11      2   85           5   
326               16584            527    27      2   85           4   
328               16613            528    24      2   85           5   
331               16646            529    25      2   85           5   
336               16685            530    32      2   85           5   

   