In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import openai
from openai import OpenAI
from tqdm import tqdm
import os
import time
from config import OPENAI_API_KEY

# Opt in to pandas' future behaviour of not silently downcasting object results
# (relevant to the `.replace(...)` calls used on the `dynamic` column later on).
pd.set_option('future.no_silent_downcasting', True)
# Keep the key on the openai module; the request cells read `openai.api_key`
# back when they construct the `OpenAI` client.
openai.api_key = OPENAI_API_KEY

In [30]:
# Per-composer data folders (relative to the notebook's working directory).
# Only `bach_path` is read by the cells shown in this notebook.
bach_path = 'midi_data/bach/'
beethoven_path = 'midi_data/beethoven/'
chopin_path = 'midi_data/chopin/'
chopin_prelude_path = 'midi_data/chopin_prelude/'
mozart_path = 'midi_data/mozart/'

In [31]:
# Resolve both CSV locations first so the paths are easy to inspect or swap.
# NOTE(review): `input_data` is presumably one player's recording and
# `target_data` the reference score -- confirm against the data folder.
performance_csv = os.path.join(bach_path, 'hyunsung', 'bach_note_perfect1.csv')
target_csv = os.path.join(bach_path, 'bach_note_target.csv')

input_data = pd.read_csv(performance_csv)
target_data = pd.read_csv(target_csv)

In [32]:
# Preview the first rows of the raw input table to check its column layout.
input_data.head()

Unnamed: 0,sec,msg_type,channel,note,velocity,dynamic,accent,count,main_vol,depth,pedal,pan
0,0.0,['note_on'],[0],[64],[50],p,0,1,100,0,0,125
1,0.1,['note_on'],[0],[67],[57],p,0,1,0,0,0,0
2,0.2,[],[],[],[],,0,0,0,0,0,0
3,0.3,['note_on'],[0],[72],[54],p,0,1,0,0,0,0
4,0.4,[],[],[],[],,0,0,0,0,0,0


In [33]:
def filter_pedal(pedal_value, threshold=64):
    """Collapse a raw pedal cell into a binary flag.

    Parameters
    ----------
    pedal_value : str, list, tuple, int, float or None
        Raw cell content. Accepted forms are a single number (``70``,
        ``70.0``, numpy scalars), a numeric string (``'70'``, ``'70.0'``),
        a list of numbers, or the text form of such a list (``'[0, 127]'``)
        as produced when a list column is round-tripped through CSV.
    threshold : int or float, default 64
        A value greater than or equal to this counts as "pedal pressed".

    Returns
    -------
    int
        ``1`` if the value (or any element of the list) reaches
        ``threshold``, otherwise ``0``. Missing, empty, NaN, unparseable or
        unsupported inputs yield ``0``.
    """
    import ast
    import numbers

    def _is_pressed(value):
        # NaN compares False against every number, so it naturally maps to 0.
        return isinstance(value, numbers.Real) and value >= threshold

    if isinstance(pedal_value, str):
        text = pedal_value.strip()
        if not text:
            return 0
        try:
            if text.startswith('['):
                # literal_eval only accepts Python literals, so text coming
                # from a CSV file can never execute code (unlike eval).
                pedal_value = ast.literal_eval(text)
            else:
                # float() also accepts integer text and forms such as '64.0'.
                pedal_value = float(text)
        except (ValueError, SyntaxError):
            return 0

    if isinstance(pedal_value, (list, tuple)):
        return 1 if any(_is_pressed(v) for v in pedal_value) else 0

    # Covers int, float and numpy scalar types; anything else falls through to 0.
    return 1 if _is_pressed(pedal_value) else 0

In [53]:
# Columns kept from both raw tables, in the order they appear in the result.
feature_columns = ["note", "velocity", "dynamic", "accent", "pedal"]

# Independent copies, so the preprocessing below never touches the raw frames.
input_df = input_data[feature_columns].copy()
target_df = target_data[feature_columns].copy()

# dynamic preprocessing: ordinal code per dynamic marking, 0 when none is given
replace_dict = {np.nan: 0, '': 0, 'ppp': 1, 'pp': 2, 'p': 3, 'mp': 4, 'mf': 5, 'f': 6, 'ff': 7, 'fff': 8}

# Apply the same two transforms to the input and the target frame.
for frame in (input_df, target_df):
    frame['dynamic'] = frame['dynamic'].replace(replace_dict)
    # pedal preprocessing: reduce raw pedal values to a 0/1 flag
    frame['pedal'] = frame['pedal'].apply(filter_pedal)

In [55]:
# Spot-check the preprocessing: `dynamic` should now hold the numeric codes
# from `replace_dict` and `pedal` the 0/1 flag returned by `filter_pedal`.
input_df.head()

Unnamed: 0,note,velocity,dynamic,accent,pedal
0,[64],[50],3,0,0
1,[67],[57],3,0,0
2,[],[],0,0,0
3,[72],[54],3,0,0
4,[],[],0,0,0


In [56]:
# Inspect the note column; it is object-typed, with entries such as [64] and
# [] (presumably rows without a note event -- confirm against the raw data).
input_df['note']

0       [64]
1       [67]
2         []
3       [72]
4         []
        ... 
2654      []
2655      []
2656      []
2657      []
2658      []
Name: note, Length: 2659, dtype: object

In [39]:
# Select Model
# Models this notebook accepts; the input loop repeats until one is typed exactly.
SUPPORTED_MODELS = ('gpt-3.5-turbo', 'gpt-4o')

while True:
    GPT_model = input("Select a GPT Model\n• gpt-3.5-turbo : Fast, Moderate Quality results\n• gpt-4o : Slow, Great Quality results\n>> ").strip()
    if GPT_model in SUPPORTED_MODELS:
        break
    print("Type the name of the model correctly\n")

client = OpenAI(api_key = openai.api_key)

# Test prompt
test_prompt = "Hello! Can you confirm that the API is working?"

try:
    completion = client.chat.completions.create(
        model=GPT_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": test_prompt},
        ]
    )
except openai.OpenAIError as e:
    # Report API-side failures (authentication, connectivity, rate limits)
    # without aborting the notebook. Other exceptions are real programming
    # errors and are allowed to propagate instead of being printed away.
    print(f"An error occurred: {e}")
else:
    # Response message: `.content` is the reply text, so this is a plain str.
    response_message = completion.choices[0].message.content

    print("\nAPI Response:")
    print(response_message)

Select a GPT Model
• gpt-3.5-turbo : Fast, Moderate Quality results
• gpt-4o : Slow, Great Quality results
>> gpt-3.5-turbo

API Response:
ChatCompletionMessage(content='Hello! I can certainly help you check if the API is working. Could you please provide me with more details on the API you are referring to and any specific endpoints or functionality you would like me to test?', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


In [45]:
# `response_message` already holds the reply text: the request cells store
# `completion.choices[0].message.content`. Display it directly -- accessing
# `.content` on a str raises AttributeError when the notebook is re-run.
response_message

'Hello! I can certainly help you check if the API is working. Could you please provide me with more details on the API you are referring to and any specific endpoints or functionality you would like me to test?'

In [58]:
# Sanity check: both frames must expose the same feature columns, because the
# prompt-building cell reads 'note', 'dynamic', 'accent' and 'pedal' from each.
print("Columns in target_df:", target_df.columns)
print("Columns in input_df:", input_df.columns)

Columns in target_df: Index(['note', 'velocity', 'dynamic', 'accent', 'pedal'], dtype='object')
Columns in input_df: Index(['note', 'velocity', 'dynamic', 'accent', 'pedal'], dtype='object')


In [80]:
# Design Prompt

# Rows per chunk. The prompt text below tells the model that every chunk is
# "10 seconds (100 indices)", so the slicing has to agree with that wording.
CHUNK_SIZE = 100


def split_into_chunks(frame, chunk_size=CHUNK_SIZE):
    """Split `frame` into consecutive row slices of at most `chunk_size` rows."""
    return [frame.iloc[start:start + chunk_size]
            for start in range(0, len(frame), chunk_size)]


def unique_values(chunk, column):
    """Return the distinct non-null values of `column` in `chunk` as a list."""
    return chunk[column].dropna().unique().tolist()


# NOTE(review): no cell in this notebook defined `target_chunks` or
# `performance_chunks`, so this cell failed with NameError on a fresh kernel.
# They are rebuilt here from the preprocessed frames using the chunk size the
# prompt describes -- confirm this matches the originally intended chunking.
target_chunks = split_into_chunks(target_df)
performance_chunks = split_into_chunks(input_df)

prompt = """
You are an expert system for evaluating piano performances.
Analyze the performance data against the target sheet music and provide detailed scores for each 10-second chunk (100 indices per chunk).

For each chunk, evaluate the following criteria:
1. Note Accuracy: How accurately the performed notes match the target notes, considering pitch, time alignment, and overlap.
2. Dynamic Similarity: How closely the performed dynamics resemble the target dynamics.
3. Dynamic Change Consistency: How consistently the dynamic changes in the performance align with those in the target sheet music.
4. Accent Accuracy: How accurately accents in the performance match the accents in the target sheet music.
5. Articulation Similarity: How similar the articulation (e.g., staccato, legato) in the performance is to the target sheet music.
6. Rhythm Similarity: How accurately the rhythm (note duration and timing) matches the target rhythm.
7. Pedaling Accuracy: How accurately the pedal usage aligns with the target pedal markings.

After evaluating each chunk, provide:
- A percentage score for each criterion.
- A brief explanation highlighting key observations.

Finally, calculate the overall score by averaging the chunk-level scores for each criterion, and provide a summary of the overall performance.

Additionally, based on the performance evaluation, provide personalized feedback including:
1. Strengths: Highlight aspects where the performance excelled.
2. Weaknesses: Point out areas needing improvement.
3. Suggestions for Improvement: Provide actionable advice to enhance future performances.

Input Data:
"""

# Append the target and performance data to the prompt, one section per chunk.
# zip() stops at the shorter list, so trailing chunks that exist in only one of
# the two frames are left out of the prompt.
for i, (target_chunk, performance_chunk) in enumerate(zip(target_chunks, performance_chunks), start=1):
    # Unique values of each target chunk
    target_notes = unique_values(target_chunk, 'note')
    target_dynamic = unique_values(target_chunk, 'dynamic')
    target_accent = unique_values(target_chunk, 'accent')
    target_pedal = unique_values(target_chunk, 'pedal')

    # Unique values of each performance chunk
    performance_notes = unique_values(performance_chunk, 'note')
    performance_dynamic = unique_values(performance_chunk, 'dynamic')
    performance_accent = unique_values(performance_chunk, 'accent')
    performance_pedal = unique_values(performance_chunk, 'pedal')

    # Add the target/performance pair for this chunk to the prompt
    prompt += f"""
Chunk {i}:
- Target Notes: {target_notes}
- Performance Notes: {performance_notes}
- Target Dynamic: {target_dynamic}
- Performance Dynamic: {performance_dynamic}
- Target Accent: {target_accent}
- Performance Accent: {performance_accent}
- Target Pedal: {target_pedal}
- Performance Pedal: {performance_pedal}
"""

prompt += """
Output:
Do not provide label any section as 'N/A' even if data is insufficient.
Do not include section titles, bullet points, or extra formatting like asterisks.

For each chunk (10 seconds), provide scores and explanations for:
1. Note Accuracy: (Percentage, reasoning)
2. Dynamic Similarity: (Percentage, reasoning)
3. Dynamic Change Consistency: (Percentage, reasoning)
4. Accent Accuracy: (Percentage, reasoning)
5. Articulation Similarity: (Percentage, reasoning)
6. Rhythm Similarity: (Percentage, reasoning)
7. Pedaling Accuracy: (Percentage, reasoning)

After evaluating all chunks, calculate the overall score by averaging the scores across all chunks for each criterion. Provide:
- The overall percentage scores for each criterion.

Additionally, provide personalized feedback including:
1. Strengths: What aspects of the performance were particularly well-executed?
2. Weaknesses: What aspects of the performance need improvement?
3. Suggestions for Improvement: What can the performer do to enhance their future performances? Include specific actionable steps.
"""

print(prompt)


You are an expert system for evaluating piano performances.
Analyze the performance data against the target sheet music and provide detailed scores for each 10-second chunk (100 indices per chunk).

For each chunk, evaluate the following criteria:
1. Note Accuracy: How accurately the performed notes match the target notes, considering pitch, time alignment, and overlap.
2. Dynamic Similarity: How closely the performed dynamics resemble the target dynamics.
3. Dynamic Change Consistency: How consistently the dynamic changes in the performance align with those in the target sheet music.
4. Accent Accuracy: How accurately accents in the performance match the accents in the target sheet music.
5. Articulation Similarity: How similar the articulation (e.g., staccato, legato) in the performance is to the target sheet music.
6. Rhythm Similarity: How accurately the rhythm (note duration and timing) matches the target rhythm.
7. Pedaling Accuracy: How accurately the pedal usage aligns with th

In [83]:
# Select Model
# Models this notebook accepts; the input loop repeats until one is typed exactly.
SUPPORTED_MODELS = ('gpt-3.5-turbo', 'gpt-4o')

while True:
    GPT_model = input("Select a GPT Model\n• gpt-3.5-turbo : Fast, Moderate Quality results\n• gpt-4o : Slow, Great Quality results\n>> ").strip()
    if GPT_model in SUPPORTED_MODELS:
        break
    print("Type the name of the model correctly\n")

client = OpenAI(api_key = openai.api_key)

# Build the messages outside the try block so that a missing `prompt`
# (the prompt cell was not run) fails loudly instead of being reported as an
# API error.
messages = [
    {"role": "system", "content": "You are an expert system designed to evaluate piano performances. Your role is to analyze the provided performance data against the target sheet music, calculate scores for specific criteria, and provide detailed feedback. Your feedback must be professional, constructive, and helpful for improving future performances."},
    {"role": "user", "content": prompt},
]

completion = None
start_time = time.time()
try:
    completion = client.chat.completions.create(
        model=GPT_model,
        messages=messages,
    )
except openai.OpenAIError as e:
    # Report API-side failures without aborting the notebook; programming
    # errors are not caught here and propagate normally.
    print(f"An error occurred: {e}")
finally:
    # Always record the end time, so the timing cell works (and is not stale)
    # even when the request failed. It is taken before any response printing.
    end_time = time.time()

if completion is not None:
    print("Request completed. Processing response...\n")

    # Response message: `.content` is the reply text, so this is a plain str.
    response_message = completion.choices[0].message.content

    print("\nAPI Response:")
    print(response_message)

Select a GPT Model
• gpt-3.5-turbo : Fast, Moderate Quality results
• gpt-4o : Slow, Great Quality results
>> gpt-4o
Request completed. Processing response...


API Response:
Chunk 1:
Note Accuracy: 60%. The performance matched some of the target notes but missed several combinations, and there were misalignments, particularly with the intended pitches.
Dynamic Similarity: 100%. Dynamics were matched perfectly with the target.
Dynamic Change Consistency: 100%. Dynamic changes were executed as marked in the sheet music.
Accent Accuracy: 100%. Accents matched the target.
Articulation Similarity: 80%. Some notes, like those of multi-note chords, could be, clearer.
Rhythm Similarity: 70%. The timing of notes had some discrepancies, especially where notes were missing.
Pedaling Accuracy: 50%. The pedaling was reversed from the target markings.

Chunk 2:
Note Accuracy: 50%. Many target notes were missed or not performed as intended in terms of pitch and combination.
Dynamic Similarity: 60%. 

In [84]:
# Wall-clock duration of the evaluation request; relies on `start_time` and
# `end_time` having been set by the request cell.
print(f"Response generated in {end_time - start_time:.2f} seconds.")

Response generated in 36.61 seconds.
