# Yearly

In [None]:
import pandas as pd
import requests
import json

# Read the table of distinct genres; the names are taken from its 'Genre' column.
genre_df = pd.read_csv('[KR] Unique Genre Counts.csv')

# Keep the genres both as a Python list (passed to the prompt builders)
# and as one comma-separated string (printed for a quick visual check).
unique_genres = genre_df['Genre'].tolist()
genre_list_str = ', '.join(unique_genres)
print(genre_list_str)

In [None]:
import requests
import json
import pandas as pd
from openai import OpenAI

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the zero-shot genre prompt.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Prompt sections are separated by one blank line; the text ends on a bare
    # "Genre:" cue with no trailing newline.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [발라드, 댄스, 랩/힙합]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }


def prepare_cot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the "cot" genre prompt.

    Compared with the zero-shot prompt this one adds an explicit
    "identify the genres" instruction. It still tells the model to output
    nothing but the genre line, so no reasoning text is requested.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Prompt sections are separated by one blank line; the text ends on a bare
    # "Genre:" cue with no trailing newline.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Based on the lyrics provided, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [발라드, 댄스, 랩/힙합]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }


def prepare_cot_few_shot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the one-example ("few-shot") genre prompt.

    The prompt lists the candidate genres, shows one worked example (fixed
    lyrics plus their genres), then asks for the genres of `lyrics`.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Worked example shown to the model; every line, including the last,
    # ends with a newline.
    example_lyrics = (
        "그치지 않기를 바랬죠\n"
        "처음 그대 내게로 오던 그날에\n"
        "잠시 동안 적시는\n"
        "그런 비가 아니길\n"
        "간절히 난 바래왔었죠\n"
        "그대도 내 맘 아나요\n"
        "매일 그대만 그려왔던 나를\n"
        "오늘도 내 맘에 스며들죠\n"
        "그대는 선물입니다\n"
        "하늘이 내려준\n"
        "홀로 선 세상 속에\n"
        "그댈 지켜줄게요\n"
        "어느 날 문득\n"
        "소나기처럼\n"
        "내린 그대지만\n"
        "오늘도 불러 봅니다\n"
        "내겐 소중한 사람\n"
        "Oh\n"
        "떨어지는 빗물이\n"
        "어느새 날 깨우고\n"
        "그대 생각에 잠겨요\n"
        "이제는 내게로 와요\n"
        "언제나처럼 기다리고 있죠\n"
        "그대 손을 꼭 잡아줄게요\n"
    )

    example_genres = "발라드, 국내드라마"

    # NOTE(review): the "Output example" line below uses English genre names in
    # brackets, while the worked example and the other two prompts use Korean
    # names. Left unchanged so prompts match earlier runs - confirm whether
    # this is intended.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Example:",
        f"Lyrics: '{example_lyrics}'",
        f"Genre: {example_genres}",
        "Now, based on the provided lyrics, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

In [None]:
import os

# Read the OpenAI key from the environment so it is never stored in the notebook.
# (The previous placeholder line had no right-hand side and did not parse.)
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
client = OpenAI(api_key=api_key)

In [None]:
# Yearly dataset; its 'Lyrics' column is read when the batch requests are built.
df = pd.read_csv('[1990-2023 Lyrics, Genre, Description] Melon.csv')

In [None]:
# Build one batch request per song for each prompting strategy.
tasks_zero_shot = []
tasks_cot = []
tasks_cot_few_shot = []

# (custom_id prefix, payload builder, destination list) for every strategy.
strategies = (
    ("zero_shot", prepare_zero_shot_payload, tasks_zero_shot),
    ("cot", prepare_cot_payload, tasks_cot),
    ("cot_few_shot", prepare_cot_few_shot_payload, tasks_cot_few_shot),
)

for i, row in df.iterrows():
    lyrics = row['Lyrics']

    # The DataFrame index is embedded in custom_id so each result can be
    # traced back to its source row.
    for prefix, build_payload, bucket in strategies:
        bucket.append({
            "custom_id": f"{prefix}_{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": build_payload(lyrics, unique_genres),
        })

In [None]:
# Save tasks to JSONL files
# One output file name per prompting strategy (zero-shot, cot, cot few-shot).
file_name_zero_shot = "batch_zero_shot_KR_Genre.jsonl"
file_name_cot = "batch_cot_KR_Genre.jsonl"
file_name_cot_few_shot = "batch_cot_few_shot_KR_Genre.jsonl"

def save_tasks_to_file(file_name, tasks):
    """Write `tasks` to `file_name` as JSON Lines: one JSON document per line.

    The file is opened in text write mode, so existing content is replaced.
    """
    serialized = (json.dumps(entry) + '\n' for entry in tasks)
    with open(file_name, 'w') as file:
        file.writelines(serialized)

# Write each strategy's task list to its own JSONL file.
for target_name, task_list in (
    (file_name_zero_shot, tasks_zero_shot),
    (file_name_cot, tasks_cot),
    (file_name_cot_few_shot, tasks_cot_few_shot),
):
    save_tasks_to_file(target_name, task_list)

In [None]:
# Upload the files and submit one batch job per file
def _upload_and_submit(file_name):
    """Upload a JSONL request file and start a chat-completions batch job for it.

    Returns:
        (uploaded file object, batch job object)
    """
    # The context manager closes the handle once the upload call has returned.
    with open(file_name, "rb") as handle:
        uploaded = client.files.create(file=handle, purpose="batch")
    # The status-check cell reads `batch_job_*.id`, so the jobs must be created here.
    job = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    return uploaded, job


batch_file_zero_shot, batch_job_zero_shot = _upload_and_submit(file_name_zero_shot)
batch_file_cot, batch_job_cot = _upload_and_submit(file_name_cot)
batch_file_cot_few_shot, batch_job_cot_few_shot = _upload_and_submit(file_name_cot_few_shot)

In [None]:
# Fetch the current state of each batch job, then print what came back.
batch_job_zero_shot_status = client.batches.retrieve(batch_job_zero_shot.id)
batch_job_cot_status = client.batches.retrieve(batch_job_cot.id)
batch_job_cot_few_shot_status = client.batches.retrieve(batch_job_cot_few_shot.id)

for label, status in (
    ("Zero-shot", batch_job_zero_shot_status),
    ("Chain-of-thought", batch_job_cot_status),
    ("Chain-of-thought few-shot", batch_job_cot_few_shot_status),
):
    print(f"{label} batch job status:", status)

### Evaluation

In [None]:
from openai import OpenAI

import os

# Read the OpenAI key from the environment so it is never stored in the notebook.
# (The previous placeholder line had no right-hand side and did not parse.)
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
client = OpenAI(api_key=api_key)

In [None]:
# zero-shot: Genre

# NOTE(review): hard-coded ID of a batch output file; presumably only valid for
# the account that ran the job - replace with your own job's output file ID.
zero_shot_output_file_id = 'file-s7hhvUVKMhRYvXjCVkvfyBAF'

result_zeroshot = client.files.content(zero_shot_output_file_id).content
result_zeroshot_file_name = "batch_job_zeroshot_KR_Genre_results.jsonl"

# Store the downloaded bytes untouched. Writing them back as text would use the
# platform's default encoding, while the loader further down reads the file as UTF-8.
with open(result_zeroshot_file_name, "wb") as file:
    file.write(result_zeroshot)

results_zeroshot = []
with open(result_zeroshot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # skip blank lines, which json.loads would reject
            results_zeroshot.append(json.loads(line))

In [None]:
# cot: Genre

# NOTE(review): hard-coded ID of a batch output file; presumably only valid for
# the account that ran the job - replace with your own job's output file ID.
cot_output_file_id = 'file-uzt2XcmAO9ZEyr30GV6rkYfe'

result_cot = client.files.content(cot_output_file_id).content
result_cot_file_name = "batch_job_cot_KR_Genre_results.jsonl"

# Store the downloaded bytes untouched. Writing them back as text would use the
# platform's default encoding, while the loader further down reads the file as UTF-8.
with open(result_cot_file_name, "wb") as file:
    file.write(result_cot)

results_cot = []
with open(result_cot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # skip blank lines, which json.loads would reject
            results_cot.append(json.loads(line))

In [None]:
# cot+few-shot: Genre

# NOTE(review): hard-coded ID of a batch output file; presumably only valid for
# the account that ran the job - replace with your own job's output file ID.
cot_few_shot_output_file_id = 'file-K2JQzYzRD6u9rAiuaOdBvzO1'

result_cot_few_shot = client.files.content(cot_few_shot_output_file_id).content
result_cot_few_shot_file_name = "batch_job_cot_fewshot_KR_Genre_results.jsonl"

# Store the downloaded bytes untouched. Writing them back as text would use the
# platform's default encoding, while the loader further down reads the file as UTF-8.
with open(result_cot_few_shot_file_name, "wb") as file:
    file.write(result_cot_few_shot)

results_cot_few_shot = []
with open(result_cot_few_shot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # skip blank lines, which json.loads would reject
            results_cot_few_shot.append(json.loads(line))

In [None]:
# Load the JSONL Files and Create DataFrames

import json
import pandas as pd

# Load the JSONL files
# Maps each prompting method to its results file; the keys later become the
# prefixes of the prediction columns (e.g. "zero_shot" -> "zero_shot_genre").
files = {
    "zero_shot": "batch_job_zeroshot_KR_Genre_results.jsonl",
    "cot": "batch_job_cot_KR_Genre_results.jsonl",
    "cot_few_shot": "batch_job_cot_fewshot_KR_Genre_results.jsonl"
}

# Function to read JSONL file and return DataFrame
def _row_id_from_custom_id(custom_id):
    """Return the integer after the last underscore of `custom_id`, or None if there is none."""
    if not isinstance(custom_id, str):
        return None
    suffix = custom_id.rsplit('_', 1)[-1]
    return int(suffix) if suffix.isdecimal() else None


def read_jsonl(file_path):
    """Parse a batch results file into a one-column DataFrame of genre strings.

    Every non-blank line is decoded as JSON and the text found at
    response -> body -> choices[0] -> message -> content is kept, with the
    literal "Genre: " removed and surrounding whitespace stripped.

    When every kept line has a "custom_id" ending in "_<integer>" (and those
    integers are unique), the frame is indexed by that integer and sorted by
    it. Rows then line up with their source rows even if result lines are
    out of order or some requests produced no completion. Otherwise the
    default positional index is used.

    Lines that are not valid JSON, or that carry no usable completion (missing
    keys, null response, empty choices, null content), are reported on stdout
    and skipped.

    Args:
        file_path: Path of the JSONL file; it is read as UTF-8.

    Returns:
        pandas.DataFrame with a single 'genre' column.
    """
    data = []
    row_ids = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:  # Check if the line is not empty
                continue
            try:
                json_obj = json.loads(line)
                # Extract the genre from the JSON object
                content = json_obj['response']['body']['choices'][0]['message']['content']
                genre = content.replace("Genre: ", "").strip()
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            except (KeyError, IndexError, TypeError, AttributeError) as e:
                # TypeError / AttributeError: a null "response" or null content;
                # IndexError: an empty "choices" list.
                print(f"Missing key in JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            # Append the genre to the data list
            data.append({'genre': genre})
            row_ids.append(_row_id_from_custom_id(json_obj.get('custom_id')))

    frame = pd.DataFrame(data, columns=['genre'])
    ids_usable = (
        bool(row_ids)
        and all(row_id is not None for row_id in row_ids)
        and len(set(row_ids)) == len(row_ids)
    )
    if ids_usable:
        frame.index = row_ids
        frame = frame.sort_index()
    return frame

# Parse every results file into its own one-column DataFrame, keyed by method.
dfs = {}
for key, path in files.items():
    dfs[key] = read_jsonl(path)

# Put the per-method frames side by side; rows are matched on their index
# labels and columns become (method, 'genre') pairs.
aligned_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys())

# Collapse the two-level column labels into flat "<method>_genre" names.
aligned_df.columns = ['_'.join(col).strip() for col in aligned_df.columns.values]

# Ground-truth table (the same CSV the requests were built from).
ground_truth_df = pd.read_csv("[1990-2023 Lyrics, Genre, Description] Melon.csv")

# Attach the prediction columns to the ground truth, again matching on index.
merged_df = pd.concat([ground_truth_df, aligned_df], axis=1)

# Show the resulting column names as a sanity check.
print(merged_df.columns.tolist())

In [None]:
# Compare Predictions with Ground Truth

# Function to clean and split genre strings
def clean_and_split_genres(genre_str):
    """Turn a genre string such as "[발라드, 댄스]" into a set of genre names.

    Square brackets and quote characters are removed, the text is split on
    commas, and each piece is stripped of surrounding whitespace. Empty
    pieces are dropped, so an empty string yields an empty set. Values that
    are not strings (for example NaN from a missing cell) also yield an
    empty set.
    """
    if not isinstance(genre_str, str):
        return set()
    for unwanted in ('[', ']', "'", '"'):
        genre_str = genre_str.replace(unwanted, '')
    return {piece.strip() for piece in genre_str.split(',') if piece.strip()}

# Function to calculate overlap ratio
def calculate_overlap_ratio(predicted_genres, true_genres):
    """Return the fraction of true genres that also appear in the prediction.

    Returns 0 when there are no true genres.
    """
    predicted_set = clean_and_split_genres(predicted_genres)
    true_set = clean_and_split_genres(true_genres)
    intersection = predicted_set.intersection(true_set)
    return len(intersection) / len(true_set) if true_set else 0

# Function to calculate exact match accuracy
def calculate_exact_match(predicted_genres, true_genres):
    """Return 1 if the prediction shares at least one genre with the truth, else 0.

    NOTE(review): despite the name, this does not require the two sets to be
    equal - any overlap scores 1. Behaviour kept as is; confirm this is the
    metric intended for the "Exact Match" columns.
    """
    predicted_set = clean_and_split_genres(predicted_genres)
    true_set = clean_and_split_genres(true_genres)
    return 1 if not predicted_set.isdisjoint(true_set) else 0

In [None]:
# Merge the ground truth with the predictions (column-wise, matched on index)
merged_df = pd.concat([ground_truth_df, aligned_df], axis=1)

# Prompting methods to score; each has a "<method>_genre" prediction column.
methods = ('zero_shot', 'cot', 'cot_few_shot')

# Initialize results list
results_summary = []

# Running sums of both metrics per method, plus the number of rows seen.
total_metrics = {}
for method in methods:
    total_metrics[f'{method}_overlap_ratio'] = 0
    total_metrics[f'{method}_exact_match'] = 0
total_metrics['total_rows'] = 0

# Iterate through the merged dataframe and accumulate the metrics
for _, row in merged_df.iterrows():
    true_genre = row['Genre']  # Assuming the ground truth genre column is named 'Genre'

    for method in methods:
        predicted_genre = row[f'{method}_genre']
        total_metrics[f'{method}_overlap_ratio'] += calculate_overlap_ratio(predicted_genre, true_genre)
        total_metrics[f'{method}_exact_match'] += calculate_exact_match(predicted_genre, true_genre)

    total_metrics['total_rows'] += 1

# Calculate final averages; an empty table yields 0 instead of dividing by zero.
total_rows = total_metrics['total_rows']
total_average_metrics = {
    key: (value / total_rows if total_rows > 0 else 0)
    for key, value in total_metrics.items()
    if key != 'total_rows'
}

# Append final summary row
results_summary.append({
    'Zero-shot Overlap Ratio': total_average_metrics['zero_shot_overlap_ratio'],
    'Zero-shot Exact Match': total_average_metrics['zero_shot_exact_match'],
    'CoT Overlap Ratio': total_average_metrics['cot_overlap_ratio'],
    'CoT Exact Match': total_average_metrics['cot_exact_match'],
    'CoT Few-shot Overlap Ratio': total_average_metrics['cot_few_shot_overlap_ratio'],
    'CoT Few-shot Exact Match': total_average_metrics['cot_few_shot_exact_match']
})

# Create summary DataFrame
results_summary_df = pd.DataFrame(results_summary)

# Save the results to a new CSV file
results_summary_df.to_csv('Melon_Genre_GPT_4o_experiment_results_2024.csv', index=False)

print(results_summary_df)

# Monthly

In [None]:
import pandas as pd
import requests
import json

# Read the table of distinct genres; the names are taken from its 'Genre' column.
genre_df = pd.read_csv('[KR] Unique Genre Counts.csv')

# Keep the genres both as a Python list (passed to the prompt builders)
# and as one comma-separated string (printed for a quick visual check).
unique_genres = genre_df['Genre'].tolist()
genre_list_str = ', '.join(unique_genres)
print(genre_list_str)

In [None]:
import requests
import json
import pandas as pd

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the zero-shot genre prompt.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Prompt sections are separated by one blank line; the text ends on a bare
    # "Genre:" cue with no trailing newline.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [발라드, 댄스, 랩/힙합]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }


def prepare_cot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the "cot" genre prompt.

    Compared with the zero-shot prompt this one adds an explicit
    "identify the genres" instruction. It still tells the model to output
    nothing but the genre line, so no reasoning text is requested.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Prompt sections are separated by one blank line; the text ends on a bare
    # "Genre:" cue with no trailing newline.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Based on the lyrics provided, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [발라드, 댄스, 랩/힙합]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }


def prepare_cot_few_shot_payload(lyrics, unique_genres, model="gpt-4o", max_tokens=40, temperature=0):
    """Build the chat-completions request body for the one-example ("few-shot") genre prompt.

    The prompt lists the candidate genres, shows one worked example (fixed
    lyrics plus their genres), then asks for the genres of `lyrics`.

    Args:
        lyrics: Song lyrics; interpolated into the prompt between single quotes.
        unique_genres: Iterable of genre-name strings listed in the prompt.
        model: Model identifier stored in the payload.
        max_tokens: Completion token cap stored in the payload.
        temperature: Sampling temperature stored in the payload.

    Returns:
        dict with "model", "messages" (one user message holding the prompt),
        "max_tokens" and "temperature".
    """
    genre_list_str = ', '.join(unique_genres)

    # Worked example shown to the model; every line, including the last,
    # ends with a newline.
    example_lyrics = (
        "그치지 않기를 바랬죠\n"
        "처음 그대 내게로 오던 그날에\n"
        "잠시 동안 적시는\n"
        "그런 비가 아니길\n"
        "간절히 난 바래왔었죠\n"
        "그대도 내 맘 아나요\n"
        "매일 그대만 그려왔던 나를\n"
        "오늘도 내 맘에 스며들죠\n"
        "그대는 선물입니다\n"
        "하늘이 내려준\n"
        "홀로 선 세상 속에\n"
        "그댈 지켜줄게요\n"
        "어느 날 문득\n"
        "소나기처럼\n"
        "내린 그대지만\n"
        "오늘도 불러 봅니다\n"
        "내겐 소중한 사람\n"
        "Oh\n"
        "떨어지는 빗물이\n"
        "어느새 날 깨우고\n"
        "그대 생각에 잠겨요\n"
        "이제는 내게로 와요\n"
        "언제나처럼 기다리고 있죠\n"
        "그대 손을 꼭 잡아줄게요\n"
    )

    example_genres = "발라드, 국내드라마"

    # NOTE(review): the "Output example" line below uses English genre names in
    # brackets, while the worked example and the other two prompts use Korean
    # names. Left unchanged so prompts match earlier runs - confirm whether
    # this is intended.
    prompt = "\n\n".join([
        f"Here is a list of unique music genres: [{genre_list_str}].",
        "Example:",
        f"Lyrics: '{example_lyrics}'",
        f"Genre: {example_genres}",
        "Now, based on the provided lyrics, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ])

    return {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

In [None]:
import os

# Read the OpenAI key from the environment so it is never stored in the notebook.
# (The previous placeholder line had no right-hand side and did not parse.)
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

def make_request(payload, api_key, timeout=60):
    """POST `payload` to the OpenAI chat-completions endpoint and return the decoded JSON reply.

    Args:
        payload: Request body (dict) sent as JSON.
        api_key: API key placed in the Bearer authorization header.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the processing loop indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. Raised
            here so the failure is not surfaced later as a missing 'choices' key.
        requests.Timeout: If no response arrives within `timeout` seconds.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

# Load the CSV file
df = pd.read_csv('melon_chart_combined_2024.csv')

# Buffer of finished rows, one list per output column
result_keys = ['Title', 'Artist', 'zero_shot_Genre', 'cot_Genre', 'cot_few_shot_Genre']
results = {key: [] for key in result_keys}

# A checkpoint CSV is written each time this many rows have been completed.
checkpoint_every = 100


def _ask_for_genre(build_payload, lyrics):
    """Send one prompt built by `build_payload` and return the genre text of the reply."""
    response = make_request(build_payload(lyrics, unique_genres), api_key)
    answer = response['choices'][0]['message']['content'].strip()
    # Keep only the text before any "Description:" marker and drop the "Genre(s):" label.
    return answer.split("Description:")[0].replace("Genres:", "").replace("Genre:", "").strip()


def _save_checkpoint(processed):
    """Write the buffered rows to a CSV named after `processed`, then empty the buffer.

    Returns the DataFrame that was written.
    """
    url = f'2024 Lyrics, Genre Melon Results {processed}.csv'
    checkpoint_df = pd.DataFrame(results)
    checkpoint_df.to_csv(url, index=False)
    print(f"Results saved to '{url}'")
    for key in result_keys:
        results[key].clear()
    return checkpoint_df


# Iterate over each row in the dataframe
count = 0
results_df = pd.DataFrame(results)
for index, row in df.iterrows():
    lyrics = row['Lyrics']

    # Query all three prompts first, so a failed request never leaves the
    # column lists with unequal lengths.
    zero_shot_genre = _ask_for_genre(prepare_zero_shot_payload, lyrics)
    cot_genre = _ask_for_genre(prepare_cot_payload, lyrics)
    cot_few_shot_genre = _ask_for_genre(prepare_cot_few_shot_payload, lyrics)

    # Save the results
    results['Title'].append(row['Title'])
    results['Artist'].append(row['Artist'])
    results['zero_shot_Genre'].append(zero_shot_genre)
    results['cot_Genre'].append(cot_genre)
    results['cot_few_shot_Genre'].append(cot_few_shot_genre)

    # Checkpoint AFTER the row is finished, so the file numbered N really
    # contains rows up to and including row N.
    count += 1
    if count % checkpoint_every == 0:
        results_df = _save_checkpoint(count)

# Write whatever is left; the file is numbered with the actual row count
# (303 for the original dataset) rather than a hard-coded value.
if results['Title']:
    results_df = _save_checkpoint(count)

### Evaluation

In [None]:
import pandas as pd
import glob
import re

# Function to clean and split genre strings
def clean_and_split_genres(genre_str):
    """Turn a genre string such as "[발라드, 댄스]" into a set of genre names.

    Square brackets and quote characters are removed, the text is split on
    commas, and each piece is stripped of surrounding whitespace. Empty
    pieces are dropped, so an empty string yields an empty set. Values that
    are not strings (for example NaN from a missing cell) also yield an
    empty set.
    """
    if not isinstance(genre_str, str):
        return set()
    for unwanted in ('[', ']', "'", '"'):
        genre_str = genre_str.replace(unwanted, '')
    return {piece.strip() for piece in genre_str.split(',') if piece.strip()}

# Function to calculate overlap ratio
def calculate_overlap_ratio(predicted_genres, true_genres):
    """Return the fraction of true genres that also appear in the prediction.

    Returns 0 when there are no true genres.
    """
    predicted_set = clean_and_split_genres(predicted_genres)
    true_set = clean_and_split_genres(true_genres)
    intersection = predicted_set.intersection(true_set)
    return len(intersection) / len(true_set) if true_set else 0

# Function to calculate exact match accuracy
def calculate_exact_match(predicted_genres, true_genres):
    """Return 1 if the prediction shares at least one genre with the truth, else 0.

    NOTE(review): despite the name, this does not require the two sets to be
    equal - any overlap scores 1. Behaviour kept as is; confirm this is the
    metric intended for the "Exact Match" columns.
    """
    predicted_set = clean_and_split_genres(predicted_genres)
    true_set = clean_and_split_genres(true_genres)
    return 1 if not predicted_set.isdisjoint(true_set) else 0

# Load the ground truth genres
ground_truth_df = pd.read_csv('melon_chart_combined_2024.csv')

# Prompting methods to score; each has a "<method>_Genre" column in the result files.
methods = ('zero_shot', 'cot', 'cot_few_shot')

# Internal metric key -> column title in the summary table (order = column order).
metric_labels = {
    'zero_shot_overlap_ratio': 'Zero-shot Overlap Ratio',
    'zero_shot_exact_match': 'Zero-shot Exact Match',
    'cot_overlap_ratio': 'CoT Overlap Ratio',
    'cot_exact_match': 'CoT Exact Match',
    'cot_few_shot_overlap_ratio': 'CoT Few-shot Overlap Ratio',
    'cot_few_shot_exact_match': 'CoT Few-shot Exact Match',
}

# Initialize results list
results_summary = []

# Running sums over all files, plus file and row counters
total_metrics = {key: 0 for key in metric_labels}
total_metrics['total_files'] = 0
total_metrics['total_rows'] = 0

# List of result files
result_files = [
    '2024 Lyrics, Genre Melon Results 100.csv',
    '2024 Lyrics, Genre Melon Results 200.csv',
    '2024 Lyrics, Genre Melon Results 300.csv',
    '2024 Lyrics, Genre Melon Results 303.csv'
]

# Iterate through the result files
for result_file in result_files:
    # Load the results
    results_df = pd.read_csv(result_file)

    # Merge the dataframes on Title and Artist.
    # NOTE(review): if a (Title, Artist) pair occurs more than once on either
    # side, the merge emits one row per pairing - confirm the keys are unique.
    merged_df = results_df.merge(ground_truth_df, on=['Title', 'Artist'])

    # Per-file sums of each metric and the number of merged rows
    file_metrics = {key: 0 for key in metric_labels}
    file_metrics['n'] = len(merged_df)

    # Iterate through the merged dataframe and accumulate the metrics
    for _, row in merged_df.iterrows():
        true_genre = row['Genre']  # Assuming the ground truth genre column is named 'Genre'

        for method in methods:
            predicted_genre = row[f'{method}_Genre']
            file_metrics[f'{method}_overlap_ratio'] += calculate_overlap_ratio(predicted_genre, true_genre)
            file_metrics[f'{method}_exact_match'] += calculate_exact_match(predicted_genre, true_genre)

    # Add this file's raw sums to the overall totals before averaging
    for key in metric_labels:
        total_metrics[key] += file_metrics[key]
    total_metrics['total_rows'] += file_metrics['n']
    total_metrics['total_files'] += 1

    # Turn the sums into per-file means (a file with no rows keeps zeros)
    divisor = file_metrics['n'] if file_metrics['n'] > 0 else 1
    for key in metric_labels:
        file_metrics[key] /= divisor

    # The file count is the number right before ".csv" in the file name.
    # (The earlier pattern looked for "<n>_prompt(5)", which these names never contain.)
    match = re.search(r'(\d+)\.csv$', result_file)
    file_count = int(match.group(1)) if match else None

    # Append results to summary
    summary_row = {'File Count': file_count}
    summary_row.update({label: file_metrics[key] for key, label in metric_labels.items()})
    results_summary.append(summary_row)

# Calculate final averages over all rows of all files
total_rows = total_metrics['total_rows']
total_average_metrics = {
    key: (total_metrics[key] / total_rows if total_rows > 0 else 0)
    for key in metric_labels
}

# Append final summary row
final_row = {'File Count': 'Total Average'}
final_row.update({label: total_average_metrics[key] for key, label in metric_labels.items()})
results_summary.append(final_row)

results_summary_df = pd.DataFrame(results_summary)

# Sort by File Count; 'Total Average' is temporarily mapped to infinity so it sorts last
results_summary_df['File Count'] = results_summary_df['File Count'].apply(lambda x: float('inf') if x == 'Total Average' else x)
results_summary_df = results_summary_df.sort_values(by='File Count').reset_index(drop=True)
results_summary_df['File Count'] = results_summary_df['File Count'].apply(lambda x: 'Total Average' if x == float('inf') else x)

# Save the results to a new CSV file
results_summary_df.to_csv('Melon_Genre_Score_2024.csv', index=False)

print(results_summary_df)