# Billboard - Genre

## Yearly

In [None]:
# Load the unique genre list from the CSV file using raw string.
# pandas is imported here because this cell comes before the notebook's
# import cell, so `pd` would otherwise be undefined when run top to bottom.
import pandas as pd

genre_df = pd.read_csv(r'[EN] Unique Genre Counts.csv')

# Assuming the genre list is in a column named 'Genre'.
# Blank cells are dropped and values coerced to str so that ', '.join()
# cannot fail on a NaN entry.
unique_genres = genre_df['Genre'].dropna().astype(str).tolist()
genre_list_str = ', '.join(unique_genres)
print(genre_list_str)

In [None]:
import json
import pandas as pd
import requests
from openai import OpenAI

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the zero-shot genre prompt.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    # The sections are separated by blank lines; the final cue has none after it.
    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        f"Lyrics: '{lyrics}'",
        "Genres: ",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought prompt payload
def prepare_cot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the "chain-of-thought" genre prompt.

    Compared with the zero-shot prompt this adds an explicit task sentence
    ("Based on the lyrics provided, identify the genres."). The prompt still
    asks for the label only; no reasoning steps are requested.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        "Based on the lyrics provided, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought few-shot prompt payload
def prepare_cot_few_shot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the few-shot genre prompt.

    The prompt shows one worked example (a lyrics excerpt labelled
    "indie pop") before asking for the genres of *lyrics*.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    # Lines of the worked example; every line (including the empty one that
    # separates the two stanzas) is terminated by a newline in the prompt.
    example_lines = (
        "And she spoke words that would melt in your hands",
        "And she spoke words of wisdom",
        "To the basement, people, to the basement",
        "Many surprises await you",
        "In the basement, people, in the basement",
        "",
        "You hid there last time, you know we're gonna find you",
        "Sick in the car seat, 'cause you're not up to going",
        "Out on the main streets, completing your mission",
        "You hid there last time, you know we're gonna find you",
        "Sick in the car seat, 'cause you're not up to going",
        "Out on the main streets, completing your mission",
    )
    example_lyrics = "".join(f"{line}\n" for line in example_lines)
    example_genres = "indie pop"

    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        "Example:",
        f"Lyrics: '{example_lyrics}'",
        f"Genre: {example_genres}",
        "Now, based on the provided lyrics, identify the genres.",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        f"Lyrics: '{lyrics}'",
        "Genre:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }

In [None]:
# Read the OpenAI API key from the environment instead of pasting a secret
# into the notebook. (The previous `api_key = #...` placeholder was not valid
# Python: everything after `#` is a comment, leaving a bare assignment.)
import os

api_key = os.environ["OPENAI_API_KEY"]  # KeyError here means the variable is not set
client = OpenAI(api_key=api_key)

In [None]:
# Load the yearly Billboard dataset; the task-building cell below reads its
# 'lyrics' column and uses the row index in each request's custom_id.
df = pd.read_csv('Billboard_yearly_filtered.csv')

In [None]:
# Prepare the tasks for each method
tasks_zero_shot = []
tasks_cot = []
tasks_cot_few_shot = []

# (custom_id prefix, payload builder, destination list) for every prompting
# method, in the order the tasks are generated for each song.
prompt_methods = (
    ("zero_shot", prepare_zero_shot_payload, tasks_zero_shot),
    ("cot", prepare_cot_payload, tasks_cot),
    ("cot_few_shot", prepare_cot_few_shot_payload, tasks_cot_few_shot),
)

for i, row in df.iterrows():
    lyrics = row['lyrics']

    for prefix, build_payload, bucket in prompt_methods:
        # One batch request line: custom_id ties the response back to row `i`.
        bucket.append({
            "custom_id": f"{prefix}_{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": build_payload(lyrics, unique_genres),
        })

In [None]:
# Save tasks to JSONL files
# Local JSONL file names, one per prompting method; the same names are used
# when the files are written and when they are uploaded.
file_name_zero_shot = "batch_zero_shot.jsonl"
file_name_cot = "batch_cot.jsonl"
file_name_cot_few_shot = "batch_cot_few_shot.jsonl"

def save_tasks_to_file(file_name, tasks):
    """Write *tasks* to *file_name* as JSON Lines (one JSON document per line).

    Args:
        file_name: Path of the file to create; an existing file is overwritten.
        tasks: Iterable of JSON-serialisable objects.
    """
    with open(file_name, 'w') as out:
        out.writelines(f"{json.dumps(entry)}\n" for entry in tasks)

# Write one JSONL request file per prompting method.
save_tasks_to_file(file_name_zero_shot, tasks_zero_shot)
save_tasks_to_file(file_name_cot, tasks_cot)
save_tasks_to_file(file_name_cot_few_shot, tasks_cot_few_shot)

In [None]:
# Upload the files
# Each JSONL file is opened in a `with` block so its handle is closed as soon
# as the upload call returns, rather than staying open for the whole session.
with open(file_name_zero_shot, "rb") as upload_handle:
    batch_file_zero_shot = client.files.create(file=upload_handle, purpose="batch")

with open(file_name_cot, "rb") as upload_handle:
    batch_file_cot = client.files.create(file=upload_handle, purpose="batch")

with open(file_name_cot_few_shot, "rb") as upload_handle:
    batch_file_cot_few_shot = client.files.create(file=upload_handle, purpose="batch")

In [None]:
# Create the batch jobs
# One job per prompting method. Each job takes the ID of the corresponding
# uploaded JSONL file, targets the chat-completions endpoint (the same URL
# written into every request line) and asks for the "24h" completion window.
batch_job_zero_shot = client.batches.create(
    input_file_id=batch_file_zero_shot.id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)

batch_job_cot = client.batches.create(
    input_file_id=batch_file_cot.id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)

batch_job_cot_few_shot = client.batches.create(
    input_file_id=batch_file_cot_few_shot.id,
    endpoint="/v1/chat/completions",
    completion_window="24h"
)

In [None]:
# Batch job IDs.
# Taken from the job objects created in the previous cell. When resuming in a
# fresh session (where those objects no longer exist), replace the right-hand
# sides with the literal ID strings instead. The previous `= #file id`
# placeholders were not valid Python.
batch_job_zero_shot_id = batch_job_zero_shot.id
batch_job_cot_id = batch_job_cot.id
batch_job_cot_few_shot_id = batch_job_cot_few_shot.id

# Checking the status of the batch jobs
batch_job_zero_shot_status = client.batches.retrieve(batch_job_zero_shot_id)
batch_job_cot_status = client.batches.retrieve(batch_job_cot_id)
batch_job_cot_few_shot_status = client.batches.retrieve(batch_job_cot_few_shot_id)

# The full retrieved objects are printed (not only a status field).
# NOTE(review): presumably this is where the output file IDs needed by the
# evaluation cells are read from - confirm against the Batch API reference.
print("Zero-shot batch job status:", batch_job_zero_shot_status)
print("Chain-of-thought batch job status:", batch_job_cot_status)
print("Chain-of-thought few-shot batch job status:", batch_job_cot_few_shot_status)

### Evaluation

In [None]:
from openai import OpenAI
import json
import pandas as pd
import requests

# Read the OpenAI API key from the environment instead of pasting a secret
# into the notebook. (The previous `api_key = #...` placeholder was not valid
# Python: everything after `#` is a comment, leaving a bare assignment.)
import os

api_key = os.environ["OPENAI_API_KEY"]  # KeyError here means the variable is not set
client = OpenAI(api_key=api_key)

In [None]:
# zero-shot: Genre

# ID of the output file produced by the zero-shot batch job. It must be filled
# in by hand; the previous `= #file id` placeholder was not valid Python.
zero_shot_output_file_id = ""  # TODO: paste the batch job's output file ID
if not zero_shot_output_file_id:
    raise ValueError("Set zero_shot_output_file_id to the zero-shot batch job's output file ID")

result_zeroshot = client.files.content(zero_shot_output_file_id).content
result_zeroshot_file_name = "batch_job_zeroshot_Genre_results.jsonl"

# Write the raw bytes unchanged: the file is read back as UTF-8 (here and by
# read_jsonl), so it must not be re-encoded with the platform default codec.
with open(result_zeroshot_file_name, "wb") as file:
    file.write(result_zeroshot)

results_zeroshot = []
with open(result_zeroshot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # a blank line would make json.loads raise
            results_zeroshot.append(json.loads(line))

In [None]:
# cot: Genre

# ID of the output file produced by the chain-of-thought batch job. It must be
# filled in by hand; the previous `= #file id` placeholder was not valid Python.
cot_output_file_id = ""  # TODO: paste the batch job's output file ID
if not cot_output_file_id:
    raise ValueError("Set cot_output_file_id to the chain-of-thought batch job's output file ID")

result_cot = client.files.content(cot_output_file_id).content
result_cot_file_name = "batch_job_cot_Genre_results.jsonl"

# Write the raw bytes unchanged: the file is read back as UTF-8 (here and by
# read_jsonl), so it must not be re-encoded with the platform default codec.
with open(result_cot_file_name, "wb") as file:
    file.write(result_cot)

results_cot = []
with open(result_cot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # a blank line would make json.loads raise
            results_cot.append(json.loads(line))

In [None]:
# cot+few-shot: Genre

# ID of the output file produced by the few-shot batch job. It must be filled
# in by hand; the previous `= #file id` placeholder was not valid Python.
cot_few_shot_output_file_id = ""  # TODO: paste the batch job's output file ID
if not cot_few_shot_output_file_id:
    raise ValueError("Set cot_few_shot_output_file_id to the few-shot batch job's output file ID")

result_cot_few_shot = client.files.content(cot_few_shot_output_file_id).content
result_cot_few_shot_file_name = "batch_job_cot_fewshot_Genre_results.jsonl"

# Write the raw bytes unchanged: the file is read back as UTF-8 (here and by
# read_jsonl), so it must not be re-encoded with the platform default codec.
with open(result_cot_few_shot_file_name, "wb") as file:
    file.write(result_cot_few_shot)

results_cot_few_shot = []
with open(result_cot_few_shot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # a blank line would make json.loads raise
            results_cot_few_shot.append(json.loads(line))

In [None]:
# Load the JSONL Files and Create DataFrames

import json
import pandas as pd

# Load the JSONL files
# Maps each prompting method to the results file written by the download
# cells; the keys become the column prefixes of the aligned DataFrame
# (e.g. 'zero_shot' -> 'zero_shot_genre').
files = {
    "zero_shot": "batch_job_zeroshot_Genre_results.jsonl",
    "cot": "batch_job_cot_Genre_results.jsonl",
    "cot_few_shot": "batch_job_cot_fewshot_Genre_results.jsonl"
}

# Function to read JSONL file and return DataFrame
def _row_id_from_custom_id(custom_id):
    """Return the integer after the last '_' of *custom_id*, or None if there is none."""
    if not isinstance(custom_id, str):
        return None
    suffix = custom_id.rpartition('_')[2]
    if suffix.isascii() and suffix.isdigit():
        return int(suffix)
    return None


def read_jsonl(file_path):
    """Read a batch results file and return the predicted genres as a DataFrame.

    Every non-empty line must be a JSON object whose
    response.body.choices[0].message.content holds the model answer; the
    "Genre: " prefix is removed from it. Lines that cannot be parsed, or whose
    response is missing (for example a failed request), are reported on
    stdout and skipped.

    The requests were submitted with custom_ids of the form '<method>_<row>'.
    When every kept line carries such an ID and the IDs are unique, the
    result is indexed by that row number and sorted, so predictions stay
    attached to the right song even if lines were skipped or arrive in a
    different order. Otherwise the rows keep file order with a default index.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file.

    Returns:
        pandas.DataFrame with a single 'genre' column (no columns if the file
        yields no usable line).
    """
    genres = []
    row_ids = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:  # Check if the line is not empty
                continue
            try:
                json_obj = json.loads(line)
                # Extract the genre from the JSON object
                content = json_obj['response']['body']['choices'][0]['message']['content']
                genre = content.replace("Genre: ", "").strip()
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            except (KeyError, IndexError, TypeError, AttributeError) as e:
                # TypeError/AttributeError cover null fields (e.g. "response": null).
                print(f"Missing key in JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            genres.append(genre)
            row_ids.append(_row_id_from_custom_id(json_obj.get('custom_id')))

    frame = pd.DataFrame([{'genre': genre} for genre in genres])

    # Only trust the IDs when all of them are usable; a partial mapping would
    # mix row numbers with file positions.
    ids_usable = bool(row_ids) and None not in row_ids and len(set(row_ids)) == len(row_ids)
    if ids_usable:
        frame.index = row_ids
        frame = frame.sort_index()
    return frame

# Load the data into DataFrames (one per prompting method)
dfs = {}
for key, path in files.items():
    dfs[key] = read_jsonl(path)

# Put the per-method frames side by side; pandas aligns them on their index
# and labels each frame's columns with its method key.
aligned_df = pd.concat(dfs.values(), axis=1, keys=dfs.keys())

# Flatten the (method, column) pairs into names such as 'zero_shot_genre'
aligned_df.columns = ['_'.join(pair).strip() for pair in aligned_df.columns.values]

# Load the ground truth
ground_truth_df = pd.read_csv('Billboard_yearly_filtered.csv')

# Attach the predictions to the ground truth, again aligned on the index
merged_df = pd.concat([ground_truth_df, aligned_df], axis=1)

# Show the resulting column names as a sanity check
print(merged_df.columns.tolist())

In [None]:
# Compare Predictions with Ground Truth

# Function to clean and split genre strings
def clean_and_split_genres(genre_str):
    """Parse a genre list such as "[pop, r&b]" or "['pop', 'r&b']" into a set.

    Square brackets and single/double quote characters are removed, the text
    is split on commas, and whitespace around every name is trimmed. Empty
    names are discarded, so "" and "[]" give an empty set.

    Args:
        genre_str: Genre list as text. Any non-string value (for example the
            NaN pandas uses for a missing cell) is treated as "no genres".

    Returns:
        Set of genre-name strings.
    """
    if not isinstance(genre_str, str):
        return set()
    stripped = genre_str.translate(str.maketrans('', '', "[]'\""))
    names = (piece.strip() for piece in stripped.split(','))
    return {name for name in names if name}

# Function to calculate overlap ratio
def calculate_overlap_ratio(predicted_genres, true_genres):
    """Return the share of the true genres that also appear in the prediction.

    Both arguments are genre-list strings parsed with clean_and_split_genres.
    The result is |predicted & true| / |true|, or 0 when the parsed true set
    is empty.
    """
    guessed = clean_and_split_genres(predicted_genres)
    expected = clean_and_split_genres(true_genres)
    if not expected:
        return 0
    return len(guessed & expected) / len(expected)

# Function to calculate exact match accuracy
def calculate_exact_match(predicted_genres, true_genres):
    """Return 1 if the prediction shares at least one genre with the truth, else 0.

    Both arguments are genre-list strings parsed with clean_and_split_genres.

    NOTE(review): despite the name, this is an "any overlap" hit indicator,
    not a test that the two sets are identical - confirm that this is the
    intended metric.
    """
    guessed = clean_and_split_genres(predicted_genres)
    expected = clean_and_split_genres(true_genres)
    return 0 if guessed.isdisjoint(expected) else 1

In [None]:
# Merge the ground truth with the predictions
merged_df = pd.concat([ground_truth_df, aligned_df], axis=1)

# Prompting methods to score: key prefix -> label used in the summary columns.
# Each method has a '<prefix>_genre' prediction column in merged_df.
method_labels = {
    'zero_shot': 'Zero-shot',
    'cot': 'CoT',
    'cot_few_shot': 'CoT Few-shot',
}

# Initialize results list
results_summary = []

# Initialize totals for averaging
total_metrics = {}
for method in method_labels:
    total_metrics[f'{method}_overlap_ratio'] = 0
    total_metrics[f'{method}_exact_match'] = 0
total_metrics['total_rows'] = 0

# Iterate through the merged dataframe and calculate accuracy
for _, row in merged_df.iterrows():
    true_genre = row['Genre']  # Assuming the ground truth genre column is named 'Genre'

    for method in method_labels:
        predicted_genre = row[f'{method}_genre']
        # A song without a prediction shows up as NaN after the concat;
        # score it as an empty answer instead of crashing on a float.
        if not isinstance(predicted_genre, str):
            predicted_genre = ''
        total_metrics[f'{method}_overlap_ratio'] += calculate_overlap_ratio(predicted_genre, true_genre)
        total_metrics[f'{method}_exact_match'] += calculate_exact_match(predicted_genre, true_genre)

    total_metrics['total_rows'] += 1

# Calculate final averages (all zero when there were no rows to score)
row_count = total_metrics['total_rows']
total_average_metrics = {
    key: (value / row_count if row_count else 0)
    for key, value in total_metrics.items()
    if key != 'total_rows'
}

# Append final summary row
summary_row = {}
for method, label in method_labels.items():
    summary_row[f'{label} Overlap Ratio'] = total_average_metrics[f'{method}_overlap_ratio']
    summary_row[f'{label} Exact Match'] = total_average_metrics[f'{method}_exact_match']
results_summary.append(summary_row)

# Create summary DataFrame
results_summary_df = pd.DataFrame(results_summary)

# Save the results to a new CSV file
results_summary_df.to_csv('Billboard_Genre_GPT_4o_experiment_results_1990-2023.csv', index=False)

print(results_summary_df)

## Monthly

In [None]:
import pandas as pd
import requests
import json

# Load the unique genre list from the CSV file using raw string
# (the raw-string prefix is harmless here; the file name contains no backslashes)
genre_df = pd.read_csv(r'[EN] Unique Genre Counts.csv')

# Assuming the genre list is in a column named 'Genre'
unique_genres = genre_df['Genre'].tolist()
# Comma-separated form of the list, printed as a quick visual check
genre_list_str = ', '.join(unique_genres)
print(genre_list_str)

In [None]:
import requests
import json
import pandas as pd

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the zero-shot genre prompt.

    In this variant the lyrics come directly after the genre list, followed
    by the output-format instructions.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    # The sections are separated by blank lines; the final cue has none after it.
    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        f"Lyrics: '{lyrics}'",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        "Genres: ",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought prompt payload
def prepare_cot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the "chain-of-thought" genre prompt.

    Compared with the zero-shot prompt this adds an explicit task sentence
    before the lyrics. The prompt still asks for the label only; no reasoning
    steps are requested.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        "Based on the lyrics provided, identify the genres.",
        f"Lyrics: '{lyrics}'",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        "Genre:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }


# Function to prepare the chain-of-thought few-shot prompt payload
def prepare_cot_few_shot_payload(lyrics, unique_genres):
    """Build the chat-completions request body for the few-shot genre prompt.

    The prompt shows one worked example (a full set of lyrics labelled
    "pop, soul") before asking for the genres of *lyrics*.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.
        unique_genres: Iterable of genre-name strings offered as candidates.

    Returns:
        A dict with the model name, a single user message, a 40-token
        completion limit and temperature 0.
    """
    candidates = ', '.join(unique_genres)

    # Worked example shown to the model.
    # NOTE(review): the text contains lines that look like lyrics-site residue
    # ("See upcoming pop shows", "Get tickets for your favorite artists",
    # "[Outro: ...]") - confirm whether they are meant to be in the prompt.
    example_lyrics = (
        "I'd like to say we gave it a try\n"
        "I'd like to blame it all on life\n"
        "Maybe we just weren't right\n"
        "But that's a lie, that's a lie\n\n"

        "And we can deny it as much as we want\n"
        "But in time, our feelings will show\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is everyone knows, oh\n\n"

        "Almost, almost is never enough\n"
        "So close to being in love\n"
        "If I would have known that you wanted me the way I wanted you\n"
        "Then maybe we wouldn't be two worlds apart (Ah)\n"
        "But right here in each other's arms\n"
        "And we almost, we almost knew what love was\n"
        "But almost is never enough (Ah)\n\n"

        "If I could change the world overnight (Ah)\n"
        "There'd be no such thing as goodbye (Ah)\n"
        "You'd be standing right where you were (Ah)\n"
        "And we'd get the chance we deserve, oh (Ah)\n"
        "See upcoming pop shows\n"
        "Get tickets for your favorite artists\n\n"

        "Try to deny it as much as you want\n"
        "But in time, our feelings will show (Ah)\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is everyone knows (Ah)\n\n"

        "Almost, almost is never enough\n"
        "So close to being in love\n"
        "If I would have known that you wanted me the way I wanted you, woah\n"
        "Then maybe we wouldn't be two worlds apart\n"
        "But right here (Right here) in each other's arms\n"
        "And we almost, we almost knew what love was\n"
        "But almost is never enough\n\n"

        "Huh (Woah; Huh, baby), huh, baby (Mm)\n"
        "You know (You know)\n"
        "You know, baby (Huh, baby; Huh)\n"
        "Almost (Baby, baby, baby) is never enough, baby\n"
        "You know (Hm-hm), ooh-yeah\n\n"

        "And we can deny it as much as we want\n"
        "But in time, our feelings will show\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is (Truth is) everyone knows (Oh)\n\n"

        "Almost (Almost), almost is never enough (Is never enough, baby)\n"
        "(We were close) So close to being in love (So close)\n"
        "If I would have known that you wanted me (That you wanted me)\n"
        "The way I wanted you, babe\n"
        "Then maybe we wouldn't be two worlds apart\n"
        "But right here in each other's arms\n"
        "And we almost, we almost knew what love was (Baby)\n"
        "But almost is never enough\n\n"
        "[Outro: Ariana Grande, Both & Nathan Sykes]\n"
        "Huh, huh, baby (Almost)\n"
        "You know (Hey), you know, baby (Oh)\n"
        "Almost (Never)\n"
        "Is never enough, baby (Never)\n"
        "You know (Ooh), hey\n"
    )
    example_genres = "pop, soul"

    sections = [
        f"Here is a list of unique music genres: [{candidates}].",
        "Example:",
        f"Lyrics: '{example_lyrics}'",
        f"Genre: {example_genres}",
        "Now, based on the provided lyrics, identify the genres.",
        f"Lyrics: '{lyrics}'",
        "Say nothing but the Genre as Genre: {the output}",
        "Output example: Genre: [pop, r&b, hip hop]",
        "Genre:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 40,
        "temperature": 0,
    }

In [None]:
# Read the OpenAI API key from the environment instead of pasting a secret
# into the notebook. (The previous `api_key = #...` placeholder was not valid
# Python: everything after `#` is a comment, leaving a bare assignment.)
import os

api_key = os.environ["OPENAI_API_KEY"]  # KeyError here means the variable is not set

def make_request(payload, api_key):
    """POST *payload* to the OpenAI chat-completions endpoint and return the decoded JSON.

    Args:
        payload: Request body (a dict that is sent as JSON).
        api_key: API key sent as a Bearer token.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status (for example an invalid key or a rate limit). Raising here
            replaces the less informative KeyError callers would otherwise
            hit when indexing an error body.
        requests.exceptions.Timeout: No answer arrived within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    # Without a timeout a stalled connection would block the whole run.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )
    response.raise_for_status()
    return response.json()

# Load the CSV file
df = pd.read_csv('Billboard_weekly_filtered.csv')

# Prepare the results dataframe
results = {
    'Title': [],
    'Artist': [],
    'zero_shot_Genre': [],
    'cot_Genre': [],
    'cot_few_shot_Genre': [],
}

result_keys = ['Title', 'Artist', 'zero_shot_Genre', 'cot_Genre','cot_few_shot_Genre']

# (results column, payload builder) for every prompting method, in request order
prompt_methods = [
    ('zero_shot_Genre', prepare_zero_shot_payload),
    ('cot_Genre', prepare_cot_payload),
    ('cot_few_shot_Genre', prepare_cot_few_shot_payload),
]

# Iterate over each row in the dataframe
count = 0
for index, row in df.iterrows():
    count += 1
    if count % 100 == 0:
        # Checkpoint: write the rows collected so far and start a new chunk.
        # The name follows the '... Results <count>.csv' form that the
        # evaluation cell loads; the earlier '_prompt(10)' suffix meant the
        # checkpoint was never picked up. Adjust the name per experiment run.
        url = f'2024 Lyrics, Genre, Description Billboard Results {count}.csv'
        results_df = pd.DataFrame(results)
        results_df.to_csv(url, index=False)
        print(f"Results saved to '{url}'")
        for keys in result_keys:
            results[keys].clear()

    title = row['Title']
    artist = row['Artist']
    lyrics = row['lyrics']

    genres_by_column = {}
    for column, build_payload in prompt_methods:
        response = make_request(build_payload(lyrics, unique_genres), api_key)
        answer = response['choices'][0]['message']['content'].strip()
        # Keep only the text before any "Description:" part and drop the
        # "Genres:" / "Genre:" label.
        genres_by_column[column] = (
            answer.split("Description:")[0].replace("Genres:", "").replace("Genre:", "").strip()
        )

    # Save the results
    results['Title'].append(title)
    results['Artist'].append(artist)
    for column, _ in prompt_methods:
        results[column].append(genres_by_column[column])

# Convert the results to a dataframe
results_df = pd.DataFrame(results)
print(results_df)

# Save the results to a new CSV file
results_df.to_csv('2024 Lyrics, Genre, Description Billboard Results 175.csv', index=False)

### Evaluation

In [None]:
import pandas as pd
import glob
import re

# Function to clean and split genre strings
def clean_and_split_genres(genre_str):
    """Parse a genre list such as "[pop, r&b]" or "['pop', 'r&b']" into a set.

    Square brackets and single/double quote characters are removed, the text
    is split on commas, and whitespace around every name is trimmed. Empty
    names are discarded, so "" and "[]" give an empty set.

    Args:
        genre_str: Genre list as text. Any non-string value (for example the
            NaN pandas uses for a missing cell) is treated as "no genres".

    Returns:
        Set of genre-name strings.
    """
    if not isinstance(genre_str, str):
        return set()
    stripped = genre_str.translate(str.maketrans('', '', "[]'\""))
    names = (piece.strip() for piece in stripped.split(','))
    return {name for name in names if name}

# Function to calculate overlap ratio
def calculate_overlap_ratio(predicted_genres, true_genres):
    """Return the share of the true genres that also appear in the prediction.

    Both arguments are genre-list strings parsed with clean_and_split_genres.
    The result is |predicted & true| / |true|, or 0 when the parsed true set
    is empty.
    """
    guessed = clean_and_split_genres(predicted_genres)
    expected = clean_and_split_genres(true_genres)
    if not expected:
        return 0
    return len(guessed & expected) / len(expected)

# Function to calculate exact match accuracy
def calculate_exact_match(predicted_genres, true_genres):
    """Return 1 if the prediction shares at least one genre with the truth, else 0.

    Both arguments are genre-list strings parsed with clean_and_split_genres.

    NOTE(review): despite the name, this is an "any overlap" hit indicator,
    not a test that the two sets are identical - confirm that this is the
    intended metric.
    """
    guessed = clean_and_split_genres(predicted_genres)
    expected = clean_and_split_genres(true_genres)
    return 0 if guessed.isdisjoint(expected) else 1

# Load the ground truth descriptions
ground_truth_df = pd.read_csv('Billboard_weekly_filtered.csv')

# Prompting methods to score: key prefix -> label used in the summary columns.
# Each method has a '<prefix>_Genre' prediction column in the result files.
method_labels = {
    'zero_shot': 'Zero-shot',
    'cot': 'CoT',
    'cot_few_shot': 'CoT Few-shot',
}


def _summary_row(file_count, metrics):
    """Build one summary-table row from a dict of '<method>_<metric>' averages."""
    summary = {'File Count': file_count}
    for method, label in method_labels.items():
        summary[f'{label} Overlap Ratio'] = metrics[f'{method}_overlap_ratio']
        summary[f'{label} Exact Match'] = metrics[f'{method}_exact_match']
    return summary


# Initialize results list
results_summary = []

# Initialize totals for averaging (raw sums over the rows of all files)
total_metrics = {}
for method in method_labels:
    total_metrics[f'{method}_overlap_ratio'] = 0
    total_metrics[f'{method}_exact_match'] = 0
total_metrics['total_files'] = 0
total_metrics['total_rows'] = 0

# Get list of result files (glob returns only the names that actually exist)
result_files = sorted(glob.glob('2024 Lyrics, Genre, Description Billboard Results 100.csv') +
                      glob.glob('2024 Lyrics, Genre, Description Billboard Results 175.csv'))

# Iterate through the result files
for result_file in result_files:
    # Load the results
    results_df = pd.read_csv(result_file)

    # Merge the dataframes on Title and Artist
    # NOTE(review): if a Title/Artist pair occurs more than once in the ground
    # truth, the merge repeats that prediction once per occurrence - verify
    # that the ground-truth file has unique pairs.
    merged_df = results_df.merge(ground_truth_df, on=['Title', 'Artist'])

    # Sums of the per-row scores for this file, plus its row count 'n'
    file_metrics = {key: 0 for key in total_metrics if not key.startswith('total')}
    file_metrics['n'] = len(merged_df)

    # Iterate through the merged dataframe and calculate accuracy
    for i, row in merged_df.iterrows():
        true_genre = row['Genre']  # Assuming the ground truth genre column is named 'Genre'

        for method in method_labels:
            predicted_genre = row[f'{method}_Genre']
            # An empty answer is read back from the CSV as NaN; score it as an
            # empty prediction instead of crashing on a float.
            if not isinstance(predicted_genre, str):
                predicted_genre = ''
            file_metrics[f'{method}_overlap_ratio'] += calculate_overlap_ratio(predicted_genre, true_genre)
            file_metrics[f'{method}_exact_match'] += calculate_exact_match(predicted_genre, true_genre)

    # Accumulate the raw sums for the final summary before they are averaged
    for key in file_metrics:
        if key != 'n':
            total_metrics[key] += file_metrics[key]
    total_metrics['total_rows'] += file_metrics['n']
    total_metrics['total_files'] += 1

    # Turn this file's sums into averages (left at 0 for an empty file)
    divisor = file_metrics['n'] if file_metrics['n'] > 0 else 1
    for key in file_metrics:
        if key != 'n':
            file_metrics[key] /= divisor

    # Extract the row count from the file name ('... Results <count>...').
    # The earlier pattern required a '_prompt(5)' suffix that the loaded file
    # names do not have, so the count was always None.
    match = re.search(r'Results (\d+)', result_file)
    file_count = int(match.group(1)) if match else None

    # Append results to summary
    results_summary.append(_summary_row(file_count, file_metrics))

# Calculate final averages
total_average_metrics = {}
for key in total_metrics:
    if key.startswith('total'):
        continue
    total_average_metrics[key] = total_metrics[key] / total_metrics['total_rows'] if total_metrics['total_rows'] > 0 else 0

# Order the per-file rows numerically by file count (rows without a count go
# last), then add the 'Total Average' row at the very end.
results_summary.sort(key=lambda entry: (entry['File Count'] is None, entry['File Count'] or 0))
results_summary.append(_summary_row('Total Average', total_average_metrics))

results_summary_df = pd.DataFrame(results_summary)

# Save the results to a new CSV file
results_summary_df.to_csv('Billboard_Genre_GPT_4o_experiment_results_2024.csv', index=False)

print(results_summary_df)

# Billboard - Song Description

## Yearly

In [None]:
import json
import pandas as pd
import requests
from openai import OpenAI

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics):
    """Build the chat-completions request body for the zero-shot description prompt.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.

    Returns:
        A dict with the model name, a single user message, a 300-token
        completion limit and temperature 0.
    """
    # The sections are separated by blank lines; the final cue has none after it.
    sections = [
        "Say nothing but the Description as Description: {the output}",
        "Output example: Description: The song explores themes of love and heartbreak.",
        f"Lyrics: '{lyrics}'",
        "Description:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 300,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought prompt payload
def prepare_cot_payload(lyrics):
    """Build the chat-completions request body for the "chain-of-thought" description prompt.

    The prompt asks for a brief description that includes the possible song
    title and artist name, and gives one example of the expected output. It
    still asks for the description only; no reasoning steps are requested.

    Args:
        lyrics: Lyrics text inserted verbatim into the prompt.

    Returns:
        A dict with the model name, a single user message, a 300-token
        completion limit and temperature 0.
    """
    # The example sentence ends with a trailing space, which is part of the prompt.
    sections = [
        "Based on the lyrics provided, write a brief description of the song.",
        "Include the possible song title and artist name in the description.",
        "Say nothing but the Description as Description: {the output}",
        "Output example: Description: Honeymoon Avenue by Ariana Grande is about knowing you are at the end of a relationship and wishing it could not be the end and go back to the beginning and start over. ",
        f"Lyrics: '{lyrics}'",
        "Description:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(sections)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 300,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought few-shot prompt payload
def prepare_cot_few_shot_payload(lyrics):
    """Build the few-shot request body: one worked example, then the task.

    The prompt shows a full example (lyrics plus a reference description),
    then repeats the instructions of the chain-of-thought prompt for the
    target lyrics.

    Args:
        lyrics: Lyrics text of the song to describe.

    Returns:
        dict: Request body with "model", "messages", "max_tokens" and
        "temperature" keys.
    """
    # NOTE(review): "See upcoming pop shows" / "Get tickets for your favorite
    # artists" and the "[Outro: ...]" tag look like lyrics-page artifacts rather
    # than lyrics. They are left untouched here so the example text is unchanged.
    example_lyrics = (
        "I'd like to say we gave it a try\n"
        "I'd like to blame it all on life\n"
        "Maybe we just weren't right\n"
        "But that's a lie, that's a lie\n\n"

        "And we can deny it as much as we want\n"
        "But in time, our feelings will show\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is everyone knows, oh\n\n"

        "Almost, almost is never enough\n"
        "So close to being in love\n"
        "If I would have known that you wanted me the way I wanted you\n"
        "Then maybe we wouldn't be two worlds apart (Ah)\n"
        "But right here in each other's arms\n"
        "And we almost, we almost knew what love was\n"
        "But almost is never enough (Ah)\n\n"

        "If I could change the world overnight (Ah)\n"
        "There'd be no such thing as goodbye (Ah)\n"
        "You'd be standing right where you were (Ah)\n"
        "And we'd get the chance we deserve, oh (Ah)\n"
        "See upcoming pop shows\n"
        "Get tickets for your favorite artists\n\n"

        "Try to deny it as much as you want\n"
        "But in time, our feelings will show (Ah)\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is everyone knows (Ah)\n\n"

        "Almost, almost is never enough\n"
        "So close to being in love\n"
        "If I would have known that you wanted me the way I wanted you, woah\n"
        "Then maybe we wouldn't be two worlds apart\n"
        "But right here (Right here) in each other's arms\n"
        "And we almost, we almost knew what love was\n"
        "But almost is never enough\n\n"

        "Huh (Woah; Huh, baby), huh, baby (Mm)\n"
        "You know (You know)\n"
        "You know, baby (Huh, baby; Huh)\n"
        "Almost (Baby, baby, baby) is never enough, baby\n"
        "You know (Hm-hm), ooh-yeah\n\n"

        "And we can deny it as much as we want\n"
        "But in time, our feelings will show\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is (Truth is) everyone knows (Oh)\n\n"

        "Almost (Almost), almost is never enough (Is never enough, baby)\n"
        "(We were close) So close to being in love (So close)\n"
        "If I would have known that you wanted me (That you wanted me)\n"
        "The way I wanted you, babe\n"
        "Then maybe we wouldn't be two worlds apart\n"
        "But right here in each other's arms\n"
        "And we almost, we almost knew what love was (Baby)\n"
        "But almost is never enough\n\n"
        "[Outro: Ariana Grande, Both & Nathan Sykes]\n"
        "Huh, huh, baby (Almost)\n"
        "You know (Hey), you know, baby (Oh)\n"
        "Almost (Never)\n"
        "Is never enough, baby (Never)\n"
        "You know (Ooh), hey\n"
    )

    example_description = (
        "On the collaborative track “Almost Is Never Enough,” Ariana Grande & Nathan Sykes play a couple who had a relationship that hadn’t gone right. "
        "Ariana would like to say things were going well but she knows that’s a lie and like the title states, almost is never enough to make the relationship work; you need to put full effort in. "
        "Both of them state that they didn’t feel the relationship while in it, but the mood of the song and lyrics suggest that they both want to either reconnect or they simply just miss better times.\n\n"
        "At the time of the song’s release, Nathan and Ariana were dating. Unfortunately, their relationship ended a few months later."
    )

    prompt = (
        f"Example:\n\n"
        f"Lyrics: '{example_lyrics}'\n\n"
        # The example answer carries the same "Description:" label the model is
        # asked to produce (it was previously labelled "Genre:").
        f"Description: {example_description}\n\n"

        f"Now, based on the provided lyrics, write a brief description of the song.\n\n"
        f"Include the possible song title and artist name in the description.\n\n"
        f"Say nothing but the Description as Description: {{the output}}\n\n"
        f"Output example: Description: Honeymoon Avenue by Ariana Grande is about knowing you are at the end of a relationship and wishing it could not be the end and go back to the beginning and start over. \n\n"
        f"Lyrics: '{lyrics}'\n\n"
        f"Description:"
    )
    messages = [{"role": "user", "content": prompt}]

    payload = {
        "model": "gpt-4o",
        "messages": messages,
        "max_tokens": 300,
        "temperature": 0
    }

    return payload

In [None]:
import os

# Read the API key from the environment instead of pasting it into the
# notebook, so the secret never ends up in a saved or shared copy.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# Client used by the upload, batch and download cells.
client = OpenAI(api_key=api_key)

In [None]:
# Load the yearly Billboard dataset; the task-building cell reads its 'lyrics'
# column and uses the row index to build each request's custom_id.
df = pd.read_csv('Billboard_yearly_filtered.csv')

In [None]:
# Build one Batch-API request per song for each prompting method.
tasks_zero_shot = []
tasks_cot = []
tasks_cot_few_shot = []

# (custom_id prefix, payload builder, list collecting that method's requests)
_task_plan = (
    ("zero_shot", prepare_zero_shot_payload, tasks_zero_shot),
    ("cot", prepare_cot_payload, tasks_cot),
    ("cot_few_shot", prepare_cot_few_shot_payload, tasks_cot_few_shot),
)

for i, row in df.iterrows():
    lyrics = row['lyrics']

    for prefix, build_payload, bucket in _task_plan:
        # custom_id carries the DataFrame index so a result can be traced
        # back to its source row.
        bucket.append({
            "custom_id": f"{prefix}_{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": build_payload(lyrics),
        })

In [None]:
# Save tasks to JSONL files
# One batch input file per prompting method; the same names are reused when
# the files are uploaded.
file_name_zero_shot = "batch_zero_shot_description_ver2.jsonl"
file_name_cot = "batch_cot_description_ver2.jsonl"
file_name_cot_few_shot = "batch_cot_few_shot_description_ver2.jsonl"

def save_tasks_to_file(file_name, tasks):
    """Write *tasks* to *file_name* in JSON Lines format.

    Each task is serialised with ``json.dumps`` and written on its own line.
    An existing file with the same name is overwritten.

    Args:
        file_name: Path of the file to create.
        tasks: Iterable of JSON-serialisable request objects.
    """
    with open(file_name, 'w') as out:
        out.writelines(json.dumps(entry) + '\n' for entry in tasks)

# Write the three request lists to their JSONL files (one file per method).
save_tasks_to_file(file_name_zero_shot, tasks_zero_shot)
save_tasks_to_file(file_name_cot, tasks_cot)
save_tasks_to_file(file_name_cot_few_shot, tasks_cot_few_shot)

In [None]:
# Upload the files
# Each input file is opened in a `with` block so its handle is closed as soon
# as the upload call returns.
with open(file_name_zero_shot, "rb") as upload_handle:
    batch_file_zero_shot = client.files.create(file=upload_handle, purpose="batch")

with open(file_name_cot, "rb") as upload_handle:
    batch_file_cot = client.files.create(file=upload_handle, purpose="batch")

with open(file_name_cot_few_shot, "rb") as upload_handle:
    batch_file_cot_few_shot = client.files.create(file=upload_handle, purpose="batch")

In [None]:
# Create the batch jobs
# All three jobs target the same endpoint with the same completion window;
# only the uploaded input file differs.
_batch_options = {
    "endpoint": "/v1/chat/completions",
    "completion_window": "24h",
}

batch_job_zero_shot = client.batches.create(input_file_id=batch_file_zero_shot.id, **_batch_options)
batch_job_cot = client.batches.create(input_file_id=batch_file_cot.id, **_batch_options)
batch_job_cot_few_shot = client.batches.create(input_file_id=batch_file_cot_few_shot.id, **_batch_options)

In [None]:
# Checking the status of the batch jobs
# Fetch the current state of every job first, then print them together.
batch_job_zero_shot_status = client.batches.retrieve(batch_job_zero_shot.id)
batch_job_cot_status = client.batches.retrieve(batch_job_cot.id)
batch_job_cot_few_shot_status = client.batches.retrieve(batch_job_cot_few_shot.id)

for _label, _status in (
    ("Zero-shot batch job status:", batch_job_zero_shot_status),
    ("Chain-of-thought batch job status:", batch_job_cot_status),
    ("Chain-of-thought few-shot batch job status:", batch_job_cot_few_shot_status),
):
    print(_label, _status)

### Evaluation

In [None]:
# Placeholders: each assignment must be completed by hand with the output file
# id of the corresponding finished batch job before this cell can run.
zero_shot_output_file_id = # TODO: output file id of the zero-shot batch job
cot_output_file_id = # TODO: output file id of the chain-of-thought batch job
cot_few_shot_output_file_id = # TODO: output file id of the few-shot batch job

In [None]:
from openai import OpenAI

import os

# Read the API key from the environment instead of pasting it into the
# notebook, so the secret never ends up in a saved or shared copy.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# Client used to download the batch output files.
client = OpenAI(api_key=api_key)

In [None]:
# zero-shot: Description

# Placeholder: fill in the output file id of the finished zero-shot batch job.
zero_shot_output_file_id = # TODO: output file id

# Download the raw batch output; `.content` is the file body as bytes.
result_zeroshot = client.files.content(zero_shot_output_file_id).content
result_zeroshot_file_name = "batch_job_zeroshot_Description_results_ver2.jsonl"

# Store the bytes verbatim. Decoding and re-encoding through a text-mode file
# would depend on the platform's default encoding and can fail or corrupt
# non-ASCII characters.
with open(result_zeroshot_file_name, "wb") as file:
    file.write(result_zeroshot)

# Parse the saved JSON Lines file: one response object per non-empty line.
results_zeroshot = []
with open(result_zeroshot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # tolerate blank lines instead of crashing in json.loads
            results_zeroshot.append(json.loads(line))

In [None]:
# cot: Description

# Placeholder: fill in the output file id of the finished chain-of-thought batch job.
cot_output_file_id = # TODO: output file id

# Download the raw batch output; `.content` is the file body as bytes.
result_cot = client.files.content(cot_output_file_id).content
result_cot_file_name = "batch_job_cot_Description_results_ver2.jsonl"

# Store the bytes verbatim. Decoding and re-encoding through a text-mode file
# would depend on the platform's default encoding and can fail or corrupt
# non-ASCII characters.
with open(result_cot_file_name, "wb") as file:
    file.write(result_cot)

# Parse the saved JSON Lines file: one response object per non-empty line.
results_cot = []
with open(result_cot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # tolerate blank lines instead of crashing in json.loads
            results_cot.append(json.loads(line))

In [None]:
# cot+few-shot: Description

# Placeholder: fill in the output file id of the finished few-shot batch job.
cot_few_shot_output_file_id = # TODO: output file id

# Download the raw batch output; `.content` is the file body as bytes.
result_cot_few_shot = client.files.content(cot_few_shot_output_file_id).content
result_cot_few_shot_file_name = "batch_job_cot_fewshot_Description_results_ver2.jsonl"

# Store the bytes verbatim. Decoding and re-encoding through a text-mode file
# would depend on the platform's default encoding and can fail or corrupt
# non-ASCII characters.
with open(result_cot_few_shot_file_name, "wb") as file:
    file.write(result_cot_few_shot)

# Parse the saved JSON Lines file: one response object per non-empty line.
results_cot_few_shot = []
with open(result_cot_few_shot_file_name, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if line:  # tolerate blank lines instead of crashing in json.loads
            results_cot_few_shot.append(json.loads(line))

In [None]:
import json
import pandas as pd

# Load the JSONL files
# Maps each prompting method to the path of its batch result file. The keys
# become the column prefixes of the merged table ("<key>_description").
# Placeholders: every value must be filled in with a file path string.
files = {
    "zero_shot": # TODO: path of the zero-shot results .jsonl file,
    "cot": # TODO: path of the chain-of-thought results .jsonl file,
    "cot_few_shot": # TODO: path of the few-shot results .jsonl file
}

# Helpers to read a batch-result JSONL file into a DataFrame
def _row_id_from_custom_id(custom_id):
    """Return the trailing integer of a custom_id such as 'cot_few_shot_12', else None."""
    if not isinstance(custom_id, str):
        return None
    try:
        return int(custom_id.rpartition('_')[2])
    except ValueError:
        return None


def read_jsonl(file_path):
    """Read a batch-result JSONL file into a one-column DataFrame.

    Every non-empty line is parsed as JSON and the generated text is taken
    from ``response.body.choices[0].message.content``; the "Description: "
    label is removed and surrounding whitespace stripped. Lines that cannot be
    parsed, or that lack the expected structure, are reported with ``print``
    and skipped.

    Batch output lines are not guaranteed to come back in request order, and
    skipped lines would shift every later row. So when every kept line has a
    ``custom_id`` ending in a unique integer (the source row index written
    when the tasks were built), that integer becomes the DataFrame index and
    the rows are sorted by it. Otherwise the rows keep file order with the
    default 0..n-1 index.

    Args:
        file_path: Path of the JSONL file to read (UTF-8).

    Returns:
        pandas.DataFrame: A single 'description' column (no columns at all
        when no line could be read).
    """
    data = []
    row_ids = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            try:
                json_obj = json.loads(line)
                content = json_obj['response']['body']['choices'][0]['message']['content']
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            except (KeyError, IndexError, TypeError) as e:
                # TypeError/IndexError cover a null "response" or empty "choices".
                print(f"Missing key in JSON on line: {line}")
                print(f"Error message: {str(e)}")
                continue
            data.append({'description': content.replace("Description: ", "").strip()})
            row_ids.append(_row_id_from_custom_id(json_obj.get('custom_id')))

    frame = pd.DataFrame(data)
    # Only re-index when every row has a usable, unique id; a partial mapping
    # would be worse than plain file order.
    if data and None not in row_ids and len(set(row_ids)) == len(row_ids):
        frame.index = row_ids
        frame = frame.sort_index()
    return frame

# Parse every result file into its own one-column DataFrame, keyed by method.
dfs = {}
for _method, _path in files.items():
    dfs[_method] = read_jsonl(_path)

# Put the per-method frames side by side; rows are matched on their index and
# the method name becomes the outer column level.
aligned_df = pd.concat(list(dfs.values()), axis=1, keys=list(dfs.keys()))

# Collapse the (method, column) pairs into single "<method>_<column>" names.
_flat_names = []
for _pair in aligned_df.columns.values:
    _flat_names.append('_'.join(_pair).strip())
aligned_df.columns = _flat_names

# Ground-truth table the predictions are evaluated against.
ground_truth_df = pd.read_csv('Billboard_yearly_filtered.csv')

# Attach the prediction columns to the ground truth (again matched on index).
merged_df = pd.concat([ground_truth_df, aligned_df], axis=1)

# Show the resulting column names and the first rows for a sanity check.
print(merged_df.columns.tolist())
print(merged_df.head())

In [None]:
# Initialize BERTScorer
# A single scorer instance is created here and later passed to evaluation().
# NOTE(review): BERTScorer has to be imported (from bert_score) before this
# cell runs — confirm an earlier cell does so.
scorer = BERTScorer(lang="en", rescale_with_baseline=True)

In [None]:
# calculate scores
def evaluate_using_rouge(reference, hypothesis):
    """Score *hypothesis* against *reference* with ROUGE-1 and ROUGE-L.

    A stemming RougeScorer is created on every call.

    Args:
        reference: Ground-truth text.
        hypothesis: Generated text.

    Returns:
        dict: The scorer's results under the keys 'rouge-1' and 'rouge-l';
        callers read ``precision``, ``recall`` and ``fmeasure`` from each.
    """
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    raw = rouge.score(reference, hypothesis)
    return {'rouge-1': raw['rouge1'], 'rouge-l': raw['rougeL']}

def evaluate_using_bert_score(text, predicted_text, scorer):
    """Compute BERTScore precision, recall and F1 for one prediction.

    Args:
        text: Reference (ground-truth) text.
        predicted_text: Generated text to evaluate.
        scorer: Object exposing ``score(cands, refs)`` that returns three
            array-like values (precision, recall, F1), e.g. a BERTScorer.

    Returns:
        list[float]: ``[precision, recall, f1]``.
    """
    # score() takes the candidates first and the references second. Passing
    # them the other way round swaps the reported precision and recall.
    P, R, F1 = scorer.score([predicted_text], [text])
    return [P.mean().item(), R.mean().item(), F1.mean().item()]  # Calculate mean scores

In [None]:
# Main evaluation function
def evaluation(merged_df, scorer):
    """Average ROUGE and BERTScore metrics per prompting method.

    For each of 'zero_shot', 'cot' and 'cot_few_shot', every row's
    '<method>_description' prediction is compared with the row's
    'description' ground truth, and each metric is averaged over all rows.

    Args:
        merged_df: DataFrame holding 'description' plus one
            '<method>_description' column per method.
        scorer: BERTScore scorer forwarded to evaluate_using_bert_score.

    Returns:
        list[dict]: One dict per method with the method name and the mean
        ROUGE-1, ROUGE-L and BERTScore values.
    """
    metric_names = (
        'rouge-1_f1', 'rouge-1_p', 'rouge-1_r',
        'rouge-L_f1', 'rouge-L_p', 'rouge-L_r',
        'bert_p', 'bert_r', 'bert_f1',
    )
    row_total = len(merged_df)
    summary_rows = []

    for method in ('zero_shot', 'cot', 'cot_few_shot'):
        prediction_column = f'{method}_description'
        sums = dict.fromkeys(metric_names, 0.0)

        for _, song in merged_df.iterrows():
            truth = song['description']  # Ground truth description
            predicted = song[prediction_column]  # Predicted description

            rouge = evaluate_using_rouge(truth, predicted)
            bert = evaluate_using_bert_score(truth, predicted, scorer)

            unigram, longest = rouge['rouge-1'], rouge['rouge-l']
            sums['rouge-1_f1'] += unigram.fmeasure
            sums['rouge-1_p'] += unigram.precision
            sums['rouge-1_r'] += unigram.recall
            sums['rouge-L_f1'] += longest.fmeasure
            sums['rouge-L_p'] += longest.precision
            sums['rouge-L_r'] += longest.recall
            sums['bert_p'] += bert[0]
            sums['bert_r'] += bert[1]
            sums['bert_f1'] += bert[2]

        # Turn the running sums into per-row means, method name first.
        method_summary = {'method': method}
        for name in metric_names:
            method_summary[name] = sums[name] / row_total
        summary_rows.append(method_summary)

    return summary_rows

# Perform evaluation: one summary dict per prompting method
results = evaluation(merged_df, scorer)

# Create a DataFrame from the results summary (one row per method)
results_summary_df = pd.DataFrame(results)

# Save the results to a new CSV file (row index is not written)
results_summary_df.to_csv('Billboard_Description_Evaluation_Results_Summary.csv', index=False)

# Print the results summary DataFrame to verify
print(results_summary_df)

## Monthly

In [None]:
import requests
import json
import pandas as pd

# Function to prepare the zero-shot payload
def prepare_zero_shot_payload(lyrics):
    """Build the zero-shot chat-completion request body for one song.

    The prompt gives no task explanation beyond the required output format
    ("Description: ...") and a generic output example, followed by the lyrics.

    Args:
        lyrics: Lyrics text interpolated into the prompt.

    Returns:
        dict: Request body with "model", "messages", "max_tokens" and
        "temperature" keys.
    """
    # Paragraphs of the prompt; they are joined by a blank line below.
    prompt_parts = [
        "Say nothing but the Description as Description: {the output}",
        "Output example: Description: The song explores themes of love and heartbreak.",
        f"Lyrics: '{lyrics}'",
        "Description:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(prompt_parts)}

    return {
        "model": "gpt-4o",
        "messages": [user_message],
        "max_tokens": 300,
        "temperature": 0,
    }

# Function to prepare the chain-of-thought prompt payload
def prepare_cot_payload(lyrics):
    """Build the instruction-style ("chain-of-thought") request body for one song.

    Compared with the zero-shot prompt, this one explicitly asks for a brief
    description that includes a possible title and artist, and shows a
    concrete output example.

    Args:
        lyrics: Lyrics text interpolated into the prompt.

    Returns:
        dict: Request body with "model", "messages", "max_tokens" and
        "temperature" keys.
    """
    # Fixed instructions that precede the lyrics.
    instructions = (
        "Based on the lyrics provided, write a brief description of the song.\n\n"
        "Include the possible song title and artist name in the description.\n\n"
        "Say nothing but the Description as Description: {the output}\n\n"
        "Output example: Description: Honeymoon Avenue by Ariana Grande is about knowing you are at the end of a relationship and wishing it could not be the end and go back to the beginning and start over. \n\n"
    )
    content = instructions + f"Lyrics: '{lyrics}'\n\n" + "Description:"

    return {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": content}],
        "max_tokens": 300,
        "temperature": 0,
    }


# Function to prepare the chain-of-thought few-shot prompt payload
def prepare_cot_few_shot_payload(lyrics):
    """Build the few-shot request body: one worked example, then the task.

    The prompt shows an example (an excerpt of lyrics plus a reference
    description), then repeats the instructions of the chain-of-thought
    prompt for the target lyrics.

    Args:
        lyrics: Lyrics text of the song to describe.

    Returns:
        dict: Request body with "model", "messages", "max_tokens" and
        "temperature" keys.
    """
    # NOTE(review): "See upcoming pop shows" / "Get tickets for your favorite
    # artists" look like lyrics-page artifacts rather than lyrics. They are
    # left untouched here so the example text is unchanged.
    example_lyrics = (
        "I'd like to say we gave it a try\n"
        "I'd like to blame it all on life\n"
        "Maybe we just weren't right\n"
        "But that's a lie, that's a lie\n\n"

        "And we can deny it as much as we want\n"
        "But in time, our feelings will show\n"
        "'Cause sooner or later, we'll wonder why we gave up\n"
        "The truth is everyone knows, oh\n\n"

        "Almost, almost is never enough\n"
        "So close to being in love\n"
        "If I would have known that you wanted me the way I wanted you\n"
        "Then maybe we wouldn't be two worlds apart (Ah)\n"
        "But right here in each other's arms\n"
        "And we almost, we almost knew what love was\n"
        "But almost is never enough (Ah)\n\n"

        "If I could change the world overnight (Ah)\n"
        "There'd be no such thing as goodbye (Ah)\n"
        "You'd be standing right where you were (Ah)\n"
        "And we'd get the chance we deserve, oh (Ah)\n"
        "See upcoming pop shows\n"
        "Get tickets for your favorite artists\n\n"
    )

    example_description = (
        "On the collaborative track “Almost Is Never Enough,” Ariana Grande & Nathan Sykes play a couple who had a relationship that hadn’t gone right. "
        "Ariana would like to say things were going well but she knows that’s a lie and like the title states, almost is never enough to make the relationship work; you need to put full effort in. "
        "Both of them state that they didn’t feel the relationship while in it, but the mood of the song and lyrics suggest that they both want to either reconnect or they simply just miss better times.\n\n"
        "At the time of the song’s release, Nathan and Ariana were dating. Unfortunately, their relationship ended a few months later."
    )

    prompt = (
        f"Example:\n\n"
        f"Lyrics: '{example_lyrics}'\n\n"
        # The example answer carries the same "Description:" label the model is
        # asked to produce (it was previously labelled "Genre:").
        f"Description: {example_description}\n\n"

        f"Based on the provided lyrics, write a brief description of the song.\n\n"
        f"Include the possible song title and artist name in the description.\n\n"
        f"Say nothing but the Description as Description: {{the output}}\n\n"
        f"Output example: Description: Honeymoon Avenue by Ariana Grande is about knowing you are at the end of a relationship and wishing it could not be the end and go back to the beginning and start over. \n\n"
        f"Lyrics: '{lyrics}'\n\n"
        f"Description:"
    )
    messages = [{"role": "user", "content": prompt}]

    payload = {
        "model": "gpt-4o",
        "messages": messages,
        "max_tokens": 300,
        "temperature": 0
    }

    return payload

In [None]:
import os

# Read the API key from the environment instead of pasting it into the
# notebook, so the secret never ends up in a saved or shared copy.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

def make_request(payload, api_key, timeout=120):
    """POST one chat-completion request and return the decoded JSON reply.

    Args:
        payload: Request body (as produced by the prepare_*_payload helpers).
        api_key: API key sent as a bearer token.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run forever.

    Returns:
        dict: Parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status, so the
            failure surfaces here rather than as a missing 'choices' key later.
        requests.Timeout: If no response arrives within *timeout* seconds.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

# Load the CSV file
df = pd.read_csv('Billboard_weekly_filtered.csv')

# Output column -> payload builder, in the order the requests are sent per song.
description_builders = {
    'zero_shot_Description': prepare_zero_shot_payload,
    'cot_Description': prepare_cot_payload,
    'cot_few_shot_Description': prepare_cot_few_shot_payload,
}

# Prepare the results buffer: one list per output column.
results = {'Title': [], 'Artist': []}
results.update({column: [] for column in description_builders})

result_keys = list(results)


def _extract_description(response_text):
    """Return the text after the first 'Description:' label, or the whole text if absent."""
    _, label, remainder = response_text.partition("Description:")
    # The model occasionally omits the label; keep its answer instead of
    # crashing with an IndexError in the middle of a long run.
    return remainder.strip() if label else response_text.strip()


def _request_description(payload):
    """Send one request and return the cleaned description from the reply."""
    response = make_request(payload, api_key)
    try:
        content = response['choices'][0]['message']['content']
    except (KeyError, IndexError) as err:
        raise RuntimeError(f"Unexpected API response: {response}") from err
    return _extract_description(content.strip())


# Iterate over each row in the dataframe
count = 0
for index, row in df.iterrows():
    count += 1
    if count % 100 == 0:
        # Checkpoint: write what has been collected so far, then empty the
        # buffers (the current row is processed after the flush).
        url = f'2024 Lyrics, Genre, Description Billboard Results {count}_prompt(13).csv'  # change the file name here as needed
        results_df = pd.DataFrame(results)
        results_df.to_csv(url, index=False)
        print(f"Results saved to '{url}'")
        for keys in result_keys:
            results[keys].clear()

    lyrics = row['lyrics']

    # Fetch all three descriptions before appending anything, so the columns
    # stay the same length even if one request fails.
    descriptions = {
        column: _request_description(build_payload(lyrics))
        for column, build_payload in description_builders.items()
    }

    # Save the results
    results['Title'].append(row['Title'])
    results['Artist'].append(row['Artist'])
    for column, description in descriptions.items():
        results[column].append(description)

# Convert the remaining (not yet checkpointed) results to a dataframe
results_df = pd.DataFrame(results)

# Save the results to a new CSV file
final_results_path = '2024 Lyrics, Genre, Description Billboard Results 175_prompt(13).csv'
results_df.to_csv(final_results_path, index=False)

# Report the file that was actually written.
print(f"Results saved to '{final_results_path}'")

### Evaluation

In [None]:
import os
import glob
import warnings
import pandas as pd
from rouge_score import rouge_scorer
from bert_score import BERTScorer

# Suppress specific warnings
# Only FutureWarning from huggingface_hub.file_download and UserWarning from
# transformers.modeling_utils are silenced; all other warnings still show.
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.file_download")
warnings.filterwarnings("ignore", category=UserWarning, module="transformers.modeling_utils")

def evaluate_using_rouge(reference, hypothesis):
    """Score *hypothesis* against *reference* with ROUGE-1 and ROUGE-L.

    A stemming RougeScorer is created on every call.

    Args:
        reference: Ground-truth text.
        hypothesis: Generated text.

    Returns:
        dict: The scorer's results under the keys 'rouge-1' and 'rouge-l';
        callers read ``precision``, ``recall`` and ``fmeasure`` from each.
    """
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    raw = rouge.score(reference, hypothesis)
    return {'rouge-1': raw['rouge1'], 'rouge-l': raw['rougeL']}

def evaluate_using_bert_score(text, predicted_text, scorer):
    """Compute BERTScore precision, recall and F1 for one prediction.

    Args:
        text: Reference (ground-truth) text.
        predicted_text: Generated text to evaluate.
        scorer: Object exposing ``score(cands, refs)`` that returns three
            array-like values (precision, recall, F1), e.g. a BERTScorer.

    Returns:
        tuple[float, float, float]: ``(precision, recall, f1)``.
    """
    # score() takes the candidates first and the references second. Passing
    # them the other way round swaps the reported precision and recall.
    P, R, F1 = scorer.score([predicted_text], [text])
    return P.mean().item(), R.mean().item(), F1.mean().item()  # Calculate mean scores

# One BERTScore scorer shared by every comparison below.
scorer = BERTScorer(lang="en", rescale_with_baseline=True)

# Ground-truth descriptions the generated ones are compared with.
ground_truth_df = pd.read_csv('Billboard_weekly_filtered.csv')

# Collects one (file count, file name, average scores) entry per result file.
file_scores = []

# Result files to evaluate, looked up in this order; names that do not exist
# on disk simply contribute nothing.
_result_patterns = (
    '2024 Lyrics, Genre, Description Billboard Results 100_prompt(13).csv',
    '2024 Lyrics, Genre, Description Billboard Results 175_prompt(13).csv',
)
result_files = [path for pattern in _result_patterns for path in glob.glob(pattern)]

if result_files:
    print(f"Found {len(result_files)} result files.")
else:
    print("No result files found. Please check the file pattern and ensure files exist.")

# Score every result file and record its per-method average metrics.
_desc_types = ('zero_shot', 'cot', 'cot_few_shot')
_score_keys = (
    'rouge1_f', 'rouge1_r', 'rouge1_p',
    'rougeL_f', 'rougeL_r', 'rougeL_p',
    'bert_p', 'bert_r', 'bert_f1',
)

for csv_file in result_files:
    df = pd.read_csv(csv_file)
    file_name = os.path.basename(csv_file)

    # Label for this file: the last space-separated token of the file name,
    # cut at the first dot.
    file_count = file_name.split(' ')[-1].split('.')[0]

    # Pair each generated row with its ground truth via Title and Artist.
    merged_df = df.merge(ground_truth_df, on=['Title', 'Artist'], how='inner')

    if merged_df.empty:
        print(f"No matching records found for {csv_file}. Skipping.")
        continue

    n = len(merged_df)
    total_scores = {desc_type: dict.fromkeys(_score_keys, 0) for desc_type in _desc_types}

    # Accumulate the metric sums row by row for each description type.
    for _, row in merged_df.iterrows():
        reference = row['description']

        for desc_type in _desc_types:
            hypothesis = row[f'{desc_type}_Description']
            running = total_scores[desc_type]

            # ROUGE-1 and ROUGE-L
            rouge_scores = evaluate_using_rouge(reference, hypothesis)
            for prefix, score in (('rouge1', rouge_scores['rouge-1']), ('rougeL', rouge_scores['rouge-l'])):
                running[f'{prefix}_f'] += score.fmeasure
                running[f'{prefix}_r'] += score.recall
                running[f'{prefix}_p'] += score.precision

            # BERTScore
            P, R, F1 = evaluate_using_bert_score(reference, hypothesis, scorer)
            running['bert_p'] += P
            running['bert_r'] += R
            running['bert_f1'] += F1

    # merged_df is non-empty here, so n is always positive: turn the sums into
    # averages rounded to 4 decimals.
    avg_scores = {
        desc_type: {
            f'avg_{key}': round(total / n, 4)
            for key, total in total_scores[desc_type].items()
        }
        for desc_type in _desc_types
    }
    file_scores.append((file_count, file_name, avg_scores))

# Build the summary table: one row per result file plus a final "Total" row.
summary_data = {
    'File Count': [entry[0] for entry in file_scores],
    'File': [entry[1] for entry in file_scores],
}

if not file_scores:
    print("No data found in the specified files.")
else:
    _summary_types = ('zero_shot', 'cot', 'cot_few_shot')
    # (column label suffix, key in the per-file average dict), in column order.
    _metric_columns = (
        ('ROUGE-1 F1', 'avg_rouge1_f'),
        ('ROUGE-1 Recall', 'avg_rouge1_r'),
        ('ROUGE-1 Precision', 'avg_rouge1_p'),
        ('ROUGE-L F1', 'avg_rougeL_f'),
        ('ROUGE-L Recall', 'avg_rougeL_r'),
        ('ROUGE-L Precision', 'avg_rougeL_p'),
        ('BERTScore Precision', 'avg_bert_p'),
        ('BERTScore Recall', 'avg_bert_r'),
        ('BERTScore F1', 'avg_bert_f1'),
    )
    _file_total = len(file_scores)

    # NOTE(review): the overall value is the plain mean of the already-rounded
    # per-file averages, so every file counts equally regardless of how many
    # rows it contributed.
    overall_avg_scores = {
        desc_type: {
            key: round(sum(entry[2][desc_type][key] for entry in file_scores) / _file_total, 4)
            for _, key in _metric_columns
        }
        for desc_type in _summary_types
    }

    # Per-file columns, grouped by description type.
    for desc_type in _summary_types:
        heading = desc_type.capitalize()
        for label, key in _metric_columns:
            summary_data[f'{heading} {label}'] = [
                round(entry[2][desc_type][key], 4) for entry in file_scores
            ]

    # Add the "Total" row for each metric
    summary_data['File Count'].append('Total')
    summary_data['File'].append('Total')
    for desc_type in _summary_types:
        heading = desc_type.capitalize()
        for label, key in _metric_columns:
            summary_data[f'{heading} {label}'].append(round(overall_avg_scores[desc_type][key], 4))

    # Create the summary dataframe and write it out
    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv('Billboard_Description_GPT_4o_experiment_results_2024_prompt(13).csv', index=False)

    print("Summary of results saved to 'Billboard_Description_GPT_4o_experiment_results_2024_prompt(13).csv'")