In [2]:
from openai import OpenAI
import os
import pandas as pd
from pprint import pprint
from dotenv import load_dotenv


# Pull variables defined in the local .env file into the environment.
load_dotenv()

# The OpenAI client is built from the key in the environment; stop early
# with a clear error when the key is absent.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None:
    raise ValueError("OPENAI_API_KEY environment variable not found")

client = OpenAI(api_key=api_key)

In [3]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send one user prompt to the chat completions endpoint and return the reply message.

    The simple prompt/completion style is used on purpose: the course
    instructor recommended it for this class.

    Parameters:
    - prompt: Text sent as the content of a single ``user`` message.
    - model: Name of the chat model to query.
    - temperature: Degree of randomness of the model's output. The default
      of 0 matches the value that used to be hard-coded here.

    Returns:
    - The ``message`` object of the first choice in the response; callers
      read the generated text from its ``content`` attribute.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message


In [4]:
# Read the caption table from musiccaps-public.csv and preview its first ten rows.
df = pd.read_csv("musiccaps-public.csv")
df.head(n=10)

Unnamed: 0,ytid,start_s,end_s,audioset_positive_labels,aspect_list,caption,author_id,is_balanced_subset,is_audioset_eval
0,-0Gj8-vB1q4,30,40,"/m/0140xf,/m/02cjck,/m/04rlf","['low quality', 'sustained strings melody', 's...",The low quality recording features a ballad so...,4,False,True
1,-0SdAVK79lg,30,40,"/m/0155w,/m/01lyv,/m/0342h,/m/042v_gx,/m/04rlf...","['guitar song', 'piano backing', 'simple percu...",This song features an electric guitar as the m...,0,False,False
2,-0vPFx-wRRI,30,40,"/m/025_jnm,/m/04rlf","['amateur recording', 'finger snipping', 'male...",a male voice is singing a melody with changing...,6,False,True
3,-0xzrMun0Rs,30,40,"/m/01g90h,/m/04rlf","['backing track', 'jazzy', 'digital drums', 'p...",This song contains digital drums playing a sim...,6,False,True
4,-1LrH01Ei1w,30,40,"/m/02p0sh1,/m/04rlf","['rubab instrument', 'repetitive melody on dif...",This song features a rubber instrument being p...,0,False,False
5,-1OlgJWehn8,30,40,"/m/04rlf,/m/06bz3","['instrumental', 'white noise', 'female vocali...",This clip is three tracks playing consecutivel...,7,False,True
6,-1UWSisR2zo,30,40,"/m/04rlf,/m/0xzly","['live performance', 'poor audio quality', 'am...",A male singer sings this groovy melody. The so...,1,False,True
7,-3Kv4fdm7Uk,30,40,"/m/04rlf,/m/04szw,/m/0l156b","['steeldrum', 'higher register', 'amateur reco...",someone is playing a high pitched melody on a ...,6,False,True
8,-4NLarMj4xU,30,40,"/m/04rlf,/t/dd00034","['pop', 'tinny wide hi hats', 'mellow piano me...",The Pop song features a soft female vocal sing...,4,False,False
9,-4SYC2YgzL8,30,40,"/m/04rlf,/m/04wptg,/m/0ggq0m",['solo live direct input acoustic guitar strum...,low fidelity audio from a live performance fea...,8,False,True


In [20]:
# Spot-check one aspect list and one caption, addressed by row label.
print(df.loc[103, 'aspect_list'])
print(df.loc[971, 'caption'])

['instrumental', 'medium tempo', 'electric guitar lead', 'ambient', 'steady drumming', 'groovy bass line', 'trumpets', 'melodic', 'pleasant', 'funky', 'groovy', 'soft rock', 'pop rock', 'funk rock', 'youthful', 'atmospheric', 'brass band', 'soul', 'neo soul', 'soothing', 'rhythmic acoustic guitar']
This Latin pop song features a male voice singing the main melody. The voice sings at a high pitch. This is accompanied by Latin style percussion. A variety of percussion is used including the claves and cowbell. The bass plays a groovy bassline. The piano plays chords with a Latin feel. The brass section plays fills in between lines. This song has a romantic mood. This song can be played in a party for a slow dance.


In [21]:
# Expert-level example captions, selected from the dataset by row label.
# The trailing comment is the genre each row was chosen to represent.
expert_0 = df.loc[0, 'caption']      # Christian
expert_1 = df.loc[15, 'caption']     # electronic music
expert_2 = df.loc[19, 'caption']     # African
expert_3 = df.loc[30, 'caption']     # rock
expert_4 = df.loc[41, 'caption']     # classical
expert_5 = df.loc[103, 'caption']    # funk
expert_6 = df.loc[114, 'caption']    # reggae
expert_7 = df.loc[91, 'caption']     # pop
expert_8 = df.loc[82, 'caption']     # folk
expert_9 = df.loc[124, 'caption']    # soul
expert_10 = df.loc[13, 'caption']    # Middle Eastern
expert_11 = df.loc[37, 'caption']    # R&B
expert_12 = df.loc[139, 'caption']   # independent
expert_13 = df.loc[202, 'caption']   # traditional (Indian)
expert_14 = df.loc[997, 'caption']   # ska (Jamaican dance)
expert_15 = df.loc[1342, 'caption']  # music of Asia
expert_16 = df.loc[5, 'caption']     # vocal
expert_17 = df.loc[436, 'caption']   # new age
expert_18 = df.loc[441, 'caption']   # hip hop
expert_19 = df.loc[455, 'caption']   # country
expert_20 = df.loc[966, 'caption']   # jazz
expert_21 = df.loc[971, 'caption']   # Latin American
expert_22 = df.loc[588, 'caption']   # blues
expert_23 = df.loc[614, 'caption']   # music for children



# Novice-level example prompts. novice_N is used alongside expert_N as one
# <expert>/<novice> example pair when the few-shot prompt is assembled.
# These strings are sent to the model verbatim, so edits here change the prompt.
# NOTE(review): some pairs do not obviously match the genre tag on the expert
# side (e.g. expert_1 is tagged "Electronic music", expert_19 "country") — confirm.
novice_0 = "A melancholic piano song with a female singer that would be played at church"
novice_1 = "R&B, male singer, string, strong bass, drums, suited for an intimate setting"
novice_2 = "Gospel music for children, bass and drums, spiritual feeling"
novice_3 = "Rock music with guitar and drums, with angry and aggressive vocals"
novice_4 = "Calming classical music similar to Bach with harp"
novice_5 = "instrumental piece with rhythmic guitar lead, relaxing funk"
novice_6 = "groovy reggae piece with male singer"
novice_7 = "joyful pop song with passionate male vocal with shiny drum set sounds and wooden percussion"
novice_8 = "joyous folk song with free-flowing flute and string melody, as well as wooden percussion"
novice_9 = "soul, romantic love song with lush saxophone, drum and piano accompaniment"
novice_10 = "a middle eastern folk song with oud, tambourine, and darbuka. A Moroccan market setting."
novice_11 = "danceable R&B piece with male vocal with '4 on the floor' pattern"
novice_12 = "indie song, female vocal, synth bass, industrial sound, medium fast"
novice_13 = "an energetic piece for the Bhangra dance, brass sound"
novice_14 = "a funny ska song with wah-wah effect on the guitar, brass melody"
novice_15 = "Chinese instrumental music with dizi and guqin, hopeful atmosphere"
novice_16 = "lively female vocal lead with male backup singers"
# NOTE(review): "melesmetic" looks like a misspelling (presumably "melismatic") — confirm before changing the prompt text.
novice_17 = "meditative new age song with Indian tabla, melesmetic bass, and angel-like singing"
novice_18 = "addictive and groovy hip hop with rapper, repeating piano line and digitally processed female voice"
novice_19 = "nostalgic euro pop with walking bass"
novice_20 = "classic New Orleans jazz, vintage swing feel, jazz orchestra with charming female talking intro"
novice_21 = "party Latin dance music, male singer, I want to hear clear clave and cowbell sounds, lyrics depicting love"
novice_22 = "slow blues piece with guitar solos and bass guitar played live"
novice_23 = "light-hearted children music played from a toy, girl giggles"

In [24]:
# Few-shot example pools. experts[i] is paired with novices[i] when the
# prompt is assembled, so the two lists must stay in the same order.
experts = [
    expert_0, expert_1, expert_2, expert_3, expert_4, expert_5,
    expert_6, expert_7, expert_8, expert_9, expert_10, expert_11,
    expert_12, expert_13, expert_14, expert_15, expert_16, expert_17,
    expert_18, expert_19, expert_20, expert_21, expert_22, expert_23,
]

novices = [
    novice_0, novice_1, novice_2, novice_3, novice_4, novice_5,
    novice_6, novice_7, novice_8, novice_9, novice_10, novice_11,
    novice_12, novice_13, novice_14, novice_15, novice_16, novice_17,
    novice_18, novice_19, novice_20, novice_21, novice_22, novice_23,
]

# Fail fast if an example is added to one list but not the other.
assert len(experts) == len(novices), "experts and novices must pair up one-to-one"

In [26]:

def generate_prompt(expert_0, expert_1, expert_2, expert_3, expert_4, novice_0, novice_1, novice_2, novice_3, novice_4, caption):
    """Build a five-example few-shot prompt for rewriting an expert caption.

    Parameters:
    - expert_0 .. expert_4: Expert-level example prompts.
    - novice_0 .. novice_4: Novice-level example prompts; novice_N is shown
      directly under expert_N.
    - caption: The expert-level prompt to be transformed.

    Returns:
    - The prompt string. It ends with an open ``<novice>:`` slot after the
      target caption. The indentation inside the template below is part of
      the returned text.

    NOTE: a later cell defines another ``generate_prompt`` that takes lists;
    once that cell runs, it replaces this definition.
    """
    prompt = f"""
        
        ---
        Given these examples below:
        <expert>: {expert_0}
        <novice>: {novice_0}

        <expert>: {expert_1}
        <novice>: {novice_1}

        <expert>: {expert_2}
        <novice>: {novice_2}
        
        <expert>: {expert_3}
        <novice>: {novice_3}

        <expert>: {expert_4}
        <novice>: {novice_4}
        ---

        Transform the given input expert-level prompt into a prompt that a user with little music experience would use to prompt music generation models. 

        Keep the instruments, genres, mood, and other information that represents the essence of the music.
    
        Write the output succinctly in a coherent sentence.

        <expert>: {caption}
        <novice>:
        """
    return prompt

In [28]:
def generate_prompt(experts, novices, caption):
    """
    Build a few-shot prompt from paired expert/novice example prompts.

    Every pair in the two sequences is used, so the number of examples is
    set by the caller rather than fixed inside this function.

    Parameters:
    - experts: Sequence of expert-level example prompts.
    - novices: Sequence of novice-level example prompts, aligned
      index-for-index with `experts`.
    - caption: The expert-level prompt to be transformed.

    Returns:
    - A formatted prompt string: the example pairs, the transformation
      instructions, then the target caption followed by an open `<novice>:` slot.

    Raises:
    - ValueError: if `experts` and `novices` differ in length.
    """
    if len(experts) != len(novices):
        raise ValueError(
            "experts and novices must have the same length "
            f"(got {len(experts)} and {len(novices)})"
        )

    # One "<expert>/<novice>" stanza per example pair, each followed by a blank line.
    examples = "".join(
        f"<expert>: {expert}\n<novice>: {novice}\n\n"
        for expert, novice in zip(experts, novices)
    )

    # Spelled out line by line so the text sent to the model stays exactly as
    # before, including the whitespace-only line after the closing '---'.
    instructions = (
        "---\n"
        "    \n"
        "Transform the given input expert-level prompt into a prompt that a user with little music experience would use to prompt music generation models.\n"
        "\n"
        "Keep the instruments, genres, mood, and other information that represents the essence of the music.\n"
        "\n"
        "Write the output succinctly in a coherent sentence.\n"
        "\n"
    )

    return (
        "\n---\nGiven these examples below:\n"
        + examples
        + instructions
        + f"<expert>: {caption}\n<novice>:"
    )


# In-context learning TODO
- Add few-shot examples for each genre
- Hold out the balanced subset (`is_balanced_subset`) as a validation set
- Optimize the system prompt

In [None]:
# Keep only the rows whose is_balanced_subset flag is True, then display them.
balanced_set = df.loc[df['is_balanced_subset'].eq(True)]
balanced_set

In [29]:
# Rewrite a small slice of captions (positions 2 and 3) with the few-shot prompt.
# Results are appended one at a time so that anything already generated is
# kept if a later API call fails.
novice_prompts = []

for caption in df['caption'].iloc[2:4]:
    novice_prompts.append(
        get_completion(generate_prompt(experts, novices, caption))
    )

# First the raw message objects, then only their text.
pprint(novice_prompts)
pprint([message.content for message in novice_prompts])


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [39]:
# Show only the generated text of each stored reply.
print([message.content for message in novice_prompts])

['Male singer with changing tempos, snapping fingers rhythmically, recorded in an empty room, casual and relaxed vibe', 'Upbeat digital music with drums, guitars, bass, piano, trumpet, and bongos. Sounds like it could be used in an ad.']
