In [1]:
from openai import OpenAI
import os
import pandas as pd
from pprint import pprint
from dotenv import load_dotenv


# Load environment variables from .env file
load_dotenv()

# Initialize OpenAI client with API key from environment variables.
# A missing key and an empty one (e.g. a bare `OPENAI_API_KEY=` line) are both
# rejected here, so the failure surfaces in this cell instead of on the first
# API request.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable not found or empty")

client = OpenAI(api_key=api_key)

In [2]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send a single-turn user prompt to the chat completions endpoint.

    Course note: the simple prompt -> completion paradigm is preferred for
    this class, so the prompt is wrapped in exactly one user message.

    Args:
        prompt: Text placed in the single ``user`` message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output. Defaults to
            0, the value this helper previously hard-coded.

    Returns:
        The ``message`` object of the first returned choice (not just its
        text); read ``.content`` on it to get the generated string.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    # Callers rely on receiving the message object itself, so do not unwrap
    # `.content` here.
    return response.choices[0].message


In [10]:
# Read the MusicCaps caption table from disk, then preview its first ten rows.
df = pd.read_csv("musiccaps-public.csv")
df.head(n=10)

Unnamed: 0,ytid,start_s,end_s,audioset_positive_labels,aspect_list,caption,author_id,is_balanced_subset,is_audioset_eval
0,-0Gj8-vB1q4,30,40,"/m/0140xf,/m/02cjck,/m/04rlf","['low quality', 'sustained strings melody', 's...",The low quality recording features a ballad so...,4,False,True
1,-0SdAVK79lg,30,40,"/m/0155w,/m/01lyv,/m/0342h,/m/042v_gx,/m/04rlf...","['guitar song', 'piano backing', 'simple percu...",This song features an electric guitar as the m...,0,False,False
2,-0vPFx-wRRI,30,40,"/m/025_jnm,/m/04rlf","['amateur recording', 'finger snipping', 'male...",a male voice is singing a melody with changing...,6,False,True
3,-0xzrMun0Rs,30,40,"/m/01g90h,/m/04rlf","['backing track', 'jazzy', 'digital drums', 'p...",This song contains digital drums playing a sim...,6,False,True
4,-1LrH01Ei1w,30,40,"/m/02p0sh1,/m/04rlf","['rubab instrument', 'repetitive melody on dif...",This song features a rubber instrument being p...,0,False,False
5,-1OlgJWehn8,30,40,"/m/04rlf,/m/06bz3","['instrumental', 'white noise', 'female vocali...",This clip is three tracks playing consecutivel...,7,False,True
6,-1UWSisR2zo,30,40,"/m/04rlf,/m/0xzly","['live performance', 'poor audio quality', 'am...",A male singer sings this groovy melody. The so...,1,False,True
7,-3Kv4fdm7Uk,30,40,"/m/04rlf,/m/04szw,/m/0l156b","['steeldrum', 'higher register', 'amateur reco...",someone is playing a high pitched melody on a ...,6,False,True
8,-4NLarMj4xU,30,40,"/m/04rlf,/t/dd00034","['pop', 'tinny wide hi hats', 'mellow piano me...",The Pop song features a soft female vocal sing...,4,False,False
9,-4SYC2YgzL8,30,40,"/m/04rlf,/m/04wptg,/m/0ggq0m",['solo live direct input acoustic guitar strum...,low fidelity audio from a live performance fea...,8,False,True


In [32]:
# Show the full caption text for rows 2 and 3, one caption per line.
print(df['caption'][2], df['caption'][3], sep="\n")

a male voice is singing a melody with changing tempos while snipping his fingers rhythmically. The recording sounds like it has been recorded in an empty room. This song may be playing, practicing snipping and singing along.
This song contains digital drums playing a simple groove along with two guitars. One strumming chords along with the snare the other one playing a melody on top. An e-bass is playing the footnote while a piano is playing a major and minor chord progression. A trumpet is playing a loud melody alongside the guitar. All the instruments sound flat and are being played by a keyboard. There are little bongo hits in the background panned to the left side of the speakers. Apart from the music you can hear eating sounds and a stomach rumbling. This song may be playing for an advertisement.


In [42]:
# Expert-level captions used as few-shot examples, picked from the dataset by
# row label. Genres, in order: Christian, Electronic music, Gospel, rock,
# classical.
expert_0, expert_1, expert_2, expert_3, expert_4 = (
    df['caption'][row] for row in (0, 15, 19, 30, 41)
)

# Novice-style counterparts; novice_N is the example paired with expert_N.
novice_0, novice_1, novice_2, novice_3, novice_4 = (
    "A melancholic piano song with a female singer that would be played at church",
    "R&B, male singer, string, strong bass, drums, suited for an intimate setting",
    "Gospel music for children, bass and drums, spiritual feeling",
    "Rock music with guitar and drums, with angry and aggressive vocals",
    "Calming classical music similar to Bach with harp",
)
def generate_prompt(expert_0, expert_1, expert_2, expert_3, expert_4, novice_0, novice_1, novice_2, novice_3, novice_4, caption):
    """Build the few-shot prompt that rewrites an expert caption for a novice.

    The prompt lists five <expert>/<novice> example pairs (expert_N is shown
    together with novice_N), followed by the rewriting instructions and the
    caption to transform. It ends with an open ``<novice>:`` tag for the
    model to complete.

    Args:
        expert_0 .. expert_4: Expert-level example captions.
        novice_0 .. novice_4: Novice-level rewrites of the matching expert
            example.
        caption: The expert-level caption to be transformed.

    Returns:
        The assembled prompt as a single string.
    """
    # The template's whitespace is part of the text sent to the model, so it
    # is kept exactly as written.
    return f"""
        
        ---
        Given these examples below:
        <expert>: {expert_0}
        <novice>: {novice_0}

        <expert>: {expert_1}
        <novice>: {novice_1}

        <expert>: {expert_2}
        <novice>: {novice_2}
        
        <expert>: {expert_3}
        <novice>: {novice_3}

        <expert>: {expert_4}
        <novice>: {novice_4}
        ---

        Transform the given input expert-level prompt into a prompt that a user with little music experience would use to prompt music generation models. 

        Keep the instruments, genres, mood, and other information that represents the essence of the music.
    
        Write the output succinctly in a coherent sentence.

        <expert>: {caption}
        <novice>:
        """

# In-context todo
- Add examples for each genre
- Hold out the balanced subset (`is_balanced_subset == True`) as the validation set
- Optimize the system prompt

In [None]:
# Select the rows whose `is_balanced_subset` flag equals True, then display them.
balanced_set = df.loc[df['is_balanced_subset'].eq(True)]
balanced_set

In [43]:
# The few-shot example pairs are the same for every request; only the caption
# being rewritten changes.
few_shot_args = (
    expert_0, expert_1, expert_2, expert_3, expert_4,
    novice_0, novice_1, novice_2, novice_3, novice_4,
)

# One model reply (a message object) per caption in the [2:4] slice.
novice_prompts = [
    get_completion(generate_prompt(*few_shot_args, caption))
    for caption in df['caption'][2:4]
]

# First the raw message objects, then only their text content.
pprint(novice_prompts)
pprint([message.content for message in novice_prompts])


[ChatCompletionMessage(content='Male singer with changing tempos, snapping fingers rhythmically, recorded in an empty room, suited for practicing singing along.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None),
 ChatCompletionMessage(content='A catchy tune with guitars, piano, trumpet, and digital drums, suitable for an advertisement.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)]
['Male singer with changing tempos, snapping fingers rhythmically, recorded in '
 'an empty room, suited for practicing singing along.',
 'A catchy tune with guitars, piano, trumpet, and digital drums, suitable for '
 'an advertisement.']


In [39]:
# Plain-text view of the generated novice prompts: one string per model reply.
print([reply.content for reply in novice_prompts])

['Male singer with changing tempos, snapping fingers rhythmically, recorded in an empty room, casual and relaxed vibe', 'Upbeat digital music with drums, guitars, bass, piano, trumpet, and bongos. Sounds like it could be used in an ad.']
