## **Function Calling LLMs - Team Project**

In [47]:
import openai
import json
import os

import pandas as pd
from dotenv import load_dotenv


# Pull settings from a local .env file (python-dotenv) into the process environment.
load_dotenv()
# Read the OpenAI key from the API_KEY environment variable.
# os.environ.get() yields None when the variable is unset, so a missing key
# is not reported here.
openai.api_key = os.environ.get("API_KEY")

## Data

##### Dataset 1: Top 5000 Albums on Rate Your Music (RYM)

Source: https://www.kaggle.com/datasets/tobennao/rym-top-5000/

In [48]:
# Columns of the albums CSV that are loaded into the notebook.
album_columns_to_keep = [
    "release_name",      # album title
    "artist_name",       # artist / band / group
    "release_date",      # release date of the album
    "primary_genres",    # primary genre classifications
    "secondary_genres",  # secondary genre classifications
    "descriptors",       # album tags
    "avg_rating",        # average rating, on a scale of 0-5
    "rating_count",      # number of ratings
    "review_count",      # number of reviews
]

# Load only the columns listed above.
ALBUMS = pd.read_csv(
    "./data/popular_albums.csv",
    usecols=album_columns_to_keep,
)

In [49]:
# Summary of the albums table: column names, column count, row count.
print(f"Attributes: {list(ALBUMS.columns)}")
print(f"Number of attributes (columns): {len(ALBUMS.columns)}")
print(f"Number of examples (rows): {len(ALBUMS)}")

# Preview of the first ten rows (rendered as the cell output).
ALBUMS.head(10)

Attributes: ['release_name', 'artist_name', 'release_date', 'primary_genres', 'secondary_genres', 'descriptors', 'avg_rating', 'rating_count', 'review_count']
Number of attributes (columns): 9
Number of examples (rows): 5000


Unnamed: 0,release_name,artist_name,release_date,primary_genres,secondary_genres,descriptors,avg_rating,rating_count,review_count
0,OK Computer,Radiohead,1997-06-16,"Alternative Rock, Art Rock",,"melancholic, anxious, futuristic, malevocals, ...",4.24,74027,1541
1,Kid A,Radiohead,2000-10-03,"Art Rock, Experimental Rock, Electronic","Ambient, Electronic, IDM","cold, melancholic, futuristic, anxious, atmosp...",4.23,61658,751
2,The Dark Side of the Moon,Pink Floyd,1973-03-23,"Art Rock, Progressive Rock","Psychedelic Rock, Space Rock","philosophical, atmospheric, introspective, exi...",4.21,60192,1557
3,Loveless,My Bloody Valentine,1991-11-11,"Shoegaze, Noise Pop","Dream Pop, Neo-Psychedelia","noisy, ethereal, atmospheric, romantic, love, ...",4.24,53174,1264
4,My Beautiful Dark Twisted Fantasy,Kanye West,2010-11-22,"Pop Rap, Hip Hop",Art Pop,"epic, boastful, passionate, sampling, hedonist...",4.09,52149,638
5,In Rainbows,Radiohead,2007-10-10,"Art Rock, Alternative Rock","Electronic, Dream Pop, Art Pop","lush, malevocals, introspective, melancholic, ...",4.2,51335,752
6,Wish You Were Here,Pink Floyd,1975-09-12,"Progressive Rock, Art Rock","Space Rock, Psychedelic Rock","melancholic, atmospheric, progressive, malevoc...",4.3,51246,1006
7,In the Aeroplane Over the Sea,Neutral Milk Hotel,1998-02-10,"Indie Folk, Indie Rock","Psychedelic Folk, Singer-Songwriter, Lo-Fi/Sla...","passionate, poetic, death, cryptic, surreal, b...",4.09,50353,983
8,The Bends,Radiohead,1995-03-13,Alternative Rock,Britpop,"melancholic, lonely, melodic, anxious, introsp...",3.84,48578,798
9,To Pimp a Butterfly,Kendrick Lamar,2015-03-15,"Conscious Hip Hop, West Coast Hip Hop, Jazz Rap","Political Hip Hop, Neo-Soul, Funk, Poetry, Exp...","political, conscious, poetic, conceptalbum, pr...",4.3,47821,415


##### Dataset 2: Most Streamed Tracks on Spotify

Source: https://www.kaggle.com/datasets/nelgiriyewithana/top-spotify-songs-2023

In [50]:
# Columns of the tracks CSV that are loaded into the notebook.
song_columns_to_keep = [
    'track_name',            # Name of the song
    'artist(s)_name',        # Name of the artist(s) of the song
    'artist_count',          # Number of artists contributing to the song
    'released_year',         # Year when the song was released
    'released_month',        # Month when the song was released
    'released_day',          # Day of the month when the song was released
    'in_spotify_playlists',  # Number of Spotify playlists the song is included in
    'in_spotify_charts',     # Presence and rank of the song on Spotify charts
    'streams',               # Total number of streams on Spotify
    'in_apple_playlists',    # Number of Apple Music playlists the song is included in
    'in_apple_charts',       # Presence and rank of the song on Apple Music charts
    'in_deezer_playlists',   # Number of Deezer playlists the song is included in
    'in_deezer_charts',      # Presence and rank of the song on Deezer charts
    'in_shazam_charts',      # Presence and rank of the song on Shazam charts
    'bpm',                   # Beats per minute, a measure of song tempo
    'key',                   # Key of the song
    'mode',                  # Mode of the song (major or minor)
    'danceability_%',        # Percentage indicating how suitable the song is for dancing
    'valence_%',             # Positivity of the song's musical content
    'energy_%',              # Perceived energy level of the song
    'acousticness_%',        # Amount of acoustic sound in the song
    'instrumentalness_%',    # Amount of instrumental content in the song
    'liveness_%',            # Presence of live performance elements
    'speechiness_%',         # Amount of spoken words in the song
]

# Pass the column list to read_csv so it is actually applied (it was
# previously defined but unused), mirroring how ALBUMS is loaded.
# encoding_errors="ignore" drops bytes that cannot be decoded instead of raising.
SONGS = pd.read_csv(
    "./data/tracks.csv",
    usecols=song_columns_to_keep,
    encoding_errors="ignore",
)

In [51]:
# Summary of the songs table: column names, column count, row count.
print(f"Attributes: {list(SONGS.columns)}")
print(f"Number of attributes (columns): {len(SONGS.columns)}")
print(f"Number of examples (rows): {len(SONGS)}")

# Preview of the first ten rows (rendered as the cell output).
SONGS.head(10)

Attributes: ['track_name', 'artist(s)_name', 'artist_count', 'released_year', 'released_month', 'released_day', 'in_spotify_playlists', 'in_spotify_charts', 'streams', 'in_apple_playlists', 'in_apple_charts', 'in_deezer_playlists', 'in_deezer_charts', 'in_shazam_charts', 'bpm', 'key', 'mode', 'danceability_%', 'valence_%', 'energy_%', 'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%']
Number of attributes (columns): 24
Number of examples (rows): 953


Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,Major,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Minor,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,Major,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Minor,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Minor,81,56,48,21,0,8,33


## Functions

In [63]:
from pprint import pprint
from helper import Registry, to_json

# Registry instance from the local `helper` module. Its `register` method is
# applied as a decorator to the functions below, and `registered_functions`
# is later consulted to look up a function by name.
register = Registry()

#### Albums

In [64]:
@register.register
@to_json
def top_rated_albums(n=10):
    """
    Returns the top-rated albums based on average rating.

    Parameters:
        n (int): The number of albums to return. Default is 10. Values that
            arrive as floats or numeric strings are converted with int()
            (fractions are truncated), because the function schema declares
            this argument as a JSON "number".

    Returns:
        list[dict]: One record per album, highest `avg_rating` first.
        NOTE(review): the `to_json` decorator from `helper` presumably
        serializes this before callers see it - confirm.

    Raises:
        ValueError / TypeError: If `n` cannot be converted to an integer.
    """
    # DataFrame.head() slices positionally and rejects non-integer counts.
    limit = int(n)
    ranked = ALBUMS.sort_values(by='avg_rating', ascending=False)
    return ranked.head(limit).to_dict(orient='records')

@register.register
@to_json
def most_reviewed_albums(n=10):
    """
    Returns the most reviewed albums.

    Parameters:
        n (int): The number of albums to return. Default is 10. Values that
            arrive as floats or numeric strings are converted with int()
            (fractions are truncated), because the function schema declares
            this argument as a JSON "number".

    Returns:
        list[dict]: One record per album, highest `review_count` first.
        NOTE(review): the `to_json` decorator from `helper` presumably
        serializes this before callers see it - confirm.

    Raises:
        ValueError / TypeError: If `n` cannot be converted to an integer.
    """
    # DataFrame.head() slices positionally and rejects non-integer counts.
    limit = int(n)
    ranked = ALBUMS.sort_values(by='review_count', ascending=False)
    return ranked.head(limit).to_dict(orient='records')
    
@register.register
@to_json
def albums_by_artist(artist_name):
    """
    Look up every album whose `artist_name` equals the given name exactly.

    Parameters:
        artist_name (str): The artist name to match (exact, case-sensitive).

    Returns:
        list[dict]: One record per matching album; empty when nothing matches.
    """
    # Boolean mask: True for rows whose artist equals the requested name.
    is_by_artist = ALBUMS['artist_name'].eq(artist_name)
    matching_rows = ALBUMS.loc[is_by_artist]
    return matching_rows.to_dict(orient='records')

#### Songs

In [61]:
# @register.register
@to_json
def top_streamed_songs(n=10):
    """
    Returns the top-streamed songs.

    Parameters:
        n (int): The number of songs to return. Default is 10. Values that
            arrive as floats or numeric strings are converted with int()
            (fractions are truncated).

    Returns:
        list[dict]: One record per song, highest `streams` first. The records
        carry the original column values; only the ordering uses the numeric
        interpretation described below.

    Raises:
        ValueError / TypeError: If `n` cannot be converted to an integer.
    """
    # DataFrame.head() slices positionally and rejects non-integer counts.
    limit = int(n)
    # Sort on a numeric view of the column so the ranking is by magnitude.
    # NOTE(review): this guards against `streams` having been parsed as text
    # (which would sort lexicographically); unparseable cells become NaN and
    # sort last. Confirm the column's dtype - for a numeric column the key is
    # a no-op.
    ranked = SONGS.sort_values(
        by='streams',
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'),
    )
    return ranked.head(limit).to_dict(orient='records')

# @register.register
@to_json
def songs_in_spotify_playlists(n=10):
    """
    Returns the top songs featured in the most Spotify playlists.

    Parameters:
        n (int): The number of songs to return. Default is 10. Values that
            arrive as floats or numeric strings are converted with int()
            (fractions are truncated).

    Returns:
        list[dict]: One record per song, highest `in_spotify_playlists` first.

    Raises:
        ValueError / TypeError: If `n` cannot be converted to an integer.
    """
    # DataFrame.head() slices positionally and rejects non-integer counts.
    limit = int(n)
    ranked = SONGS.sort_values(by='in_spotify_playlists', ascending=False)
    return ranked.head(limit).to_dict(orient='records')

# @register.register
@to_json
def songs_by_artist(artist_name):
    """
    Returns all songs credited to a given artist, including collaborations.

    The `artist(s)_name` column holds a comma-separated credit such as
    "Latto, Jung Kook". A song matches when the whole credit equals
    `artist_name`, or when `artist_name` equals one of the comma-separated
    names after trimming surrounding whitespace.

    Parameters:
        artist_name (str): The name of the artist (case-sensitive).

    Returns:
        list[dict]: One record per matching song; empty when nothing matches.
    """
    def _is_credited(credit):
        # Missing credits (e.g. NaN) never match.
        if not isinstance(credit, str):
            return False
        # Whole-credit equality first, so a name that itself contains a comma
        # still matches exactly as it did with a plain equality test.
        if credit == artist_name:
            return True
        return artist_name in [name.strip() for name in credit.split(",")]

    # astype(bool) keeps the mask boolean even when the table is empty.
    credited = SONGS['artist(s)_name'].map(_is_credited).astype(bool)
    return SONGS[credited].to_dict(orient='records')

In [65]:
# Function descriptions sent to the model via the `functions` argument of the
# chat completion call. Each entry names a Python function and describes its
# arguments as a JSON Schema object.
# The count argument `n` is declared as "integer" (not "number") so the model
# is asked for whole numbers only.
FUNCTIONS = [
    {
        "name": "top_rated_albums",
        "description": "Retrieve the top-rated albums based on average rating.",
        "parameters": {
            "type": "object",
            "properties": {
                "n": {
                    "type": "integer",
                    "description": "The number of albums to return. Default is 10."
                }
            },
            "required": []
        }
    },
    {
        "name": "most_reviewed_albums",
        "description": "Retrieve the most reviewed albums.",
        "parameters": {
            "type": "object",
            "properties": {
                "n": {
                    "type": "integer",
                    "description": "The number of albums to return. Default is 10."
                }
            },
            "required": []
        }
    },
    {
        "name": "albums_by_artist",
        "description": "Retrieve all albums by a given artist.",
        "parameters": {
            "type": "object",
            "properties": {
                "artist_name": {
                    "type": "string",
                    "description": "The name of the artist."
                }
            },
            "required": ["artist_name"]
        }
    },
    # Song functions are currently disabled; their schemas are kept for reference.
    #     {
    #     "name": "top_streamed_songs",
    #     "description": "Retrieve the top-streamed songs.",
    #     "parameters": {
    #         "type": "object",
    #         "properties": {
    #             "n": {
    #                 "type": "integer",
    #                 "description": "The number of songs to return. Default is 10."
    #             }
    #         },
    #         "required": []
    #     }
    # },
    # {
    #     "name": "songs_in_spotify_playlists",
    #     "description": "Retrieve the top songs featured in the most Spotify playlists.",
    #     "parameters": {
    #         "type": "object",
    #         "properties": {
    #             "n": {
    #                 "type": "integer",
    #                 "description": "The number of songs to return. Default is 10."
    #             }
    #         },
    #         "required": []
    #     }
    # },
    # {
    #     "name": "songs_by_artist",
    #     "description": "Retrieve all songs by a given artist.",
    #     "parameters": {
    #         "type": "object",
    #         "properties": {
    #             "artist_name": {
    #                 "type": "string",
    #                 "description": "The name of the artist."
    #             }
    #         },
    #         "required": ["artist_name"]
    #     }
    # }
]

# Show which functions the registry currently holds. Functions whose
# `@register.register` decorator is commented out will not appear here.
pprint(register.registered_functions) # all active functions

{'albums_by_artist': <function albums_by_artist at 0x00000295BDE358A0>,
 'most_reviewed_albums': <function most_reviewed_albums at 0x00000295BEAA8400>,
 'top_rated_albums': <function top_rated_albums at 0x00000295BEAA84A0>}


#### API

In [18]:
# Flask API (later)

## LLM

#### Settings

In [67]:
from enum import Enum
from typing import Union

class Role(Enum):
    """Speaker roles; each member's value is the string placed in a chat message's "role" field."""
    ASSISTANT = "assistant"  # replies produced by the model
    FUNCTION = "function"    # result of a function the model asked to call
    SYSTEM = "system"        # instruction message (see CONTEXT)
    USER = "user"            # the user's question

class Model_Version(Enum):
    """Model identifier strings; a member's value is passed as `model` to the chat completion call."""
    GPT3 = "gpt-3.5-turbo-0613"
    GPT4 = "gpt-4-0613"
# System message placed first in every conversation started by `run`.
CONTEXT = {"role": Role.SYSTEM.value, "content": "Answer briefly."}

# Bound that `run` compares its iteration counter against in the function-calling loop.
MAX_ITER = 5

# Model identifier used for every chat completion request.
MODEL_VERSION = Model_Version.GPT3.value

In [68]:
def _add_message_to_context(role:Role, content:str, messages:list, function_call:dict=None) -> None:
    """
    Append one chat message for `role` to `messages` (mutated in place).

    Parameters:
        role (Role): ASSISTANT, USER or FUNCTION. Any other role is ignored
            and nothing is appended.
        content (str): The message text.
        messages (list): The conversation so far; extended by this call.
        function_call (dict): For ASSISTANT, attached as "function_call" when
            not None. For FUNCTION, its "name" entry becomes the message name.

    Returns:
        None
    """
    if role == Role.ASSISTANT:
        entry = {"role": Role.ASSISTANT.value, "content": content}
        # Only attach the call details when the assistant requested a function.
        if function_call is not None:
            entry["function_call"] = function_call
    elif role == Role.USER:
        entry = {"role": Role.USER.value, "content": content}
    elif role == Role.FUNCTION:
        entry = {
            "role": Role.FUNCTION.value,
            "name": function_call["name"],
            "content": content,
        }
    else:
        # Roles without a branch above (e.g. SYSTEM) add nothing.
        return

    messages.append(entry)
        

def send_message(role:Role, content:str, messages:list, function_call:dict=None) -> dict:
    """
    Send a message to the LLM and return its reply.

    The outgoing message is appended to `messages` before the request, and
    the assistant's reply is appended afterwards, whether it is a plain
    answer or a function-call request. `messages` therefore always reflects
    the full exchange.

    Parameters:
        role (Role): Role of the outgoing message; callers in this notebook
            use Role.USER or Role.FUNCTION.
        content (str): Text of the outgoing message.
        messages (list): Conversation history; mutated in place.
        function_call (dict): For Role.FUNCTION, the call being answered
            (its "name" is recorded on the message).

    Returns:
        dict: The first choice's message from the completion response. It has
        a "content" entry and, when the model wants a function run, a
        "function_call" entry.
    """
    _add_message_to_context(role, content, messages, function_call)

    completion = openai.ChatCompletion.create(
        model=MODEL_VERSION,
        messages=messages,
        function_call="auto",
        functions=FUNCTIONS,
    )

    reply = completion["choices"][0]["message"]
    requested_call = reply.get("function_call")

    # Record the assistant turn in every case so follow-up requests carry the
    # complete history; the call details are attached only when present.
    _add_message_to_context(
        Role.ASSISTANT,
        reply.get("content"),
        messages,
        requested_call.to_dict() if requested_call else None,
    )

    return reply


def handle_function(function:dict) -> str:
    """
    Invoke the registered function the model asked for and return its result.

    Problems the model can cause (unknown function name, malformed or
    non-object arguments) are reported as a JSON string of the form
    {"error": "..."} instead of returning None or raising, so the caller
    always has text to send back as the function message.

    Parameters:
        function (dict): The model's function call, with a "name" entry and an
            "arguments" entry holding a JSON-encoded object of keyword arguments.

    Returns:
        str: The called function's result, or a JSON error description.
        NOTE(review): registered functions are wrapped by `to_json` from
        `helper`, so the result is presumably a JSON string - confirm.

    Raises:
        TypeError: If the decoded arguments do not fit the function's signature.
    """
    function_name = function["name"]

    if function_name not in register.registered_functions:
        return json.dumps({"error": f"Unknown function: {function_name}"})

    # Treat missing/empty arguments as "no arguments".
    raw_arguments = function.get("arguments") or "{}"
    try:
        function_args = json.loads(raw_arguments)
    except json.JSONDecodeError as exc:
        return json.dumps({"error": f"Invalid JSON arguments for {function_name}: {exc}"})

    # The arguments are expanded with ** below, so they must decode to an object.
    if not isinstance(function_args, dict):
        return json.dumps({"error": f"Arguments for {function_name} must be a JSON object"})

    function_to_call = register[function_name]
    return function_to_call(**function_args)


def run(user_question:str, verbose=False):
    """
    Ask the LLM a question and let it call functions until it answers.

    The question is sent together with the system context. While the reply
    requests a function call, the function is executed and its result is sent
    back. At most MAX_ITER such rounds are performed, so a model that keeps
    requesting calls cannot loop forever.

    Parameters:
        user_question (str): The question to ask.
        verbose (bool): When True, print each reply, function call, function
            result and the final message list.

    Returns:
        The "content" of the last reply. This can be None if the round limit
        was reached while the model was still requesting a function call.
    """
    messages = [CONTEXT]

    # Step 1: send the conversation (context) and available functions to GPT
    response = send_message(Role.USER, user_question, messages)
    if verbose: print(response)

    # A bounded for-loop replaces the manual counter, which was never
    # incremented and therefore never enforced MAX_ITER.
    for _ in range(MAX_ITER):
        # Step 2: check if GPT wanted to call a function
        function_call = response.get("function_call")
        if not function_call:
            break
        if verbose: print(function_call)

        # Step 3: call the function
        function_result = handle_function(function_call)
        if verbose: print(function_result)

        # Step 4: send the info on the function call and function response to GPT
        response = send_message(Role.FUNCTION, function_result, messages, function_call)
        if verbose: print(response)

    if verbose: print(messages)

    return response["content"]

##### Run the LLM

In [70]:
# Example query. The question text is forwarded to the model exactly as written here.
response = run("What is the most popular album from Red Hot Chilli Peppers?", verbose=False)
# pprint wraps the long answer string across several lines.
pprint(response)

('The most popular album from Red Hot Chili Peppers is "Californication". It '
 'was released on June 8, 1999, and has an average rating of 3.48 based on '
 '16,401 ratings and 286 reviews.')


## Benchmark

In [58]:
# NOTE(review): `outer` is not defined in any cell visible in this notebook, so
# this line fails on a fresh kernel - confirm where it should come from or
# remove the cell.
obj = outer[0]

{'name': 'get_current_weather',
 'arguments': '{\n  "location": "San Francisco, CA"\n}'}