## **Function Calling LLMs - Team Project**

In [44]:
import openai
import json
import os

import pandas as pd
from pprint import pprint
from dotenv import load_dotenv


load_dotenv()
openai.api_key = os.environ.get("API_KEY")

## Data

##### Dataset 1: Most Popular Albums on Spotify

Source: https://www.kaggle.com/datasets/tobennao/rym-top-5000/

In [45]:
album_columns_to_keep = ["release_name",    # Name of the album 
                            "artist_name",     # Name of the artist/band/group
                            "release_date",    # Date the album was released
                            "primary_genres",  # Primary genre classifications
                            "secondary_genres",# Secondary genre classifications
                            "descriptors",     # Album tags
                            "avg_rating",      # Average rating, on a scale of 0-5
                            "rating_count",    # The number of ratings
                            "review_count"     # The number of reviews
                        ]

albums_popular = pd.read_csv("./data/popular_albums.csv", usecols=album_columns_to_keep)


# one attribute for release_date
def combine_and_deduplicate_genres(primary, secondary):
    combined_genres = set()

    if primary and not isinstance(primary, float):
        combined_genres.update(primary.split(', '))

    if secondary and not isinstance(secondary, float):
        combined_genres.update(secondary.split(', '))

    return ', '.join(combined_genres)


albums_popular['genres'] = albums_popular.apply(lambda row: combine_and_deduplicate_genres(
    row['primary_genres'], row['secondary_genres']), axis=1)

albums_popular.drop(['primary_genres', 'secondary_genres'], axis=1, inplace=True)


# reorder:
new_order = ["release_name", "artist_name", "release_date", "genres",
             "descriptors", "avg_rating", "rating_count", "review_count"]
albums_popular = albums_popular[new_order]

In [46]:
album_columns_to_keep = ["release_name",    # Name of the album
                         "artist_name",     # Name of the artist/band/group
                         "release_date",    # Date the album was released
                         "genres",          # Genre
                         "descriptors",     # Album tags
                         "avg_rating",      # Average rating, on a scale of 0-5
                         "rating_count",    # The number of ratings
                         "review_count"     # The number of reviews
                         ]

albums_long_tail = pd.read_csv(
    "./data/albums_long_tail.csv", usecols=album_columns_to_keep)

In [47]:
ALBUMS = pd.concat([albums_popular[:500], albums_long_tail], ignore_index=True)

print(f"Attributes: {ALBUMS.columns.to_list()}")
print(f"Number of attributes (columns): {ALBUMS.shape[1]}")
print(f"Number of examples (rows): {ALBUMS.shape[0]}")

ALBUMS[-10:].head(n=10)

Attributes: ['release_name', 'artist_name', 'release_date', 'genres', 'descriptors', 'avg_rating', 'rating_count', 'review_count']
Number of attributes (columns): 8
Number of examples (rows): 619


Unnamed: 0,release_name,artist_name,release_date,genres,descriptors,avg_rating,rating_count,review_count
609,Dismantled Into Juice,Blawan,2023-05-17,"UK Bass, Wonky Techno, Deconstructed Club, Wonky","mechanical, rhythmic, hypnotic, dissonant, raw...",3.34,527,2.0
610,But Here We Are,Foo Fighters,2023-06-02,"Alternative Rock, Power Pop, Post-Grunge, Shoe...","death, male vocalist, melodic, anthemic, bitte...",3.56,5118,76.0
611,Metro Boomin Presents Spider-Man: Across the S...,Metro Boomin,2023-06-02,"Film Soundtrack, Pop Rap, Trap, Afrobeats, Con...","male vocalist, female vocalist, optimistic, tr...",2.96,2295,19.0
612,Formal Growth in the Desert,Protomartyr,2023-06-02,"Post-Punk, Art Punk, Gothic Rock, Noise Rock","male vocalist, dark, rhythmic, atmospheric, de...",3.45,2785,29.0
613,Bunny,Beach Fossils,2023-06-02,"Jangle Pop, Indie Pop, Dream Pop, Indie Surf, ...","male vocalist, mellow, calm, soft, ethereal, w...",3.29,929,9.0
614,Everyone's Crushed,Water From Your Eyes,2023-05-26,"Experimental Rock, Art Pop, Neo-Psychedelia, P...","apathetic, urban, dissonant, noisy, energetic,...",3.32,1149,15.0
615,Aperture,Hannah Jadagu,2023-05-19,"Indie Pop, Dream Pop, Bedroom Pop","bittersweet, melancholic, sentimental, energet...",3.44,293,4.0
616,More Photographs (A Continuum),Kevin Morby,2023-05-26,"Folk Rock, Singer-Songwriter, Indie Folk, Amer...","lonely, sentimental, melodic, male vocalist, w...",3.13,143,2.0
617,Perfume,NCT DOJAEJUNG,2023-04-17,"Contemporary R&B, K-Pop, Dance-Pop, Future Bas...","sensual, male vocalist, melodic, rhythmic, rom...",3.44,360,3.0
618,AESTHETIC,tripleS / +(KR)ystal Eyes,2023-05-04,"K-Pop, Dance-Pop, Contemporary R&B, New Jack S...","female vocalist, warm, rhythmic, lush, melodic...",3.5,752,5.0


##### Dataset 2: Most Streamed Tracks on Spotify

Source: https://www.kaggle.com/datasets/nelgiriyewithana/top-spotify-songs-2023

In [48]:
song_columns_to_keep = ['track_name',           # Name of the song
                        'artist(s)_name',       # Name of the artist(s) of the song
                        'artist_count',         # Number of artists contributing to the song
                        'released_year',        # Year when the song was released
                        'released_month',       # Month when the song was released
                        'released_day',         # Day of the month when the song was released
                        'streams',              # Total number of streams on Spotify
                        'bpm',                  # Beats per minute, a measure of song tempo
                        'key',                  # Key of the song
                        'mode',                 # Mode of the song (major or minor)
                        'danceability_%',       # Percentage indicating how suitable the song is for dancing
                        'valence_%',            # Positivity of the song's musical content
                        'energy_%',             # Perceived energy level of the song
                        'acousticness_%',       # Amount of acoustic sound in the song
                        'instrumentalness_%',   # Amount of instrumental content in the song
                        'liveness_%',           # Presence of live performance elements
                        'speechiness_%'         # Amount of spoken words in the song
                        ]


songs_short_tail = pd.read_csv("./data/tracks.csv", encoding_errors="ignore")

# one attribute for release_date
songs_short_tail['release_date'] = pd.to_datetime(songs_short_tail['released_year'].astype(str) + '-' +
                                       songs_short_tail['released_month'].astype(str) + '-' +
                                       songs_short_tail['released_day'].astype(str))

songs_short_tail.drop(['released_year', 'released_month',
        'released_day'], axis=1, inplace=True)


songs_short_tail['release_date'] = songs_short_tail['release_date'].apply(
    lambda x: str(x).replace(" 00:00:00", "") if "00:00:00" in str(x) else x)

# reorder:
new_order = ['track_name', 'artist(s)_name', 'artist_count', 'release_date', 'streams', 'bpm', 'key', 'mode', 
             'danceability_%', 'valence_%', 'energy_%', 'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%']
songs_short_tail = songs_short_tail[new_order]

In [49]:
songs_long_tail = pd.read_csv("./data/tracks_long_tail.csv", encoding_errors="ignore")

In [50]:
SONGS = pd.concat([songs_short_tail[:500], songs_long_tail], ignore_index=True)

print(f"Attributes: {SONGS.columns.to_list()}")
print(f"Number of attributes (columns): {SONGS.shape[1]}")
print(f"Number of examples (rows): {SONGS.shape[0]}")

SONGS[495:].head(n=10)

Attributes: ['track_name', 'artist(s)_name', 'artist_count', 'release_date', 'streams', 'bpm', 'key', 'mode', 'danceability_%', 'valence_%', 'energy_%', 'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%', 'album', 'explicit', 'popularity', 'duration_in_min']
Number of attributes (columns): 19
Number of examples (rows): 960


Unnamed: 0,track_name,artist(s)_name,artist_count,release_date,streams,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,album,explicit,popularity,duration_in_min
495,Run Rudolph Run - Single Version,Chuck Berry,1,1958-01-01,245350949.0,152,G,Minor,69,94,71,79,0,7,8,,,,
496,Jingle Bells - Remastered 1999,Frank Sinatra,1,1957-01-01,178660459.0,175,G#,Major,51,94,34,73,0,10,5,,,,
497,Far,SZA,1,2022-12-09,51641685.0,116,D,Major,61,48,55,67,0,16,8,,,,
498,On Time (with John Legend),"John Legend, Metro Boomin",2,2022-12-02,78139948.0,80,F,Minor,33,51,59,76,0,44,6,,,,
499,GAT��,"Maldy, Karol G",2,2022-08-25,322336177.0,93,B,Minor,63,34,86,26,0,21,39,,,,
500,3D (feat. Jack Harlow),"Jung Kook, Jack Harlow",2,2023-11-03,,108,C#,Major,86,89,83,4,0,9,11,GOLDEN,True,85.0,3.363533
501,Closer to You (feat. Major Lazer),"Jung Kook, Major Lazer",2,2023-11-03,,113,D,Minor,79,50,66,12,1,11,5,GOLDEN,False,86.0,2.849917
502,Seven (feat. Latto) (Explicit Ver.),"Jung Kook, Latto",2,2023-11-03,,124,B,Major,79,88,84,32,0,8,5,GOLDEN,True,87.0,3.059183
503,Standing Next to You,Jung Kook,1,2023-11-03,,106,D,Minor,72,82,81,5,0,34,10,GOLDEN,False,96.0,3.433667
504,Yes or No,Jung Kook,1,2023-11-03,,83,C#,Major,68,89,84,18,0,8,9,GOLDEN,False,88.0,2.459283


## Functions

#### 1 Parameters

- Albums: top_rated_albums, albums_by_artist, albums_by_genres
- Songs: songs_by_release_date, songs_by_longest_duration, songs_by_danceability, songs_by_explicitness

In [91]:
def top_rated_albums(n:int=10):
    """
    Returns the top-rated albums based on average rating.
    
    Parameters:
        n (int): The number of albums to return. Default is 10.
    
    Returns:
        list[dict]: A list of dictionaries representing the top-rated albums.
    """
    top_rated = ALBUMS.sort_values(by='avg_rating', ascending=False).head(n)
    return top_rated.to_dict(orient='records')

def albums_by_artist(artist_name:str):
    """
    Returns all albums by a given artist.
    
    Parameters:
        artist_name (str): The name of the artist.
    
    Returns:
        list[dict]: A list of dictionaries representing the albums by the given artist.
    """
    albums = ALBUMS[ALBUMS['artist_name'] == artist_name]
    return albums.to_dict(orient='records')

def albums_by_genres(genres: list[str]):
    """
    Get all albums that contain any of the genres specified in the genres list.

    Parameters:
        genres (list[str]): A list of genre strings to filter albums by.

    Returns:
        list[dict]: A list of dictionaries representing albums that match any of the specified genres.
    """

    # Filter the albums where any of the genres match the genres in the list
    # Assuming each album's genres are stored in a list-like structure within the 'genres' column
    filtered_albums = ALBUMS[ALBUMS['genres'].apply(lambda x: any(str(genre).lower() in str(x).lower() for genre in genres))]

    return filtered_albums.to_dict(orient='records')

# def artist_by_album(album_name):
    """
    Returns the artist of an album by its name.

    Parameters:
        album_name (str): The name of the album.

    Returns:
        dict: A dictionary with the album name and the artist.
              If the album is not found, the dictionary will be empty.
    """
    artist_dict = {}
    album = ALBUMS[ALBUMS['release_name'] == album_name]
    if not album.empty:
        artist_dict['Album name'] = album_name
        artist_dict['artist'] = album.iloc[0]['artist_name']
    
    return artist_dict

# --- SONGS ---

def songs_by_release_date(release_date:str):
    """
    Get all songs released on a specific date, month, or year.

    Parameters:
        release_date (str): Release date to filter by. Can be in YYYY, YYYY-MM, or YYYY-MM-DD format.

    Returns:
        list[dict]: A list of dictionaries representing all songs released on the specified date.
    """

    # Determine the length of the release_date string to understand the format
    date_length = len(release_date)

    if date_length == 4:  # Year format YYYY
        filter_condition = (SONGS['release_date'].str.startswith(release_date))
    elif date_length == 7:  # Month format YYYY-MM
        filter_condition = (SONGS['release_date'].str[:7] == release_date)
    elif date_length == 10:  # Day format YYYY-MM-DD
        filter_condition = (SONGS['release_date'] == release_date)
    else:
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    filtered_songs = SONGS[filter_condition]
    return filtered_songs.to_dict(orient='records')

def songs_by_longest_duration(n:int=10):
    """
    Returns the top n songs sorted by longest duration.
    
    Parameters:
        n (int): The number of top songs to return based on duration.
    
    Returns:
        list[dict]: A list of dictionaries representing the top n songs sorted by duration.
    """
    sorted_songs = SONGS.sort_values(by="duration_in_min", ascending=False)
    top_n_songs = sorted_songs.head(n)
    return top_n_songs.to_dict(orient='records')

def songs_by_danceability(danceability_threshold:int):
    """
    Get the top 10 songs sorted by streams that have a danceability rating above a certain threshold in %.

    Parameters:
        danceability_threshold (float): The minimum danceability rating to filter songs by.

    Returns:
        list[dict]: A list of dictionaries representing the top N songs meeting the danceability threshold in %, sorted by streams.
    """

    # Filter songs by danceability threshold
    filtered_songs = SONGS[SONGS['danceability_%'] > danceability_threshold]

    # Sort the filtered songs by streams in descending order and take the top 10
    top_songs = filtered_songs.sort_values(by='streams', ascending=False).head(10)

    return top_songs.to_dict(orient='records')

def songs_by_explicitness(explicit:bool=True):
    """
    Get the top 10 songs filtered by their explicitness.

    Parameters:
        explicit (bool): Flag to filter songs by explicit content. Default is True.
        n (int): Number of top songs to retrieve, default is 10.

    Returns:
        list[dict]: A list of dictionaries representing the top 10 songs filtered by the specified explicitness.
    """

    # Assuming SONGS is your dataset with a column 'explicit' for explicit content
    # and a metric like 'streams' or 'popularity' to sort by

    # Filter the songs by explicitness
    filtered_songs = SONGS[SONGS['explicit'] == explicit]

    # Sort the filtered songs by a relevant metric in descending order and take the top 10
    top_songs = filtered_songs.sort_values(
        by='streams', ascending=False).head(10)

    return top_songs.to_dict(orient='records')

# def top_streamed_songs(n=10):
    """
    Returns the top-streamed songs.
    
    Parameters:
        n (int): The number of songs to return. Default is 10.
    
    Returns:
        list[dict]: A list of dictionaries representing the top-streamed songs.
    """
    top_songs = SONGS.sort_values(by='streams', ascending=False).head(n)
    return top_songs.to_dict(orient='records')

# def songs_by_artist(artist_name):
    """
    Returns all songs by a given artist.
    
    Parameters:
        artist_name (str): The name of the artist.
    
    Returns:
        list[dict]: A list of dictionaries representing the songs by the given artist.
    """
    songs = SONGS[SONGS['artist(s)_name'] == artist_name]
    return songs.to_dict(orient='records')

# def artist_by_song(song_name):
    """
    Returns the artist of the song.

    Parameters:
        song_name (str): The name of the song.

    Returns:
        dict: A dictionary with the song name and the artist.
              If the song is not found, the dictionary will be empty.
    """
    artist_dict = {}
    song = SONGS[SONGS['track_name'] == song_name]
    if not song.empty:
        artist_dict['Song'] = song_name
        artist_dict['Artist'] = song.iloc[0]['artist(s)_name']

    return artist_dict

# def songs_by_mode(mode: str):
    """
    Get top 10 songs, filtered by a specific musical mode.

    Parameters:
        mode (str): The musical mode to filter songs by.

    Returns:
        list[dict]: A list of dictionaries representing songs filtered by the specified musical key mode.
    """
    filtered_songs = SONGS[SONGS['mode'] == mode[0].upper(
    ) + mode[1:].lower()].sort_values(by='streams', ascending=False)

    return filtered_songs.to_dict(orient='records')

#### 2 Parameters

- Albums: filter_albums_by_date_range, albums_by_artist_and_year, albums_by_artist_and_genre
- Songs: top_streamed_songs_by_artist, high_rated_and_reviewed_albums, high_energy_songs

In [52]:
def filter_albums_by_date_range(start_year, end_year):
    """
    Filters albums within a specified date range (inclusive).

    Parameters:
        start_year (str): Start year of the range in 'YYYY' or 'YYYY-MM-DD' format.
        end_year (str): End year of the range in 'YYYY' or 'YYYY-MM-DD' format.

    Returns:
        list[dict]: A list of dictionaries, each representing an album released within the specified date range.
    """
    start_year = pd.to_datetime(start_year)
    end_year = pd.to_datetime(end_year)

    ALBUMS['release_date'] = pd.to_datetime(ALBUMS['release_date'])

    filtered_df = ALBUMS[(ALBUMS['release_date'] >= start_year)
                         & (ALBUMS['release_date'] <= end_year)]

    ALBUMS['release_date'] = ALBUMS['release_date'].astype(str)
    filtered_dict = filtered_df.to_dict('records')

    return filtered_dict

def albums_by_artist_and_year(artist_name, release_year):
    """
    Get the albums by a specific artist released in a given year.

    Parameters:
        artist_name (str): Name of the artist.
        release_year (str): Release year to filter by in 'YYYY' format..

    Returns:
        list[dict]: A list of dictionaries representing the albums by the given artist released in the specified year.
    """

    start_date = f'{release_year}-01-01'
    end_date = f'{release_year}-12-31'

    artist_albums_in_year = ALBUMS[(ALBUMS['artist_name'] == artist_name) & (
        (ALBUMS['release_date'] >= start_date) & (ALBUMS['release_date'] <= end_date))]

    return artist_albums_in_year.to_dict(orient='records')

def albums_by_artist_and_genre(artist_name, genre):
    """
    Get the albums by a specific artist and genre.

    Parameters:
        artist_name (str): Name of the artist.
        genre (str): Genre to filter by.

    Returns:
        list[dict]: A list of dictionaries representing the albums by the given artist and genre.
    """

    albums = ALBUMS[(ALBUMS['artist_name'] == artist_name) & (
        ALBUMS['genres'].str.contains(genre, case=False))]

    return albums.to_dict(orient='records')

# --- SONGS ---

def top_streamed_songs_by_artist(artist_name, n=5):
    """
    Get the top N streamed songs by a specific artist.

    Parameters:
        artist_name (str): Name of the artist.
        n (int): Number of top songs to retrieve.

    Returns:
        list[dict]: A list of dictionaries representing the top N streamed songs by the given artist.
    """
    artist_songs = SONGS[SONGS['artist(s)_name'] == artist_name]
    top_songs = artist_songs.sort_values(by='streams', ascending=False).head(n)

    return top_songs.to_dict(orient='records')

def high_rated_and_reviewed_albums(rating_threshold=4, review_threshold=100):
    """
    Returns albums that have a rating higher than the specified threshold
    and a review count higher than the specified threshold.
    
    Parameters:
        rating_threshold (float): The minimum average rating threshold. Default is 4.0.
        review_threshold (int): The minimum review count threshold. Default is 100.
        
    Returns:
        list[dict]: A list of dictionaries representing the albums which have higher average ratings and higher review counts.
        
    """
    filtered_albums = ALBUMS[(ALBUMS['avg_rating'] > rating_threshold) & (
        ALBUMS['review_count'] > review_threshold)]

    return filtered_albums.to_dict(orient='records')

def high_energy_songs(speechiness_threshold=15, energy_threshold=80):
    """
    Returns songs that are explicit, have low speechiness, and high energy.
    
    Parameters:
        speechiness_threshold (int): The maximum speechiness score threshold. Default is 15.
        energy_threshold (int): The minimum energy score threshold. Default is 80.
        
    Returns:
        list[dict]: A list of dictionaries representing the songs that are explicit, have low speechiness, and high energy.
        
    """
    # Filter songs based on the specified conditions
    filtered_songs = SONGS[
        (SONGS['speechiness_%'] < speechiness_threshold) &
        (SONGS['energy_%'] > energy_threshold)
    ]

    return filtered_songs.to_dict(orient='records')

def top_songs_by_explicitness(explicit: bool, n=10):
    """
    Get the top N songs filtered by their explicitness, sorted by number of streams.

    Parameters:
        explicit (bool): Boolean value to filter songs by explicitness. True for explicit songs, False for non-explicit songs.
        n (int): Number of top songs to retrieve.

    Returns:
        list[dict]: A list of dictionaries representing the top N songs filtered by the specified explicitness.
    """

    # Filter the songs by explicitness
    filtered_songs = SONGS[SONGS['explicit'] == explicit]

    # Sort the filtered songs by streams in descending order and take the top N songs
    top_songs = filtered_songs.sort_values(
        by='streams', ascending=False).head(n)

    return top_songs.to_dict(orient='records')

#### 3 Parameters

- Albums: albums_by_year_genre_and_descriptors
- Songs: top_songs_by_artist_and_release_year, high_energy_songs_with_artist_count, unique_albums

In [53]:
def albums_by_year_genre_and_descriptors(release_year, genre, descriptors):
    """
    Get the albums released in a given year, within a genre, and matching specified descriptors.

    Parameters:
        release_year (str): Release year to filter by.
        genre (str): Genre to filter by.
        descriptors (str): Descriptors to match.

    Returns:
        list[dict]: A list of dictionaries representing thealbums by the given criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (ALBUMS['genres'].str.contains(genre, case=False)) &
        (ALBUMS['descriptors'].str.contains(descriptors, case=False))
    ]

    return filtered_albums.to_dict(orient='records')

# --- SONGS ---

def top_songs_by_artist_and_release_year(artist_name, release_year, n=5):
    """
    Get the top N streamed songs by a specific artist released in a given year.

    Parameters:
        artist_name (str): Name of the artist.
        release_year (str): Release year to filter by.
        n (int): Number of top songs to retrieve. Default is 5.

    Returns:
        list[dict]: A list of dictionaries representing the top N streamed songs by the given artist and release year.
    """
  
    artist_songs_in_year = SONGS[
        (SONGS['artist(s)_name'] == artist_name) &
        (SONGS['release_date'].str.extract(r'(\d{4})')[0] == release_year)
    top_songs = artist_songs_in_year.sort_values(by='bpm', ascending=False).head(n)
    ]

    return artist_songs_in_year.to_dict(orient='records')

def high_energy_songs_with_artist_count(speechiness_threshold=25, energy_threshold=75, artist_count_threshold=2):
    """
    Returns songs that are explicit, have low speechiness, high energy, and are performed by more than 2 artists.
    
    Parameters:
        speechiness_threshold (int): The maximum speechiness score threshold. Default is 25.
        energy_threshold (int): The minimum energy score threshold. Default is 75.
        artist_count_threshold (int): The minimum number of artists threshold. Default is 2.
        
    Returns:
        list[dict]: A list of dictionaries representing the songs that are explicit, have low speechiness, and high energy,and are performed by more than 2 artists.
    """
    # Filter songs based on the specified conditions
    filtered_songs = SONGS[
        (SONGS['speechiness_%'] < speechiness_threshold) & 
        (SONGS['energy_%'] > energy_threshold) & 
        (SONGS['artist_count'] > artist_count_threshold)
    ]

    return filtered_songs.to_dict(orient='records')

def unique_albums(genres_threshold=4, artist_count_threshold=2, avg_rating_threshold=3.5):
    """
    Returns albums that have more than 5 genres, an artist count above 2, and an average rating below 3.5.
    
    Parameters:
        genres_threshold (int): The minimum number of genres threshold.
        artist_count_threshold (float): The minimum average artist count threshold.
        avg_rating_threshold (float): The maximum average rating threshold.
        
    Returns:
        list[dict]: A list of dictionaries representing the albums that have more than 4 genres, an artist count above 2, and an average rating below 3.5.
        
    """
    ALBUMS['artist_count'] = ALBUMS['artist_name'].apply(lambda x: len(x.split(', ')))

    filtered_albums = ALBUMS[
        (ALBUMS['genres'].apply(lambda x: len(x.split(', '))) > genres_threshold) & 
        (ALBUMS['artist_count'] >= artist_count_threshold) & 
        (ALBUMS['avg_rating'] < avg_rating_threshold)
    ]

    return filtered_albums.to_dict(orient='records')

SyntaxError: invalid syntax. Perhaps you forgot a comma? (2292962291.py, line 38)

#### 4 Parameters

- Albums: albums_by_year_genre_and_descriptors
- Songs: top_songs_by_artist_and_release_year, high_energy_songs_with_artist_count, unique_albums

In [54]:
def albums_by_rating_counts_and_year(release_year, rating_threshold, rating_count_threshold, review_count_threshold):
    """
    Get the albums released in a given year with ratings, rating counts, and review counts above specified thresholds.

    Parameters:
        release_year (str): Release year to filter by.
        rating_threshold (float): Minimum average rating threshold.
        rating_count_threshold (int): Minimum rating count threshold.
        review_count_threshold (int): Minimum review count threshold.

    Returns:
        list[dict]: A list of dictionaries representing the albums meeting the specified criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (ALBUMS['avg_rating'] >= rating_threshold) &
        (ALBUMS['rating_count'] >= rating_count_threshold) &
        (ALBUMS['review_count'] >= review_count_threshold)
    ]

    return filtered_albums.to_dict(orient='records')

def albums_by_year_genre_rating_counts(release_year, genre, rating_count_threshold, review_count_threshold):
    """
    Get the top N albums released in a given year and genre with rating counts and review counts above specified thresholds.

    Parameters:
        release_year (str): Release year to filter by.
        genre (str): Genre to filter by.
        rating_count_threshold (int): Minimum rating count threshold.
        review_count_threshold (int): Minimum review count threshold.

    Returns:
        list[dict]: A list of dictionaries representing the albums meeting the specified criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (ALBUMS['genres'].str.contains(genre, case=False)) &
        (ALBUMS['rating_count'] >= rating_count_threshold) &
        (ALBUMS['review_count'] >= review_count_threshold)
    ]

    return filtered_albums.to_dict(orient='records')

def albums_by_year_descriptor_rating_counts(year, descriptor, rating_count_threshold, review_count_threshold):
    """
    Get the top N albums released in a given year and descriptor with rating counts and review counts above specified thresholds.

    Parameters:
        release_year (str): Release year to filter by.
        descriptor (str): A string containing descriptors separated by comma.
        rating_count_threshold (int): Minimum rating count threshold.
        review_count_threshold (int): Minimum review count threshold.

    Returns:
        list[dict]: A list of dictionaries representing the albums meeting the specified criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == year) &
        (ALBUMS['descriptors'].str.contains(descriptor, case=False)) &
        (ALBUMS['rating_count'] >= rating_count_threshold) &
        (ALBUMS['review_count'] >= review_count_threshold)
    ]

    return filtered_albums.to_dict(orient='records')

# --- SONGS ---

def songs_by_artist_year_and_mode(artist_name, release_year, mode, n=5):
    """
    Get the top N songs by a specific artist released in a given year and with a specified mode.

    Parameters:
        artist_name (str): Name of the artist.
        release_year (str): Release year to filter by.
        mode (str): Mode to filter by (minor, major).
        n (int): Number of top songs to retrieve.

    Returns:
        list[dict]: A list of dictionaries representing the top N songs by the given artist, release year, and mode.
    """

    artist_songs_in_year_and_mode = SONGS[
        (SONGS['artist(s)_name'] == artist_name) &
        (SONGS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (SONGS['mode'].str.lower() == mode.lower())
    ]
    top_songs = artist_songs_in_year_and_mode.sort_values(
        by='streams', ascending=False).head(n)

    return top_songs.to_dict(orient='records')

def songs_by_artist_year_and_mode(artist_name, release_year, mode, n=5):
    """
    Get the top N songs by a specific artist released in a given year and with a specified mode.

    Parameters:
        artist_name (str): Name of the artist.
        release_year (str): Release year to filter by.
        mode (str): Mode to filter by (minor, major).
        n (int): Number of top songs to retrieve.

    Returns:
        list[dict]: A list of dictionaries representing the top N songs by the given artist, release year, and mode.
    """

    artist_songs_in_year_and_mode = SONGS[
        (SONGS['artist(s)_name'] == artist_name) &
        (SONGS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (SONGS['mode'].str.lower() == mode.lower())
    ]
    top_songs = artist_songs_in_year_and_mode.sort_values(
        by='streams', ascending=False).head(n)

    return top_songs.to_dict(orient='records')

#### more than 4 Parameters

In [55]:
def top_albums_by_year_genre_rating_counts(release_year, genre, rating_count_threshold, review_count_threshold, n=5):
    """
    Get the top N albums released in a given year and genre with rating counts and review counts above specified thresholds.

    Parameters:
        release_year (str): Release year to filter by.
        genre (str): Genre to filter by.
        rating_count_threshold (int): Minimum rating count threshold.
        review_count_threshold (int): Minimum review count threshold.
        n (int): Number of top albums to retrieve.

    Returns:
        list[dict]: A list of dictionaries representing the top N albums meeting the specified criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == release_year) &
        (ALBUMS['genres'].str.contains(genre, case=False)) &
        (ALBUMS['rating_count'] >= rating_count_threshold) &
        (ALBUMS['review_count'] >= review_count_threshold)
    ]
    top_albums = filtered_albums.sort_values(
        by='avg_rating', ascending=False).head(n)

    return top_albums.to_dict(orient='records')


def albums_by_year_genre_rating_counts_and_rating(year, genre, rating, rating_count, review_count):
    """
    Get the albums released in a given year and genre with a specific rating, rating counts, and review counts above specified thresholds.

    Parameters:
        year (str): Release year to filter by.
        genre (str): Genre to filter by.
        rating (float): Minimum average rating threshold.
        rating_count (int): Minimum rating count threshold.
        review_count (int): Minimum review count threshold.

    Returns:
        list[dict]: A list of dictionaries representing the albums meeting the specified criteria.
    """

    filtered_albums = ALBUMS[
        (ALBUMS['release_date'].str.extract(r'(\d{4})')[0] == year) &
        (ALBUMS['genres'].str.contains(genre, case=False)) &
        (ALBUMS['avg_rating'] >= rating) &
        (ALBUMS['rating_count'] >= rating_count) &
        (ALBUMS['review_count'] >= review_count)
    ]

    return filtered_albums.to_dict(orient='records')

#### Metadata

In [108]:
def filter_functions(functions_list, function_metadata):
    function_names = [func.__name__ for func in functions_list]
    filtered_metadata = [meta for meta in function_metadata if meta.get('name') in function_names]
    return functions_list, filtered_metadata

def describe_function(available_functions):
    return [meta["description"] for meta in available_functions[1]]

## LLM

In [57]:
from enum import Enum
from openai.openai_object import OpenAIObject

class Role(Enum):
    ASSISTANT = "assistant"
    FUNCTION = "function"
    SYSTEM = "system"
    USER = "user"
    
class Model(Enum):
    GPT3 = "gpt-3.5-turbo-0613"
    GPT4 = "gpt-4-0613"
    GPT4_new = "gpt-4-1106-preview"

class FunctionNotFoundError(Exception):
    def __init__(self, function_name, function_args):
        self.function_name = function_name
        self.function_args = function_args
        super().__init__(f"Error finding function {function_name} with arguments {function_args}")

class FunctionExecutionError(Exception):
    def __init__(self, function_name, function_args):
        self.function_name = function_name
        self.function_args = function_args
        super().__init__(f"Error executing function {function_name} with arguments {function_args}")


class Response:
    def __init__(self, message:dict):
        self._message = message
        
    @classmethod
    def from_api(cls, openai_response:OpenAIObject):
        _message = openai_response["choices"][0]["message"]
        return cls(_message)
    
    @property
    def message(self) -> str:
        return self._message["content"]

    @property
    def role(self) -> str:
        return self._message["role"]

    @property
    def function(self) -> dict:
        return self._message.get("function_call")

    @property
    def is_function_call(self) -> bool:
        return self.function is not None
    
    def to_dict(self) -> dict:
        return {**self._message}
    
    def __str__(self):
        return f"Response({self.role}: {self.message}, with function:{self.function})"


class Conversation:
    def __init__(self):
        self._messages = []

    @property
    def messages_as_dicts(self):
        return [message.to_dict() for message in self._messages]

    def send(self, model, functions, temperature=1) -> Response:

        args = {"model":model,
                "temperature": temperature, 
                "messages":self.messages_as_dicts}
        
        if len(functions) > 0:
            args.update({"tool_choice":"auto",
                         "functions":functions})
            
        response = openai.ChatCompletion.create(**args)
        return Response.from_api(response)

    def add(self, message_or_response):
        if isinstance(message_or_response, Response):
            message = message_or_response
        else:
            message = Response(message_or_response)

        self._messages.append(message)        
        return self
    
    def __str__(self):
        return f"{self.messages_as_dicts}"
    

def handle_function(function:dict, functions) -> json:
    """Invoke function and return result"""
    function_name, function_args = function["name"], json.loads(function["arguments"])
    
    def get_function_by_name():
        for func in functions:
            if func.__name__ == function_name:
                return func
        return None
    
    function_to_call = get_function_by_name()
    if function_to_call is not None:
        try:
            result = json.dumps(function_to_call(**function_args))
            return result
        except:
            raise FunctionExecutionError(function_name, function_args)
    else:
        raise FunctionNotFoundError(function_name, function_args)


# TODO
def handle_error(error, retry):
    raise error


max_iterations = 5
def chat(conversation:Conversation, model:str, functions:tuple, temperature:float) -> str:
    iteration = 0
    while iteration < max_iterations:
        iteration +=1
        response = conversation.send(model, functions[1], temperature)
        conversation.add(response)
        
        if response.is_function_call:
            try:
                result = handle_function(response.function, functions[0])
                conversation.add({"role": Role.FUNCTION.value, "content": result, "name":response.function["name"]})
            except (FunctionNotFoundError, FunctionExecutionError) as error:
                handle_error(error=error, retry=False)
        else:
            return response.message
        

def handle_function_on_server(function):
    function_name, function_args = function["name"], json.loads(
        function["arguments"])
    
    import requests
    
    URL = f"http://localhost:5000/function_call/{function_name}?"
    
    response = requests.get(URL, params=function_args)
    if response.ok:
        data = response.json()
        return data["result"]
    else:
        raise FunctionExecutionError # TODO
    
    

PORT = 5000
def chat_web(conversation: Conversation, model: str, function_metadata: list) -> str:
    iteration = 0
    while iteration < max_iterations:
        iteration += 1
        response = conversation.send(model, function_metadata)
        conversation.add(response)

        if response.is_function_call:
            try:
                result = handle_function_on_server(response.function)
                conversation.add(
                    {"role": Role.FUNCTION.value, "content": result, "name": response.function["name"]})
            except (FunctionNotFoundError, FunctionExecutionError) as error:
                handle_error(error=error, retry=False)
        else:
            return response.message

## Benchmark

In [109]:
# all_functions = [top_rated_albums, artist_by_album, albums_by_artist,
             #    top_streamed_songs, songs_by_artist, artist_by_song, filter_albums_by_date_range]

all_functions = [top_rated_albums, albums_by_artist, albums_by_genres, songs_by_release_date,
                 songs_by_longest_duration, songs_by_danceability, songs_by_explicitness]

with open('functions.json', 'r') as file:

    function_metadata = json.load(file)
    
TEMPERATURE = 0

In [110]:
print("Functions: " + str(len(all_functions)))
print(all_functions)

print()
print("Metadata: " + str(len(function_metadata)))
for meta_data in function_metadata:
    print(f"{meta_data['name']}: {meta_data['description']}")

Functions: 7
[<function top_rated_albums at 0x000001D12B855DA0>, <function albums_by_artist at 0x000001D12ABB7A60>, <function albums_by_genres at 0x000001D12ABB7F60>, <function songs_by_release_date at 0x000001D12ABB77E0>, <function songs_by_longest_duration at 0x000001D12ABB76A0>, <function songs_by_danceability at 0x000001D127EB5A80>, <function songs_by_explicitness at 0x000001D127EB7920>]

Metadata: 7
top_rated_albums: Returns the top-rated albums based on average rating.
albums_by_artist: Returns all albums by a given artist.
albums_by_genres: Returns all albums that contain any of the genres specified in the provided genres list.
songs_by_release_date: Retrieves all songs released on a specific date, in a specific month, or in a specific year, based on the provided release date.
songs_by_longest_duration: Returns the top 'n' songs sorted by their duration, from longest to shortest.
songs_by_danceability: Returns the top 10 songs sorted by streams that have a danceability rating ab

#### Functions

In [119]:
available_functions = filter_functions(all_functions, function_metadata)

conversation = Conversation()
conversation.add({"role": Role.SYSTEM.value, "content": "Answer briefly."}) \
            .add({"role": Role.USER.value, "content": "What 1 most popualr song is explicit?"})

result = chat(conversation, model=Model.GPT3.value,
              functions=available_functions, temperature=TEMPERATURE)
print(result)

The most popular explicit song is "3D (feat. Jack Harlow)" by Jung Kook and Jack Harlow.


In [120]:
pprint(conversation.messages_as_dicts)

[{'content': 'Answer briefly.', 'role': 'system'},
 {'content': 'What 1 most popualr song is explicit?', 'role': 'user'},
 {'content': None,
  'function_call': <OpenAIObject at 0x1d12b7b1b50> JSON: {
  "name": "songs_by_explicitness",
  "arguments": "{\n  \"explicit\": true\n}"
},
  'role': 'assistant'},
 {'content': '[{"track_name": "3D (feat. Jack Harlow)", "artist(s)_name": '
             '"Jung Kook, Jack Harlow", "artist_count": 2, "release_date": '
             '"2023-11-03", "streams": NaN, "bpm": 108, "key": "C#", "mode": '
             '"Major", "danceability_%": 86, "valence_%": 89, "energy_%": 83, '
             '"acousticness_%": 4, "instrumentalness_%": 0, "liveness_%": 9, '
             '"speechiness_%": 11, "album": "GOLDEN", "explicit": true, '
             '"popularity": 85.0, "duration_in_min": 3.3635333333333333}, '
             '{"track_name": "Seven (feat. Latto) (Explicit Ver.)", '
             '"artist(s)_name": "Jung Kook, Latto", "artist_count": 2, '
          