## **Function Calling LLMs - Team Project**

In [78]:
import openai
import json
import os
import re

import pandas as pd
from tqdm import tqdm
from pprint import pprint
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables via python-dotenv, then hand the key to the
# OpenAI client.
# NOTE(review): os.environ.get returns None when API_KEY is unset, so a missing
# key is not reported at this point.
load_dotenv()
openai.api_key = os.environ.get("API_KEY")

## Data

##### Dataset 1: Most Popular Albums on Rate Your Music (RYM Top 5000)

Source: https://www.kaggle.com/datasets/tobennao/rym-top-5000/

In [79]:
# Columns retained from the popular-albums CSV.
album_columns_to_keep = [
    "release_name",      # album title
    "artist_name",       # artist / band / group
    "release_date",      # date the album was released
    "primary_genres",    # primary genre classifications
    "secondary_genres",  # secondary genre classifications
    "descriptors",       # album tags
    "avg_rating",        # average rating on a 0-5 scale
    "rating_count",      # number of ratings
    "review_count",      # number of reviews
]

albums_popular = pd.read_csv(
    "./data/popular_albums.csv",
    usecols=album_columns_to_keep,
)


# one attribute for genres (primary + secondary merged)
def combine_and_deduplicate_genres(primary, secondary):
    """
    Merge the primary and secondary genre strings into one de-duplicated string.

    Parameters:
        primary: Comma-separated primary genres, or a missing value (NaN, None, '').
        secondary: Comma-separated secondary genres, or a missing value (NaN, None, '').

    Returns:
        str: The distinct genres joined with ', ' in first-seen order (primary
             genres first). Empty string when both inputs are missing.
    """
    # A dict keeps insertion order, so the result is the same on every run
    # (joining a set would give an arbitrary, run-dependent order).
    ordered_genres = {}
    for raw in (primary, secondary):
        # Missing cells arrive as float NaN from pandas; only real strings are split.
        if not isinstance(raw, str):
            continue
        for genre in raw.split(','):
            genre = genre.strip()
            if genre:
                ordered_genres[genre] = None

    return ', '.join(ordered_genres)


# Collapse the two genre columns into a single 'genres' column, row by row.
albums_popular['genres'] = [
    combine_and_deduplicate_genres(primary, secondary)
    for primary, secondary in zip(albums_popular['primary_genres'],
                                  albums_popular['secondary_genres'])
]

albums_popular = albums_popular.drop(columns=['primary_genres', 'secondary_genres'])


# Final column order for the popular-albums frame.
new_order = [
    "release_name", "artist_name", "release_date", "genres",
    "descriptors", "avg_rating", "rating_count", "review_count",
]
albums_popular = albums_popular[new_order]

In [80]:
# Columns retained from the long-tail albums CSV (genres are already one column here).
album_columns_to_keep = [
    "release_name",   # album title
    "artist_name",    # artist / band / group
    "release_date",   # date the album was released
    "genres",         # genre
    "descriptors",    # album tags
    "avg_rating",     # average rating on a 0-5 scale
    "rating_count",   # number of ratings
    "review_count",   # number of reviews
]

albums_long_tail = pd.read_csv("./data/albums_long_tail.csv",
                               usecols=album_columns_to_keep)

In [81]:
# Stack the popular and long-tail albums into one frame with a fresh 0..n-1 index.
ALBUMS = pd.concat((albums_popular, albums_long_tail), ignore_index=True)

print(f"Attributes: {ALBUMS.columns.to_list()}")
print(f"Number of attributes (columns): {len(ALBUMS.columns)}")
print(f"Number of examples (rows): {len(ALBUMS)}")

# Show the last ten rows.
ALBUMS.tail(10)

Attributes: ['release_name', 'artist_name', 'release_date', 'genres', 'descriptors', 'avg_rating', 'rating_count', 'review_count']
Number of attributes (columns): 8
Number of examples (rows): 5119


Unnamed: 0,release_name,artist_name,release_date,genres,descriptors,avg_rating,rating_count,review_count
5109,Dismantled Into Juice,Blawan,2023-05-17,"UK Bass, Wonky Techno, Deconstructed Club, Wonky","mechanical, rhythmic, hypnotic, dissonant, raw, noisy, aggressive, futuristic, anxious, repetiti...",3.34,527,2.0
5110,But Here We Are,Foo Fighters,2023-06-02,"Alternative Rock, Power Pop, Post-Grunge, Shoegaze","death, male vocalist, melodic, anthemic, bittersweet, energetic, sentimental, introspective, rhy...",3.56,5118,76.0
5111,Metro Boomin Presents Spider-Man: Across the Spider-Verse,Metro Boomin,2023-06-02,"Film Soundtrack, Pop Rap, Trap, Afrobeats, Contemporary R&B, Cloud Rap, Alternative R&B","male vocalist, female vocalist, optimistic, triumphant, urban, boastful, rhythmic, melodic, hedo...",2.96,2295,19.0
5112,Formal Growth in the Desert,Protomartyr,2023-06-02,"Post-Punk, Art Punk, Gothic Rock, Noise Rock","male vocalist, dark, rhythmic, atmospheric, dense, melancholic, bittersweet, energetic, abstract...",3.45,2785,29.0
5113,Bunny,Beach Fossils,2023-06-02,"Jangle Pop, Indie Pop, Dream Pop, Indie Surf, Neo-Psychedelia","male vocalist, mellow, calm, soft, ethereal, warm, summer, soothing, urban, melodic, longing, lu...",3.29,929,9.0
5114,Everyone's Crushed,Water From Your Eyes,2023-05-26,"Experimental Rock, Art Pop, Neo-Psychedelia, Post-Industrial, No Wave, Dance-Punk","apathetic, urban, dissonant, noisy, energetic, chaotic, non-binary vocalist, rhythmic, eclectic,...",3.32,1149,15.0
5115,Aperture,Hannah Jadagu,2023-05-19,"Indie Pop, Dream Pop, Bedroom Pop","bittersweet, melancholic, sentimental, energetic, melodic, love, introspective, ethereal, atmosp...",3.44,293,4.0
5116,More Photographs (A Continuum),Kevin Morby,2023-05-26,"Folk Rock, Singer-Songwriter, Indie Folk, Americana, Chamber Pop","lonely, sentimental, melodic, male vocalist, warm, spiritual, soft, philosophical, peaceful, pas...",3.13,143,2.0
5117,Perfume,NCT DOJAEJUNG,2023-04-17,"Contemporary R&B, K-Pop, Dance-Pop, Future Bass, Synth Funk, Alternative R&B","sensual, male vocalist, melodic, rhythmic, romantic, love",3.44,360,3.0
5118,AESTHETIC,tripleS / +(KR)ystal Eyes,2023-05-04,"K-Pop, Dance-Pop, Contemporary R&B, New Jack Swing, Synthpop, Future Bass","female vocalist, warm, rhythmic, lush, melodic, energetic, playful, uplifting",3.5,752,5.0


##### Dataset 2: Most Streamed Tracks on Spotify

Source: https://www.kaggle.com/datasets/nelgiriyewithana/top-spotify-songs-2023

In [82]:
# Columns read from the tracks CSV; every other column in the file is skipped.
song_columns_to_keep = ['track_name',           # Name of the song
                        'artist(s)_name',       # Name of the artist(s) of the song
                        'artist_count',         # Number of artists contributing to the song
                        'released_year',        # Year when the song was released
                        'released_month',       # Month when the song was released
                        'released_day',         # Day of the month when the song was released
                        'streams',              # Total number of streams on Spotify
                        'bpm',                  # Beats per minute, a measure of song tempo
                        'key',                  # Key of the song
                        'mode',                 # Mode of the song (major or minor)
                        'danceability_%',       # Percentage indicating how suitable the song is for dancing
                        'valence_%',            # Positivity of the song's musical content
                        'energy_%',             # Perceived energy level of the song
                        'acousticness_%',       # Amount of acoustic sound in the song
                        'instrumentalness_%',   # Amount of instrumental content in the song
                        'liveness_%',           # Presence of live performance elements
                        'speechiness_%'         # Amount of spoken words in the song
                        ]


# The column list above is now actually applied on read (it was previously unused).
songs_short_tail = pd.read_csv("./data/tracks.csv",
                               usecols=song_columns_to_keep,
                               encoding_errors="ignore")

# one attribute for release_date: combine year/month/day, parse, and store as
# an ISO 'YYYY-MM-DD' string so the string-based date filters can use it.
songs_short_tail['release_date'] = pd.to_datetime(
    songs_short_tail['released_year'].astype(str) + '-' +
    songs_short_tail['released_month'].astype(str) + '-' +
    songs_short_tail['released_day'].astype(str)
).dt.strftime('%Y-%m-%d')

# reorder (this also drops the three released_* helper columns):
new_order = ['track_name', 'artist(s)_name', 'artist_count', 'release_date', 'streams', 'bpm', 'key', 'mode',
             'danceability_%', 'valence_%', 'energy_%', 'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%']
songs_short_tail = songs_short_tail[new_order]

In [83]:
# Long-tail tracks. All columns are read; decoding errors are ignored rather than raised.
# NOTE(review): the extra SONGS columns shown after the concat (album, explicit,
# popularity, duration_in_min) presumably come from this file — confirm against the CSV.
songs_long_tail = pd.read_csv("./data/tracks_long_tail.csv", encoding_errors="ignore")

In [84]:
# Stack the most-streamed and long-tail tracks into one frame with a fresh 0..n-1 index.
SONGS = pd.concat((songs_short_tail, songs_long_tail), ignore_index=True)

print(f"Attributes: {SONGS.columns.to_list()}")
print(f"Number of attributes (columns): {len(SONGS.columns)}")
print(f"Number of examples (rows): {len(SONGS)}")

# Show ten rows starting at position 495.
SONGS.iloc[495:505]

Attributes: ['track_name', 'artist(s)_name', 'artist_count', 'release_date', 'streams', 'bpm', 'key', 'mode', 'danceability_%', 'valence_%', 'energy_%', 'acousticness_%', 'instrumentalness_%', 'liveness_%', 'speechiness_%', 'album', 'explicit', 'popularity', 'duration_in_min']
Number of attributes (columns): 19
Number of examples (rows): 1413


Unnamed: 0,track_name,artist(s)_name,artist_count,release_date,streams,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,album,explicit,popularity,duration_in_min
495,Run Rudolph Run - Single Version,Chuck Berry,1,1958-01-01,245350949,152,G,Minor,69,94,71,79,0,7,8,,,,
496,Jingle Bells - Remastered 1999,Frank Sinatra,1,1957-01-01,178660459,175,G#,Major,51,94,34,73,0,10,5,,,,
497,Far,SZA,1,2022-12-09,51641685,116,D,Major,61,48,55,67,0,16,8,,,,
498,On Time (with John Legend),"John Legend, Metro Boomin",2,2022-12-02,78139948,80,F,Minor,33,51,59,76,0,44,6,,,,
499,GAT��,"Maldy, Karol G",2,2022-08-25,322336177,93,B,Minor,63,34,86,26,0,21,39,,,,
500,abcdefu,Gayle,1,2021-08-13,1007612429,122,E,Major,70,42,54,30,0,37,5,,,,
501,Sacrifice,The Weeknd,1,2022-01-07,326792833,122,G,Major,70,91,79,3,0,7,10,,,,
502,Is There Someone Else?,The Weeknd,1,2022-01-07,391251368,135,A,Minor,70,60,58,4,0,16,3,,,,
503,Fingers Crossed,"Lauren Spencer Smith, Lauren Spencer Smith, Lauren Spencer Smith",3,2022-01-05,349585590,109,F,Major,60,45,47,62,0,31,5,,,,
504,Out of Time,The Weeknd,1,2022-01-07,339659802,93,,Minor,65,82,74,27,0,32,5,,,,


## Functions

#### 1 Parameters

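- Albums: albums_by_artist, top_rated_albums, artist_by_album, albums_by_genres
- Songs: top_streamed_songs, songs_by_release_date, songs_by_longest_duration, artist_by_song, songs_by_artist (songs_by_danceability and songs_by_explicitness are listed here but not defined in the cell below)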

In [71]:
def albums_by_artist(artist_name: str):
    """
    Look up every album credited to exactly `artist_name`.

    Parameters:
        artist_name (str): The artist name, compared for exact (case-sensitive)
                           equality with the 'artist_name' column.

    Returns:
        list[dict]: One record (all album columns) per matching album;
                    an empty list when nothing matches.
    """
    is_by_artist = ALBUMS['artist_name'].eq(artist_name)
    return ALBUMS.loc[is_by_artist].to_dict(orient="records")

def top_rated_albums(n: int = 10):
    """
    Rank albums by average rating and return the best ones.

    Parameters:
        n (int): How many albums to return. Default is 10.

    Returns:
        list[dict]: Album records ordered from highest to lowest 'avg_rating'.
    """
    ranked = ALBUMS.sort_values(by='avg_rating', ascending=False)
    best = ranked.iloc[:n]
    return best.to_dict(orient='records')

def artist_by_album(album_name):
    """
    Find the artist of the first album whose title equals `album_name`.

    Parameters:
        album_name (str): The album title (exact, case-sensitive match).

    Returns:
        dict: {'Album name': <title>, 'artist': <artist>} for the first match,
              or an empty dict when no album has that title.
    """
    matches = ALBUMS.loc[ALBUMS['release_name'] == album_name]
    if matches.empty:
        return {}
    return {
        'Album name': album_name,
        'artist': matches.iloc[0]['artist_name'],
    }

def albums_by_genres(genres: list[str]):
    """
    Get albums whose 'genres' text contains any of the given genres.

    Matching is a case-insensitive substring test, in line with the other
    genre filters in this notebook. Albums with a missing genres value never match.

    Parameters:
        genres (list[str]): Genre strings; an album matches if it contains at least one.
                            Empty strings are ignored.

    Returns:
        list[dict]: A list of dictionaries representing the matching albums.
    """
    wanted = [str(genre).lower() for genre in genres if str(genre)]

    def _has_wanted_genre(cell):
        # Missing genres arrive as float NaN; `in` on a float would raise TypeError.
        if not isinstance(cell, str):
            return False
        lowered = cell.lower()
        return any(genre in lowered for genre in wanted)

    # astype(bool) keeps the mask boolean even when ALBUMS is empty.
    mask = ALBUMS['genres'].apply(_has_wanted_genre).astype(bool)
    return ALBUMS[mask].to_dict(orient='records')

# --- SONGS ---

def top_streamed_songs(n: int = 10):
    """
    Returns the top-streamed songs.

    Songs are ranked by the numeric value of 'streams'. Values that cannot be
    read as a number (including missing ones) are ranked last.

    Parameters:
        n (int): The number of songs to return. Default is 10.

    Returns:
        list[dict]: A list of dictionaries representing the top-streamed songs,
                    highest stream count first. 'streams' is returned as stored.
    """
    # 'streams' may hold digit strings; sorting those directly is lexicographic
    # ('999' > '1000'), so compare on a numeric view of the column instead.
    top_songs = SONGS.sort_values(
        by='streams',
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'),
    ).head(n)
    return top_songs.to_dict(orient='records')

def songs_by_release_date(release_date: str):
    """
    Select the songs released in a given year, month, or on a given day.

    Parameters:
        release_date (str): 'YYYY', 'YYYY-MM', or 'YYYY-MM-DD'. The format is
                            recognised purely by the string's length (4, 7, or 10).

    Returns:
        list[dict]: Records of all songs whose 'release_date' matches.

    Raises:
        ValueError: If `release_date` is not 4, 7, or 10 characters long.
    """
    n_chars = len(release_date)
    if n_chars not in (4, 7, 10):
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    dates = SONGS['release_date']
    if n_chars == 4:
        # whole year
        keep = dates.str.startswith(release_date)
    elif n_chars == 7:
        # year and month
        keep = dates.str[:7] == release_date
    else:
        # exact day
        keep = dates == release_date

    return SONGS[keep].to_dict(orient='records')

def songs_by_longest_duration(n: int = 10):
    """
    Return the n longest songs.

    Parameters:
        n (int): How many songs to return. Default is 10.

    Returns:
        list[dict]: Song records ordered from longest to shortest 'duration_in_min'.
    """
    by_duration = SONGS.sort_values(by="duration_in_min", ascending=False)
    return by_duration.iloc[:n].to_dict(orient='records')

def artist_by_song(song_name):
    """
    Find the artist credit of the first song whose title equals `song_name`.

    Parameters:
        song_name (str): The song title (exact, case-sensitive match).

    Returns:
        dict: {'Song': <title>, 'Artist': <artist(s)>} for the first match,
              or an empty dict when no song has that title.
    """
    matches = SONGS.loc[SONGS['track_name'] == song_name]
    if matches.empty:
        return {}
    return {
        'Song': song_name,
        'Artist': matches.iloc[0]['artist(s)_name'],
    }

def songs_by_artist(artist_name):
    """
    Return every song on which `artist_name` is one of the credited artists.

    The 'artist(s)_name' cell is split on commas and each piece is stripped of
    surrounding whitespace before an exact comparison with `artist_name`.

    Parameters:
        artist_name (str): The name of the artist.

    Returns:
        list[dict]: A list of dictionaries representing the songs by the given artist.
    """
    def credited_names(cell):
        return [name.strip() for name in str(cell).split(',')]

    is_credited = SONGS['artist(s)_name'].apply(
        lambda cell: artist_name in credited_names(cell))
    return SONGS[is_credited].to_dict(orient="records")

#### 2 Parameters

- Albums: filter_albums_by_date_range, albums_by_genres2, albums_by_date_and_genres, high_rated_albums
- Songs: top_streamed_songs_by_artist, songs_by_danceability_explicitness

In [56]:
def filter_albums_by_date_range(start_date: str, end_date: str):
    """
    Filters albums within a specified date range (inclusive).

    A partial `end_date` is extended to the end of its period: 'YYYY' covers
    through 31 December and 'YYYY-MM' through the last day of that month.
    The shared ALBUMS frame is not modified.

    Parameters:
        start_date (str): Start date. Can be in YYYY, YYYY-MM, or YYYY-MM-DD format.
        end_date (str): End date. Can be in YYYY, YYYY-MM, or YYYY-MM-DD format.

    Returns:
        list[dict]: A list of dictionaries, each representing an album released
                    within the specified date range; 'release_date' is a string.

    Raises:
        ValueError: If `end_date` is not 4, 7, or 10 characters long.
    """
    range_start = pd.to_datetime(start_date)

    date_length = len(end_date)
    if date_length == 4:  # Year format YYYY
        range_end = pd.to_datetime(end_date) + pd.offsets.YearEnd()
    elif date_length == 7:  # Month format YYYY-MM
        range_end = pd.to_datetime(end_date) + pd.offsets.MonthEnd()
    elif date_length == 10:  # Day format YYYY-MM-DD
        range_end = pd.to_datetime(end_date)
    else:
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    # Parse into a local Series. Writing the parsed dates back into ALBUMS would
    # turn the shared column into datetimes and break the filters that use `.str`.
    release_dates = pd.to_datetime(ALBUMS['release_date'])
    in_range = (release_dates >= range_start) & (release_dates <= range_end)

    filtered_df = ALBUMS[in_range].copy()
    filtered_df['release_date'] = release_dates[in_range].astype(str)

    return filtered_df.to_dict('records')

def albums_by_genres2(genres_in: list[str], genres_out: list[str]):
    """
    Get albums that contain any of the specified genres in 'genres_in' and do not contain any of the genres in 'genres_out'.

    Parameters:
        genres_in (list[str]): A list of genre strings the albums must contain.
        genres_out (list[str]): A list of genre strings the albums must not contain.

    Returns:
        list[dict]: A list of dictionaries representing albums that meet the specified genre inclusion and exclusion criteria.
    """
    included_genre_albums = ALBUMS[ALBUMS['genres'].copy().apply(
        lambda x: any(str(genre).lower() in str(x).lower() for genre in genres_in))]

    filtered_albums = included_genre_albums[~included_genre_albums['genres'].apply(
        lambda x: any(str(genre).lower() in str(x).lower() for genre in genres_out))]

    filtered_albums = filtered_albums.map(lambda x: x.strftime(
        '%Y-%m-%d') if isinstance(x, pd.Timestamp) else x)

    return filtered_albums.to_dict(orient='records')

def albums_by_date_and_genres(release_date: str, genres: list[str]):
    """
    Get albums released in a given year, month, or on a given day that contain any of the specified genres.

    The shared ALBUMS frame is not modified.

    Parameters:
        release_date (str): Release date to filter by, in YYYY, YYYY-MM, or YYYY-MM-DD format.
        genres (list[str]): A list of genre strings to filter albums by
                            (case-insensitive substring match).

    Returns:
        list[dict]: A list of dictionaries representing the matching albums;
                    'release_date' is a string.

    Raises:
        ValueError: If `release_date` is not 4, 7, or 10 characters long.
    """
    date_length = len(release_date)
    if date_length not in (4, 7, 10):
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    # Work on a copy with string dates. Assigning the converted column back into
    # ALBUMS would silently change the column type for every other function.
    albums = ALBUMS.assign(release_date=ALBUMS['release_date'].astype(str))
    dates = albums['release_date']

    if date_length == 4:  # Year format YYYY
        filter_condition = dates.str.startswith(release_date)
    elif date_length == 7:  # Month format YYYY-MM
        filter_condition = dates.str[:7] == release_date
    else:  # Day format YYYY-MM-DD
        filter_condition = dates == release_date

    filtered_albums = albums[filter_condition]

    filtered_albums = filtered_albums[filtered_albums['genres'].apply(
        lambda x: any(str(genre).lower() in str(x).lower() for genre in genres))]

    return filtered_albums.to_dict(orient='records')

def high_rated_albums(rating_threshold: float = 4.0, min_ratings: int = 100):
    """
    Select albums rated at or above a threshold that also have enough ratings.

    Parameters:
        rating_threshold (float): Lowest accepted 'avg_rating' (inclusive). Default is 4.0.
        min_ratings (int): Lowest accepted 'rating_count' (inclusive). Default is 100.

    Returns:
        list[dict]: Records of the albums that meet both conditions.
    """
    rated_well = ALBUMS['avg_rating'].ge(rating_threshold)
    rated_often = ALBUMS['rating_count'].ge(min_ratings)
    return ALBUMS.loc[rated_well & rated_often].to_dict(orient='records')

# --- SONGS ---

def top_streamed_songs_by_artist(artist_name: str, n: int = 5):
    """
    Returns the top n streamed songs by a specific artist.

    A song belongs to the artist when `artist_name` occurs (case-insensitively,
    as plain text) in 'artist(s)_name'. Songs are ranked by the numeric value of
    'streams'; values that are not numbers are ranked last.

    Parameters:
        artist_name (str): The name of the artist.
        n (int): The number of top-streamed songs to return. Default is 5.

    Returns:
        list[dict]: A list of dictionaries representing the top n streamed songs by the specified artist.
    """
    # regex=False: artist names may contain regex metacharacters ("(G)I-DLE",
    # "Ke$ha") and must be matched literally.
    artist_songs = SONGS[SONGS['artist(s)_name'].str.contains(
        artist_name, case=False, na=False, regex=False)]

    # Rank on a numeric view so digit strings are not compared lexicographically.
    top_songs = artist_songs.sort_values(
        by='streams',
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'),
    ).head(n)

    return top_songs.to_dict(orient='records')

def songs_by_danceability_explicitness(danceability_threshold: float, explicit: bool = True):
    """
    Select songs that are more danceable than a threshold and have the requested explicit flag.

    Parameters:
        danceability_threshold (float): 'danceability_%' must be strictly greater than this value.
        explicit (bool): Required value of the 'explicit' column. Default is True.

    Returns:
        list[dict]: Records of the songs that meet both conditions.
    """
    is_danceable = SONGS['danceability_%'].gt(danceability_threshold)
    flag_matches = SONGS['explicit'].eq(explicit)
    return SONGS.loc[is_danceable & flag_matches].to_dict(orient='records')

#### 3 Parameters

- Albums: albums_by_year_genres_and_descriptors, high_rated_reviewed_albums, unique_albums, compare_albums_by_years_from_genres (currently disabled: its `def` line is commented out)
- Songs: songs_by_danceability_explicitness_speechiness, top_streamed_songs_by_artist_date

In [58]:
def albums_by_year_genres_and_descriptors(release_date: str, genres: list[str], descriptors: list[str]):
    """
    Get albums released on a specific date, month, or year, within a genres, and matching specified descriptors.

    Genre and descriptor matching are case-insensitive substring tests; an album
    needs at least one genre hit and at least one descriptor hit.

    Parameters:
        release_date (str): Release date to filter by in YYYY, YYYY-MM, or YYYY-MM-DD format.
        genres (list[str]): Genres to filter by.
        descriptors (list[str]): Descriptors to match.

    Returns:
        list[dict]: A list of dictionaries representing albums by the given criteria.

    Raises:
        ValueError: If `release_date` is not 4, 7, or 10 characters long.
    """
    date_length = len(release_date)
    if date_length not in (4, 7, 10):
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    # Parse once and use datetime comparisons in every branch, so the function
    # works whether the column currently holds ISO strings or datetimes
    # (the `.str` accessor fails on a datetime column).
    release_dates = pd.to_datetime(ALBUMS['release_date'])

    if date_length == 4:  # Year format YYYY
        filter_condition = release_dates.dt.year == int(release_date)
    elif date_length == 7:  # Month format YYYY-MM
        filter_condition = release_dates.dt.strftime('%Y-%m') == release_date
    else:  # Day format YYYY-MM-DD
        filter_condition = release_dates == pd.to_datetime(release_date)

    def _mentions_any(cell, terms):
        text = str(cell).lower()
        return any(str(term).lower() in text for term in terms)

    filtered_albums = ALBUMS[
        filter_condition &
        (ALBUMS['genres'].apply(lambda x: _mentions_any(x, genres))) &
        (ALBUMS['descriptors'].apply(lambda x: _mentions_any(x, descriptors)))
    ]

    return filtered_albums.to_dict(orient='records')


# Disabled function, kept for reference. It is commented out in full: leaving the
# body live under a commented-out `def` line would make it unreachable code
# inside the function above.
#
# def albums_by_artist_and_genres_descriptors(artist_name: str, genres: list[str], descriptors: list[str]):
#     """
#     Get albums by a specific artist that contain any of the specified genres and match any of the given descriptors.
#
#     Parameters:
#         artist_name (str): The name of the artist.
#         genres (list[str]): A list of genre strings to filter albums by.
#         descriptors (list[str]): A list of descriptor strings to filter albums by.
#
#     Returns:
#         list[dict]: A list of dictionaries representing albums by the specified artist that match any of the specified genres and descriptors.
#     """
#     artist_albums = ALBUMS[ALBUMS['artist_name'] == artist_name]
#
#     genre_filtered_albums = artist_albums[artist_albums['genres'].apply(
#         lambda x: any(genre in x for genre in genres))]
#
#     final_filtered_albums = genre_filtered_albums[genre_filtered_albums['descriptors'].apply(
#         lambda x: any(descriptor in x for descriptor in descriptors))]
#
#     return final_filtered_albums.to_dict(orient='records')

def high_rated_reviewed_albums(rating_threshold: float, min_ratings: int, review_threshold: int):
    """
    Returns albums rated at or above a threshold, with at least a minimum number of ratings and of reviews.

    Parameters:
        rating_threshold (float): The minimum average rating for the albums (inclusive).
        min_ratings (int): The minimum number of ratings an album must have (inclusive).
        review_threshold (int): The minimum number of reviews an album must have (inclusive).

    Returns:
        list[dict]: A list of dictionaries representing albums that meet all three thresholds.
    """
    filtered_albums = ALBUMS[(ALBUMS['avg_rating'] >= rating_threshold) &
                             (ALBUMS['rating_count'] >= min_ratings) &
                             (ALBUMS['review_count'] >= review_threshold)]

    return filtered_albums.to_dict(orient='records')


# Disabled function, kept for reference. It is commented out in full: leaving the
# body live under a commented-out `def` line would make it unreachable code
# inside the function above.
# NOTE(review): before re-enabling, fix the genre test below. `x` is a genres
# string, so `(g.lower() for g in x)` iterates single characters and a
# multi-character genre can never match.
#
# def compare_albums_by_years_from_genres(year1: str, year2: str, genres: list[str]):
#     """
#     Compares albums from two different years filtered by a list of genres.
#
#     Parameters:
#         year1 (str): The first year for filtering albums.
#         year2 (str): The second year for filtering albums.
#         genres (list[str]): A list of genre strings to filter albums by. If empty list, no genre filtering is applied!!!
#
#     Returns:
#         list[dict]: A list of dictionaries representing albums released in the specified years and filtered by the specified genres.
#     """
#
#     albums_year1 = filter_albums_by_date_range(
#         year1 + '-01-01', year1 + '-12-31')
#     albums_year2 = filter_albums_by_date_range(
#         year2 + '-01-01', year2 + '-12-31')
#
#     df_year1 = pd.DataFrame(albums_year1)
#     df_year2 = pd.DataFrame(albums_year2)
#
#     merged_albums = pd.concat([df_year1, df_year2])
#
#     if len(genres) > 0:
#         filtered_albums = merged_albums[merged_albums['genres'].apply(
#             lambda x: any(genre.lower() in (g.lower() for g in x) for genre in genres))]
#         return filtered_albums.to_dict(orient='records')
#
#     return merged_albums.to_dict(orient='records')

# # --- SONGS ---

def songs_by_danceability_explicitness_speechiness(danceability_threshold: float, speechiness_threshold: float, explicit: bool = True):
    """
    Select songs above both a danceability and a speechiness threshold that have the requested explicit flag.

    Parameters:
        danceability_threshold (float): 'danceability_%' must be strictly greater than this value.
        speechiness_threshold (float): 'speechiness_%' must be strictly greater than this value.
        explicit (bool): Required value of the 'explicit' column. Default is True.

    Returns:
        list[dict]: Records of the songs that meet all three conditions.
    """
    is_danceable = SONGS['danceability_%'].gt(danceability_threshold)
    is_wordy = SONGS['speechiness_%'].gt(speechiness_threshold)
    flag_matches = SONGS['explicit'].eq(explicit)

    selected = SONGS.loc[is_danceable & is_wordy & flag_matches]
    return selected.to_dict(orient='records')

def top_streamed_songs_by_artist_date(artist_name: str, release_date: str, n: int = 5):
    """
    Returns the top n streamed songs by a specific artist, released on a specific date, month, or year.

    Artist matching follows top_streamed_songs_by_artist: `artist_name` must occur
    (case-insensitively, as plain text) in 'artist(s)_name', so collaborations are
    included. Songs are ranked by the numeric value of 'streams'.

    Parameters:
        artist_name (str): The name of the artist.
        release_date (str): Release date to filter by. Can be in YYYY, YYYY-MM, or YYYY-MM-DD format.
        n (int): The number of top-streamed songs to return. Default is 5.

    Returns:
        list[dict]: A list of dictionaries representing the top n streamed songs by the specified artist, released on the specified date.

    Raises:
        ValueError: If `release_date` is not 4, 7, or 10 characters long.
    """
    date_length = len(release_date)
    if date_length not in (4, 7, 10):
        raise ValueError(
            "Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    # Literal, case-insensitive containment. Exact equality would drop every
    # track on which the artist is credited together with others.
    artist_songs = SONGS[SONGS['artist(s)_name'].str.contains(
        artist_name, case=False, na=False, regex=False)]

    dates = artist_songs['release_date']
    if date_length == 4:  # Year format YYYY
        filter_condition = dates.str.startswith(release_date, na=False)
    elif date_length == 7:  # Month format YYYY-MM
        filter_condition = dates.str[:7] == release_date
    else:  # Day format YYYY-MM-DD
        filter_condition = dates == release_date

    # Rank on a numeric view so digit strings are not compared lexicographically.
    filtered_songs = artist_songs[filter_condition].sort_values(
        by='streams',
        ascending=False,
        key=lambda column: pd.to_numeric(column, errors='coerce'))

    top_songs = filtered_songs.head(n)
    return top_songs.to_dict(orient='records')

def unique_albums(genres_threshold: int, artist_count_threshold: int, max_rating_threshold: float):
    """
    Retrieves albums that are notable for their genre diversity and number of collaborating artists, yet have average ratings below a specified threshold.

    Genres and artists are counted by splitting the text on ', '. A missing
    value counts as zero. The shared ALBUMS frame is not modified.

    Parameters:
        genres_threshold (int): The album must have strictly more genres than this.
        artist_count_threshold (int): Minimum number of artists on an album (inclusive).
        max_rating_threshold (float): The album's average rating must be strictly below this.

    Returns:
        list[dict]: Albums meeting the specified criteria; each record carries an
                    extra 'artist_count' key.
    """
    def _count_items(cell):
        # Missing cells arrive as float NaN, which has no .split().
        return len(cell.split(', ')) if isinstance(cell, str) else 0

    # assign() returns a copy. Adding 'artist_count' to ALBUMS itself would leak
    # the column into the results of every other album function.
    albums = ALBUMS.assign(artist_count=ALBUMS['artist_name'].apply(_count_items))
    genre_counts = albums['genres'].apply(_count_items)

    filtered_albums = albums[
        (genre_counts > genres_threshold) &
        (albums['artist_count'] >= artist_count_threshold) &
        (albums['avg_rating'] < max_rating_threshold)
    ]
    return filtered_albums.to_dict(orient='records')

#### 4 Parameters

- Albums: albums_by_dates_genres_rating, high_rated_reviewed_albums_by_date
- Songs: top_streamed_songs_by_artist_date_range, speechiness_songs, instrumental_songs

In [95]:
# Show up to 100 characters per cell when rendering DataFrames.
pd.set_option('display.max_colwidth', 100)

# NOTE(review): speechiness_songs is not defined in any cell above this one, and
# this cell's execution count (95) is higher than that of the definitions cell
# that follows (89). On a fresh kernel run top to bottom this call presumably
# raises NameError — confirm and move it below the definition.
pd.DataFrame(speechiness_songs(speechiness=30, energy=30,
             explicit=True, bpm=120, threshold=24))

Unnamed: 0,track_name,artist(s)_name,artist_count,release_date,streams,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,album,explicit,popularity,duration_in_min
0,Nightmares (feat. Byron Messia),"Chris Brown, Byron Messia",2,2023-11-10,,101,F#,Minor,65,31,47,27,0,10,18,11:11,True,67.0,2.516617
1,Views,Chris Brown,1,2023-11-10,,143,F,Major,72,40,54,5,0,16,9,11:11,True,63.0,2.50285
2,Don't Hurt Me,YoungBoy Never Broke Again,1,2023-11-10,,130,A,Major,62,36,43,16,0,13,24,Decided 2,True,57.0,3.569233
3,Fuck You Too,Kodak Black,1,2023-11-10,,130,E,Minor,82,16,54,16,0,9,24,When I Was Dead (Original Version),True,51.0,3.44615
4,Scared Of My Money,Kodak Black,1,2023-11-10,,121,E,Minor,63,7,36,9,0,13,40,When I Was Dead (Original Version),True,50.0,2.229517
5,Big Fish,"Larry June, Cardo, Alemán",3,2023-11-10,,96,E,Minor,76,11,53,9,47,15,13,The Night Shift,True,51.0,2.7516


In [89]:
def albums_by_dates_genres_rating(start_date: str, end_date: str, genre_in: list[str], genre_out: list[str], min_rating: float):
    """
    Select albums released inside a date window, filtered by wanted and unwanted genres and by a minimum rating.

    Genre matching is a case-insensitive substring test against the 'genres' text.

    Parameters:
        start_date (str): First day of the window (inclusive), in YYYY-MM-DD format.
        end_date (str): Last day of the window (inclusive), in YYYY-MM-DD format.
        genre_in (list[str]): Genres of which the album must contain at least one.
        genre_out (list[str]): Genres the album must not contain.
        min_rating (float): Lowest accepted 'avg_rating' (inclusive).

    Returns:
        list[dict]: Records of the albums that meet every condition.
    """
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)

    released = pd.to_datetime(ALBUMS['release_date'])
    in_window = ALBUMS[(released >= window_start) & (released <= window_end)]

    def mentions_any(cell, terms):
        text = str(cell).lower()
        return any(str(term).lower() in text for term in terms)

    has_wanted = in_window['genres'].apply(lambda cell: mentions_any(cell, genre_in))
    has_unwanted = in_window['genres'].apply(lambda cell: mentions_any(cell, genre_out))
    by_genre = in_window[has_wanted & ~has_unwanted]

    rated_enough = by_genre['avg_rating'] >= min_rating
    return by_genre[rated_enough].to_dict(orient='records')

def high_rated_reviewed_albums_by_date(rating_threshold: float, min_ratings: int, review_threshold: int, release_date: str):
    """
    Retrieves albums released on a given year, month or day that reach a minimum rating, rating count and review count.

    Parameters:
        rating_threshold (float): The minimum average rating for the albums (inclusive).
        min_ratings (int): The minimum number of ratings an album must have (inclusive).
        review_threshold (int): The minimum number of reviews an album must have (inclusive).
        release_date (str): The release date of the albums in YYYY, YYYY-MM, or YYYY-MM-DD format.
            The format is recognised by the length of the string (4, 7 or 10 characters).

    Returns:
        list[dict]: A list of dictionaries representing albums that meet all criteria.

    Raises:
        ValueError: If release_date is not 4, 7 or 10 characters long.
    """
    date_length = len(release_date)

    # The date is only parsed inside the branch that needs it, so input of an
    # unsupported length always produces the explicit error below rather than
    # whatever the pandas date parser happens to raise.
    if date_length == 4:  # Year format YYYY
        filter_condition = (pd.to_datetime(ALBUMS['release_date']).dt.year == int(release_date))
    elif date_length == 7:  # Month format YYYY-MM
        # Prefix match on the stored date strings.
        filter_condition = (ALBUMS['release_date'].str.startswith(release_date))
    elif date_length == 10:  # Day format YYYY-MM-DD
        filter_condition = (pd.to_datetime(ALBUMS['release_date']) == pd.to_datetime(release_date))
    else:
        raise ValueError("Invalid date format. Please use YYYY, YYYY-MM, or YYYY-MM-DD.")

    filtered_albums = ALBUMS[
        filter_condition &
        (ALBUMS['avg_rating'] >= rating_threshold) &
        (ALBUMS['rating_count'] >= min_ratings) &
        (ALBUMS['review_count'] >= review_threshold)
    ]

    return filtered_albums.to_dict(orient='records')

# --- SONGS ---

def top_streamed_songs_by_artist_date_range(artist_name: str, start_date: str, end_date: str, n: int = 5):
    """
    Returns the n most streamed songs of one artist that were released inside a date window.

    The artist is matched by exact equality with the 'artist(s)_name' column.
    In the returned records 'release_date' is a parsed timestamp, not the
    original string.

    Parameters:
        artist_name (str): The name of the artist.
        start_date (str): First day of the window (inclusive), YYYY-MM-DD.
        end_date (str): Last day of the window (inclusive), YYYY-MM-DD.
        n (int): How many songs to return at most. Default is 5.

    Returns:
        list[dict]: Up to n song records, ordered by 'streams' from highest to lowest.
    """
    window_start, window_end = pd.to_datetime(start_date), pd.to_datetime(end_date)

    # Work on a derived frame so the shared SONGS table keeps its string dates.
    by_artist = SONGS.loc[SONGS['artist(s)_name'] == artist_name].assign(
        release_date=lambda songs: pd.to_datetime(songs['release_date'])
    )

    # Series.between is inclusive on both ends by default.
    released_in_window = by_artist['release_date'].between(window_start, window_end)
    ranked = by_artist.loc[released_in_window].sort_values(by='streams', ascending=False)

    return ranked.head(n).to_dict(orient='records')

def speechiness_songs(speechiness: float, energy: float, explicit: bool, bpm: int, threshold: int = 20):
    """
    Finds songs whose speechiness, energy and BPM each lie within +/- threshold of the given targets and whose explicit flag equals the requested one.

    Parameters:
        speechiness (float): Target value for the 'speechiness_%' column.
        energy (float): Target value for the 'energy_%' column.
        explicit (bool): Required value of the 'explicit' column.
        bpm (int): Target beats per minute.
        threshold (int): Allowed deviation (inclusive) from each of the three targets. Default is 20.

    Returns:
        list[dict]: One dictionary per matching song.
    """
    def near(column, target):
        # Inclusive window [target - threshold, target + threshold].
        return SONGS[column].between(target - threshold, target + threshold)

    selection = (
        near('speechiness_%', speechiness)
        & near('energy_%', energy)
        & near('bpm', bpm)
        & (SONGS['explicit'] == explicit)
    )

    return SONGS[selection].to_dict(orient='records')

def instrumental_songs(instrumentalness: float, valence: float, danceability: float, bpm: int, threshold: int):
    """
    Finds songs whose instrumentalness, valence, danceability and BPM each lie within +/- threshold of the given targets.

    Parameters:
        instrumentalness (float): Target value for the 'instrumentalness_%' column.
        valence (float): Target value for the 'valence_%' column.
        danceability (float): Target value for the 'danceability_%' column.
        bpm (int): Target beats per minute.
        threshold (int): Allowed deviation (inclusive) from each of the four targets.

    Returns:
        list[dict]: One dictionary per matching song.
    """
    targets = {
        'instrumentalness_%': instrumentalness,
        'valence_%': valence,
        'danceability_%': danceability,
        'bpm': bpm,
    }

    # Start from "everything matches" and narrow down one column at a time.
    selection = pd.Series(True, index=SONGS.index)
    for column, target in targets.items():
        selection &= SONGS[column].between(target - threshold, target + threshold)

    return SONGS[selection].to_dict(orient='records')

#### Metadata

In [12]:
def filter_functions(functions_list, function_metadata):
    """Pair the given callables with the metadata entries that describe them.

    Parameters:
        functions_list: Callables that should be offered to the model.
        function_metadata: Metadata dictionaries; an entry is kept when its
            'name' equals the ``__name__`` of one of the callables.

    Returns:
        tuple: ``(functions_list, matching_metadata)`` - the callables exactly as
        passed in, and the kept metadata entries in their original order.
    """
    known_names = [function.__name__ for function in functions_list]

    matching_metadata = []
    for entry in function_metadata:
        if entry.get('name') in known_names:
            matching_metadata.append(entry)

    return functions_list, matching_metadata

def describe_function(available_functions):
    """Collect the 'description' of every metadata entry.

    Parameters:
        available_functions: A ``(callables, metadata)`` pair as returned by
            ``filter_functions``; only the metadata (index 1) is read.

    Returns:
        list: The descriptions, in metadata order.
    """
    descriptions = []
    for entry in available_functions[1]:
        descriptions.append(entry["description"])
    return descriptions

## LLM

In [13]:
from enum import Enum
from openai.openai_object import OpenAIObject

class Role(Enum):
    """Values used for the "role" field of conversation messages."""
    ASSISTANT = "assistant"
    FUNCTION = "function"  # role of a message that carries a function result
    SYSTEM = "system"
    USER = "user"
    
class Model(Enum):
    """Model identifiers; the ``.value`` is what gets passed as ``model`` to ``chat``."""
    GPT3 = "gpt-3.5-turbo-1106"
    GPT4 = "gpt-4-0613"
    GPT4_new = "gpt-4-1106-preview"

class FunctionNotFoundError(Exception):
    """Raised when a requested function name has no matching callable.

    Attributes:
        function_name: The name that was requested.
        function_args: The arguments that were supplied with the request.
    """

    def __init__(self, function_name, function_args):
        message = f"Error finding function {function_name} with arguments {function_args}"
        super().__init__(message)
        self.function_name, self.function_args = function_name, function_args

class FunctionExecutionError(Exception):
    """Raised when calling a resolved function with the supplied arguments fails.

    Attributes:
        function_name: The name of the function that was called.
        function_args: The arguments it was called with.
    """

    def __init__(self, function_name, function_args):
        message = f"Error executing function {function_name} with arguments {function_args}"
        super().__init__(message)
        self.function_name, self.function_args = function_name, function_args


class Response:
    """Read-only view over a single chat message mapping.

    The wrapped mapping is expected to provide "role" and "content" and may
    provide "function_call".
    """

    def __init__(self, message:dict):
        self._message = message

    @classmethod
    def from_api(cls, openai_response:OpenAIObject):
        """Wrap the message of the first choice in a chat completion response."""
        first_choice = openai_response["choices"][0]
        return cls(first_choice["message"])

    @property
    def message(self) -> str:
        """The "content" entry of the wrapped message."""
        return self._message["content"]

    @property
    def role(self) -> str:
        """The "role" entry of the wrapped message."""
        return self._message["role"]

    @property
    def function(self) -> dict:
        """The "function_call" entry, or None when the message has none."""
        return self._message.get("function_call")

    @property
    def is_function_call(self) -> bool:
        """True when the message carries a "function_call" entry."""
        return self._message.get("function_call") is not None

    def to_dict(self) -> dict:
        """Return a shallow copy of the wrapped message as a plain dict."""
        return dict(self._message)

    def __str__(self):
        return "Response({}: {}, with function:{})".format(self.role, self.message, self.function)


class Conversation:
    """Ordered list of chat messages plus the call that sends them to the model."""

    def __init__(self):
        self._messages = []

    @property
    def messages_as_dicts(self):
        """All messages so far, each converted with ``to_dict()``, oldest first."""
        as_dicts = []
        for entry in self._messages:
            as_dicts.append(entry.to_dict())
        return as_dicts

    def send(self, model, functions, temperature=1) -> Response:
        """Send the whole conversation to the chat completion API.

        Function metadata is only attached when ``functions`` is non-empty.
        The reply is returned but NOT appended to the conversation.
        """
        request = dict(model=model, temperature=temperature, messages=self.messages_as_dicts)

        if len(functions) > 0:
            request["function_call"] = "auto"
            request["functions"] = functions

        return Response.from_api(openai.ChatCompletion.create(**request))

    def add(self, message_or_response):
        """Append a message (a Response, or a mapping that gets wrapped in one).

        Returns the conversation itself so calls can be chained.
        """
        if isinstance(message_or_response, Response):
            entry = message_or_response
        else:
            entry = Response(message_or_response)
        self._messages.append(entry)
        return self

    def __str__(self):
        return str(self.messages_as_dicts)
    

def handle_function(function:dict, functions) -> str:
    """Invoke the function requested by the model and return its result as JSON text.

    Parameters:
        function: Mapping with "name" (the function to call) and "arguments"
            (a JSON-encoded object of keyword arguments).
        functions: Iterable of callables; the one whose ``__name__`` equals the
            requested name is called.

    Returns:
        str: The JSON-serialized return value. Values that JSON cannot encode
        natively (for example timestamps) are written as ``str(value)``.

    Raises:
        FunctionNotFoundError: No callable with the requested name exists.
        FunctionExecutionError: Calling the function or serializing its result
            failed; the original exception is attached as ``__cause__``.
        ValueError: "arguments" is not valid JSON (raised by ``json.loads``).
    """
    function_name, function_args = function["name"], json.loads(function["arguments"])

    function_to_call = next((func for func in functions if func.__name__ == function_name), None)
    if function_to_call is None:
        raise FunctionNotFoundError(function_name, function_args)

    try:
        # default=str keeps results with non-JSON values (e.g. dates) serializable.
        return json.dumps(function_to_call(**function_args), default=str)
    except Exception as error:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and keep the root cause for debugging.
        raise FunctionExecutionError(function_name, function_args) from error


# TODO: implement an actual recovery strategy; for now `retry` is ignored.
def handle_error(error, retry):
    """Re-raise ``error`` unchanged.

    Parameters:
        error: The exception to raise.
        retry: Currently unused.
    """
    raise error


# Upper bound on model round-trips per chat() call.
max_iterations = 10
def chat(conversation:Conversation, model:str, functions:tuple, temperature:float) -> str:
    """Run the request / function-call loop until the model gives a plain answer.

    Every model reply is appended to ``conversation``. When a reply requests a
    function call, the function is executed and its result is appended as a
    function-role message before the model is asked again.

    Parameters:
        conversation: The conversation to continue; it is extended in place.
        model: Model identifier passed on to ``conversation.send``.
        functions: Pair of ``(callables, metadata)``; the metadata is sent to
            the model, the callables are used to execute requested calls.
        temperature: Sampling temperature passed on to ``conversation.send``.

    Returns:
        The content of the first reply that is not a function call, or None
        when ``max_iterations`` round-trips pass without such a reply.
    """
    for _ in range(max_iterations):
        reply = conversation.send(model, functions[1], temperature)
        conversation.add(reply)

        if not reply.is_function_call:
            return reply.message

        requested_call = reply.function
        try:
            outcome = handle_function(requested_call, functions[0])
            conversation.add({"role": Role.FUNCTION.value, "content": outcome, "name": requested_call["name"]})
        except (FunctionNotFoundError, FunctionExecutionError) as error:
            handle_error(error=error, retry=False)

    # Iteration budget used up without a final answer.
    return None
        

# def handle_function_on_server(function):
#     function_name, function_args = function["name"], json.loads(
#         function["arguments"])
    
#     import requests
    
#     URL = f"http://localhost:5000/function_call/{function_name}?"
    
#     response = requests.get(URL, params=function_args)
#     if response.ok:
#         data = response.json()
#         return data["result"]
#     else:
#         raise FunctionExecutionError # TODO


# PORT = 5000
# def chat_web(conversation: Conversation, model: str, function_metadata: list) -> str:
#     iteration = 0
#     while iteration < max_iterations:
#         iteration += 1
#         response = conversation.send(model, function_metadata)
#         conversation.add(response)

#         if response.is_function_call:
#             try:
#                 result = handle_function_on_server(response.function)
#                 conversation.add(
#                     {"role": Role.FUNCTION.value, "content": result, "name": response.function["name"]})
#             except (FunctionNotFoundError, FunctionExecutionError) as error:
#                 handle_error(error=error, retry=False)
#         else:
#             return response.message

##### Prepare Test

In [43]:
# Every callable that can be offered to the model; filter_functions() later
# pairs this list with the metadata of the selected function set.
all_functions = [top_streamed_songs, songs_by_release_date, songs_by_longest_duration,
                 albums_by_artist, songs_by_danceability_explicitness, filter_albums_by_date_range,
                 albums_by_genres2, albums_by_date_and_genres, high_rated_albums, 
                 songs_by_danceability_explicitness_speechiness, top_streamed_songs_by_artist_date, 
                 unique_albums, albums_by_year_genres_and_descriptors, high_rated_reviewed_albums, 
                 top_streamed_songs_by_artist_date_range, speechiness_songs, instrumental_songs, 
                 albums_by_dates_genres_rating, high_rated_reviewed_albums_by_date]

In [46]:
from itertools import zip_longest

# Input/output files of the test driver, resolved against the working directory.
# os.path.join picks the platform's separator and avoids the invalid "\q" / "\o"
# escape sequences that the hand-written backslash paths relied on.
QUESTIONS_FILE_PATH = os.path.join(os.getcwd(), "questions copy.json")
TEST_CONFIG_FILE_PATH = os.path.join(os.getcwd(), "test_config.json")
OUTPUT_FILE_PATH = os.path.join(os.getcwd(), "output_test1.json")

def process_question(question, model, available_functions, hyperparameters):
    """Ask the model one question and report its answer and the function calls it made.

    The system message is taken from the module-level ``prompt`` variable,
    which must be set before this function is called.

    Parameters:
        question: The user question text.
        model: Model identifier passed on to ``chat``.
        available_functions: ``(callables, metadata)`` pair passed on to ``chat``.
        hyperparameters: Mapping that must contain "temperature".

    Returns:
        dict with
            "final_response": the model's answer with all double quotes removed,
                or None when ``chat`` raised one of the handled errors or ended
                without a final answer;
            "function_names": names of the requested function calls, in order;
            "function_arguments": the parsed arguments of those calls, in order
                (an empty dict where the arguments were not valid JSON).
    """
    conversation = Conversation()
    conversation.add({"role": Role.SYSTEM.value, "content": prompt})
    conversation.add({"role": Role.USER.value, "content": question})

    try:
        # Use chat()'s own result: when it gives up without an answer the last
        # conversation entry is a raw function payload, not a response.
        final_response = chat(conversation, model=model,
                              functions=available_functions, temperature=hyperparameters["temperature"])
        if final_response is not None:
            final_response = final_response.replace('"', "")  # TODO: Exchange "" with '

    except (ValueError, FunctionExecutionError, FunctionNotFoundError) as e:
        print(f"Failed to answer due to error: {e}")
        final_response = None

    function_names = []
    function_arguments = []
    for message in conversation.messages_as_dicts:
        func_call = message.get("function_call")
        if not func_call:
            continue
        function_names.append(func_call["name"])
        try:
            function_arguments.append(json.loads(func_call["arguments"]))
        except ValueError:
            # Malformed arguments were already reported above; do not let them
            # abort the whole test run while collecting the call history.
            function_arguments.append({})

    return {
        "final_response": final_response,
        "function_names": function_names,
        "function_arguments": function_arguments
    }

def eval_parameters(called_parameters: list[dict], correct_paths: list[dict]):
    """Compare the arguments the model used against the accepted solution paths.

    Calls are compared position by position with each path's "parameters".
    When the number of calls differs, the missing side is treated as a call
    without parameters instead of failing.

    Parameters:
        called_parameters: One argument dict per function call the model made.
        correct_paths: Solution paths; each has a "parameters" list with one
            expected argument dict per call.

    Returns:
        tuple: ``(best_matches, best_status)`` where ``best_matches`` is the
        highest number of identical key/value pairs reached on any path and
        ``best_status`` is "Correct", "Partially Correct" or "Incorrect".
    """
    best_matches = 0
    best_status = "Incorrect"

    # Edge Case: We expect no function to be called (and model calls none)
    if len(called_parameters) == 0 and len(correct_paths) == 1 and len(correct_paths[0]["parameters"]) == 0:
        best_status = "Correct"

    for path in correct_paths:
        correct_parameters = path["parameters"]
        if len(correct_parameters) == 0:
            continue

        matches = 0
        keys_differ = False
        # fillvalue={} stands in for a call that one side did not make.
        for called, correct in zip_longest(called_parameters, correct_parameters, fillvalue={}):
            matches += sum(1 for key in called.keys() & correct.keys() if called[key] == correct[key])
            if called.keys() ^ correct.keys():
                keys_differ = True

        best_matches = max(best_matches, matches)

        if best_status != "Correct":
            # NOTE(review): "Correct" only requires the parameter names of every
            # call to equal the path's; values are reflected in best_matches.
            if not keys_differ:
                best_status = "Correct"
            elif matches > 0:
                best_status = "Partially Correct"

    return best_matches, best_status
    
def eval_functions(called_functions: list[str], correct_paths: list[dict]):
    """Compare the functions the model called against the accepted solution paths.

    Parameters:
        called_functions: Names of the functions the model called, in order.
        correct_paths: Solution paths; each has a "functions" list with the
            expected function names in order.

    Returns:
        tuple: ``(best_match_count, best_status)`` where ``best_match_count``
        is the highest number of position-wise identical names on any path and
        ``best_status`` is "Correct" (some path is matched exactly, including
        the case where no call was expected and none was made),
        "Partly Correct" (at least one position matches) or "Incorrect".
    """
    best_status = "Incorrect"
    best_match_count = 0

    for path in correct_paths:
        correct_functions = path["functions"]
        match_count = sum(1 for a, b in zip(called_functions, correct_functions) if a == b)

        if match_count == len(called_functions) == len(correct_functions):
            # An exact match has the highest possible match count, so no other
            # path can beat it - also true for the empty/empty case.
            return match_count, "Correct"

        if match_count > best_match_count:
            best_match_count = match_count
            best_status = "Partly Correct"

    return best_match_count, best_status

# Load the run configuration, the available question/function sets and the
# results of earlier runs.
with open(TEST_CONFIG_FILE_PATH) as f:
    config = json.load(f)

with open(QUESTIONS_FILE_PATH) as f:
    runnable_sets = json.load(f)

with open(OUTPUT_FILE_PATH) as f:
    output = json.load(f)

# Pick the question set and function set named in the configuration
# (the first entry with a matching id).
question_set = list(filter(lambda candidate: candidate["id"] == config["question_set"],
                           runnable_sets["question_sets"]))[0]
function_set = list(filter(lambda candidate: candidate["id"] == config["function_set"],
                           runnable_sets["function_sets"]))[0]

prompt = config["prompt"]
model = config["model"]
hyperparameters = config["hyperparameters"]
available_functions = filter_functions(all_functions, function_set["functions"])

# Accumulators for this run.
results = []
statistics = {
    counter: {"total": 0, "correct": 0, "accuracy": 0.0}
    for counter in ("number_of_functions", "number_of_parameters", "number_of_answers")
}
statistics["number_of_tokens"] = {"input": 0, "output": 0}

# run tests
for question in tqdm(question_set["questions"], desc="Test Run", unit="Question"):
    result = process_question(question["question"], model, available_functions, hyperparameters)
    solution_paths = question["target"]["solution_paths"]

    # analyze results
    correct_functions, function_eval = eval_functions(result["function_names"], solution_paths)
    correct_parameters, parameter_eval = eval_parameters(result["function_arguments"], solution_paths)
    answer_eval = "<<UNKNOWN - PLEASE INSERT YOURSELF>>"  # TODO: Correct, PartlyCorrect, Incorrect

    overall_match = function_eval == "Correct" and parameter_eval == "Correct" and answer_eval == "Correct"
    error_category = None

    output_result = {
        "question_id": question["id"],
        "category": question["category"],
        "question": question["question"],
        "overall_match": overall_match,
        "error_category": error_category,
        "correct_paths": solution_paths,
        "model_solution": {
            "functions": result["function_names"],
            "parameters": result["function_arguments"],
            "answer": result["final_response"]
        },
        "correct_answers": question["target"]["expected_answers"],
        "evaluation": {
            "functions": function_eval,
            "parameters": parameter_eval,
            "answer": answer_eval
        }
    }
    results.append(output_result)

    # statistics
    # When the model called the right functions its own calls define the
    # totals, otherwise the first solution path does.
    if function_eval == "Correct":
        reference_functions = result["function_names"]
        reference_parameters = result["function_arguments"]
    else:
        reference_functions = solution_paths[0]["functions"]
        reference_parameters = solution_paths[0]["parameters"]

    total_functions = len(reference_functions)
    # correct_parameters counts matching key/value pairs, so the total has to
    # count individual parameters as well, not the number of calls.
    total_parameters = sum(len(call_parameters) for call_parameters in reference_parameters)
    correct_answer = 1 if answer_eval == "Correct" else 0

    for counter, total, correct in (
        ("number_of_functions", total_functions, correct_functions),
        ("number_of_parameters", total_parameters, correct_parameters),
        ("number_of_answers", 1, correct_answer),
    ):
        stats = statistics[counter]
        stats["total"] += total
        stats["correct"] += correct
        # A total of 0 is possible (e.g. questions that expect no function
        # call); report 0.0 instead of dividing by zero.
        stats["accuracy"] = stats["correct"] / stats["total"] if stats["total"] else 0.0


# Describe this run so it can be told apart from earlier ones in the output file.
metadata = {
    "id": config["id"],
    "name": config["name"],
    "question_set_id": question_set["id"],
    "function_set_id": function_set["id"],
    "model": config["model"],
    "hyperparameters": config["hyperparameters"],
    "run_timestamp": str(datetime.now()),
    "test_driver_version": "1.0.0"
}

run = {
    "metadata": metadata,
    "statistics": statistics,
    "results": results
}
output["runs"].append(run)

# Serialize BEFORE opening the file: mode "w" truncates it immediately, so a
# serialization error inside the with-block would wipe all previous runs.
serialized_output = json.dumps(output)
with open(OUTPUT_FILE_PATH, "w") as f:
    f.write(serialized_output)

Test Run: 100%|██████████| 2/2 [00:11<00:00,  5.76s/Question]


## Benchmark

In [88]:
# Benchmark setup: choose which callables are offered to the model in the
# cells below. NOTE: this rebinds the notebook-wide `all_functions`.
all_functions = []
# all_functions = [albums_by_artist, albums_by_genres, top_streamed_songs, songs_by_release_date,

#                  songs_by_longest_duration, songs_by_danceability, songs_by_explicitness]


# Currently only a single function is benchmarked.
all_functions = [albums_by_date_and_genres]
# all_functions += [filter_albums_by_date_range, albums_by_genres2,
#                   albums_by_date_and_genres, high_rated_albums, songs_by_danceability_explicitness]


# Metadata for the functions, read from functions.json in the working directory.
with open('functions.json', 'r') as file:

    function_metadata = json.load(file)
    

# Sampling temperature passed to chat() in the benchmark cells.
TEMPERATURE = 0

In [89]:
# Show what the benchmark will work with: the selected callables and the
# name/description of every loaded metadata entry.
print(f"Functions: {len(all_functions)}")
print(all_functions)

print()
print(f"Metadata: {len(function_metadata)}")
for meta_data in function_metadata:
    print(meta_data['name'], meta_data['description'], sep=": ")

Functions: 1
[<function albums_by_date_and_genres at 0x000001B9FF9F79C0>]

Metadata: 23
albums_by_artist: Returns all albums by a given artist.
albums_by_genres: Returns all albums that contain any of the genres specified in the provided genres list.
top_streamed_songs: Returns the n most streamed songs.
songs_by_release_date: Retrieves all songs released on a specific date, in a specific month, or in a specific year, based on the provided release date.
songs_by_longest_duration: Returns the top 'n' songs sorted by their duration, from longest to shortest.
songs_by_danceability: Returns the top 10 songs sorted by streams that have a danceability rating above a specified threshold percentage.
songs_by_explicitness: Retrieves the top 10 songs filtered by their explicit content, sorted by a metric such as streams or popularity.
filter_albums_by_date_range: Filters and retrieves albums released within a specified date range. The range is inclusive of the start and end dates.
albums_by_genr

#### Functions

In [90]:
# Scratch cell for trying out individual functions; the commented-out lines
# are alternative calls that were used earlier.
# df = pd.DataFrame(albums_by_genres(genres=["Wonky"]))
# # df['genres'] = df['genres'].str.split(',')
# # exploded_genres = df.explode('genres')

# # exploded_genres
# Allow up to 100 characters per column so long genre/descriptor strings stay readable.
pd.set_option('display.max_colwidth', 100)
# df["genres"]

# # pd.DataFrame(filter_albums_by_date_range(start_date="2000", end_date="2001"))


# Last expression of the cell: rendered as the cell output.
pd.DataFrame(albums_by_date_and_genres(
    release_date="2023", genres=["adult", "television"]))

# pd.DataFrame(albums_by_genres2(genres_in=["Wonky"], genres_out=["Deconstructed Club"]))

Unnamed: 0,release_name,artist_name,release_date,genres,descriptors,avg_rating,rating_count,review_count
0,Autumn Variations,Ed Sheeran,2023-09-29,"Singer-Songwriter, Folk Pop,Adult Contemporary, Indie Folk","autumn, male vocalist, introspective, longing, melancholic, optimistic, mellow, anxious, warm, m...",2.5,405,6.0
1,Bless This Mess,U.S. Girls,2023-02-24,"Pop Soul, Synthpop,Electro-Disco, Minneapolis Sound, Adult Contemporary","female vocalist, energetic, rhythmic, humorous, quirky, eclectic, rebellious, lush, mellow, hypn...",2.7,1076,14.0
2,TRUSTFALL,P!nk,2023-02-17,"Pop, Adult Contemporary, Pop Rock, Dance-Pop, Adult Contemporary","emale vocalist, introspective, longing, melodic, passionate, optimistic",2.29,422,9.0
3,The Idol Episode 5 Part 2,The Weeknd & Lily-Rose Depp / Troye Sivan / Suzanna Son Television Soundtracks,2023-07-03,"Television Music, Alternative R&B, Alt-Pop, Adult Contemporary, Ambient Pop",,2.48,84,
4,The Idol Episode 5 Part 1,The Weeknd,2023-06-30,"Trap, Alternative R&B, Television Music, Synthpop, Progressive Electronic, Trap Soul",male vocalist,3.07,96,1.0
5,The Idol Episode 4,The Weeknd,2023-06-23,"Television Music, Alternative R&B, Art Pop, Alt-Pop, Synthpop, Progressive Electronic","female vocalist, male vocalist",2.87,122,1.0
6,The Idol Episode 3,The Weeknd / Moses Sumney,2023-06-19,"Television Music, Alternative R&B, Ambient Pop, Synthpop, Progressive Electronic, Contemporary R...","melodic, male vocalist",2.98,115,
7,The Idol Episode 2,The Weeknd / Mike Dean / Suzanna Son Television Soundtracks,2023-06-12,"Television Music, Alternative R&B, Progressive Electronic, Synthpop, Dark Jazz, Adult Contempora...",,2.81,119,
8,The Idol Episode 1,The Weeknd & Mike Dean / Lily Rose Depp Television Soundtracks,2023-06-09,"Television Music, Dance-Pop, Cinematic Classical, Alt-Pop, Progressive Electronic, Synthpop","male vocalist, female vocalist, sexual",2.39,153,
9,Chemistry,Kelly Clarkson,2023-06-23,"Pop Soul, Pop Rock, Singer-Songwriter, Adult Contemporary, Synthpop, Alt-Pop, Country Pop, Singe...","female vocalist, breakup, passionate, sentimental, anthemic, melodic",2.79,210,3.0


##### 1. Parameter

In [91]:
# Show floats with two decimals from here on.
pd.set_option('display.float_format', '{:.2f}'.format)

# Count the distinct genre strings that occur on albums tagged "Wonky".
df = pd.DataFrame(albums_by_genres(genres=["Wonky"]))
df = df.assign(genres=df['genres'].str.split(','))

# One row per (album, genre) pair.
exploded_genres = df.explode('genres')

# NOTE(review): splitting on ',' keeps leading spaces, so "Pop" and " Pop"
# are counted as two different genres - confirm whether that is intended.
exploded_genres["genres"].drop_duplicates().shape[0]

28

In [92]:
# Offer only the selected benchmark functions (with their metadata) to the model.
available_functions = filter_functions(all_functions, function_metadata)

# One-question conversation: a short system instruction plus the user query.
conversation = Conversation()
conversation.add({"role": Role.SYSTEM.value, "content": "Answer briefly."})
conversation.add({"role": Role.USER.value, "content": "What adult, television songs but without the trap genre was released between 20. Juni 2023 to 2. July 2023?"})

result = chat(
    conversation,
    model=Model.GPT4.value,
    functions=available_functions,
    temperature=TEMPERATURE,
)
print(result)

FunctionExecutionError: Error executing function albums_by_date_and_genres with arguments {'release_date': '2023-06-20/2023-07-02', 'genres': ['adult', 'television']}

In [93]:
# Inspect every message exchanged so far, including the model's function-call request.
pprint(conversation.messages_as_dicts)

[{'content': 'Answer briefly.', 'role': 'system'},
 {'content': 'What adult, television songs but without the trap genre was '
             'released between 20. Juni 2023 to 2. July 2023?',
  'role': 'user'},
 {'content': None,
  'function_call': <OpenAIObject at 0x1b98deefdd0> JSON: {
  "name": "albums_by_date_and_genres",
  "arguments": "{\n  \"release_date\": \"2023-06-20/2023-07-02\",\n  \"genres\": [\"adult\", \"television\"]\n}"
},
  'role': 'assistant'}]
