In [1]:
# Install libraries required
!pip install --upgrade --quiet langchain langchain-community langchain-openai faiss-cpu

In [2]:
# import libraries required
import getpass
import os
from langchain.chains import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_community.utilities import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from operator import itemgetter

In [3]:
# Provide the OpenAI API key through the environment instead of hardcoding it.
# The key is requested (with hidden input) only when OPENAI_API_KEY is not
# already set, and the prompt states what is being asked for rather than the
# default "Password: ".
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [4]:
# connect and test database
db = SQLDatabase.from_uri("sqlite:///spotify.db")

print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM top_streams LIMIT 1;")
print("\nColumn names:")
db.run("SELECT name FROM PRAGMA_TABLE_INFO('top_streams');")

sqlite
['top_streams']

Column names:


"[('Track',), ('Album Name',), ('Artist',), ('Release Date',), ('ISRC',), ('All Time Rank',), ('Track Score',), ('Spotify Streams',), ('Spotify Playlist Count',), ('Spotify Playlist Reach',), ('Spotify Popularity',), ('YouTube Views',), ('YouTube Likes',), ('TikTok Posts',), ('TikTok Likes',), ('TikTok Views',), ('YouTube Playlist Reach',), ('Apple Music Playlist Count',), ('AirPlay Spins',), ('SiriusXM Spins',), ('Deezer Playlist Count',), ('Deezer Playlist Reach',), ('Amazon Playlist Count',), ('Pandora Streams',), ('Pandora Track Stations',), ('Soundcloud Streams',), ('Shazam Counts',), ('TIDAL Popularity',), ('Explicit Track',)]"

In [5]:
# choose model
# Chat model used both to write the SQL and to phrase the final answer.
# temperature=0 keeps generation as repeatable as possible, so the same
# question yields the same SQL from run to run.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [6]:
# Minimal pipeline: question -> SQL text -> raw query result (no prose answer yet).
write_query = create_sql_query_chain(llm, db)  # LLM writes SQL for a question
execute_query = QuerySQLDataBaseTool(db=db)  # tool that runs a query against db
chain = write_query.pipe(execute_query)

track_count_question = {"question": "How many tracks are there"}
chain.invoke(track_count_question)

'[(4600,)]'

In [7]:
# Prompt that turns (question, generated SQL, SQL result) into a friendly answer.
# The column glossary must use the exact column names of the top_streams table
# (e.g. "Track", not "Track Name") so the descriptions line up with the
# identifiers that appear in the generated SQL.
# Template variables: {question}, {query}, {result}.
answer_prompt = PromptTemplate.from_template(
"""You are an agent that uses data from a Spotify Database about the most streamed songs on spotify in 2024.
Here is a description of each column;


Track: Name of the song.
Album Name: Name of the album the song belongs to.
Artist: Name of the artist(s) of the song.
Release Date: Date when the song was released.
ISRC: International Standard Recording Code for the song.
All Time Rank: Ranking of the song based on its all-time popularity.
Track Score: Score assigned to the track based on various factors.
Spotify Streams: Total number of streams on Spotify.
Spotify Playlist Count: Number of Spotify playlists the song is included in.
Spotify Playlist Reach: Reach of the song across Spotify playlists.
Spotify Popularity: Popularity score of the song on Spotify.
YouTube Views: Total views of the song's official video on YouTube.
YouTube Likes: Total likes on the song's official video on YouTube.
TikTok Posts: Number of TikTok posts featuring the song.
TikTok Likes: Total likes on TikTok posts featuring the song.
TikTok Views: Total views on TikTok posts featuring the song.
YouTube Playlist Reach: Reach of the song across YouTube playlists.
Apple Music Playlist Count: Number of Apple Music playlists the song is included in.
AirPlay Spins: Number of times the song has been played on radio stations.
SiriusXM Spins: Number of times the song has been played on SiriusXM.
Deezer Playlist Count: Number of Deezer playlists the song is included in.
Deezer Playlist Reach: Reach of the song across Deezer playlists.
Amazon Playlist Count: Number of Amazon Music playlists the song is included in.
Pandora Streams: Total number of streams on Pandora.
Pandora Track Stations: Number of Pandora stations featuring the song.
Soundcloud Streams: Total number of streams on Soundcloud.
Shazam Counts: Total number of times the song has been Shazamed.
TIDAL Popularity: Popularity score of the song on TIDAL.
Explicit Track: Indicates whether the song contains explicit content.
    
    
Given the following user question, corresponding SQL query, and SQL result, answer the user question in a human friendly tone. 
Take each column description into consideration when making a decision.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

In [8]:
# Full question-answering pipeline, built from write_query and execute_query
# defined in the earlier SQL-chain cell.

# Stage 1: add a "query" key holding the SQL the LLM writes for the question.
generate_sql = RunnablePassthrough.assign(query=write_query)

# Stage 2: add a "result" key by running that SQL against the database.
run_sql = RunnablePassthrough.assign(result=itemgetter("query") | execute_query)

# Stage 3: fill the answer prompt, call the LLM, and return plain text.
chain = generate_sql | run_sql | answer_prompt | llm | StrOutputParser()

In [9]:
# Ask the full pipeline a fixed question; the answer string is the cell output.
artist_count_question = {"question": "How many artists are there"}
chain.invoke(artist_count_question)

'There are a total of 2000 artists in the Spotify Database based on the most streamed songs in 2024.'

In [10]:
# Ask the full pipeline a question typed in by the user at run time.
user_question = input("What is your question?")
chain.invoke({"question": user_question})

"I'm sorry, but it seems there was an error while trying to retrieve the information about the best song in the database. The SQL query attempted to select the track name, artist, and track score of the song with the highest score, but it seems there was a programming error. Let me try to run the query again to provide you with the details of the best song. Thank you for your patience."