# EXAMPLE PIPELINE FOR SPOTIFY API INGESTION

In [1]:
import sys
from pathlib import Path

# Make the repository root importable so the `src.*` packages resolve.
# The root is taken to be two directories above the kernel's current working
# directory (normally the folder this notebook lives in).
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent
sys.path.insert(0, str(project_root))

In [2]:
from src.data_ingestion.auth import Auth
from src.data_ingestion.spotify_fetch_artist import fetch_artist  
from src.data_ingestion.spotify_fetch_playlist import fetch_top_artist_playlists 

In [3]:
# Fetch the artist payload for one hard-coded Spotify artist ID.
artist_id = "1ukmGETCwXTbgrTrkRDnmn"
artist_data = fetch_artist(artist_id)

# Headline fields, one per line: name, follower count, popularity, genres.
print(
    artist_data["name"],
    artist_data["followers"]["total"],
    artist_data["popularity"],
    artist_data["genres"],
    sep="\n",
)

2025-12-29 10:59:59 - INFO - Attempting to get Spotify access token
2025-12-29 11:00:00 - INFO - Successfully obtained access token
2025-12-29 11:00:00 - INFO - Fetching artist data for artist_id: 1ukmGETCwXTbgrTrkRDnmn
2025-12-29 11:00:01 - INFO - Successfully fetched artist data for 1ukmGETCwXTbgrTrkRDnmn


Nathaniel Bassey
3699444
63
['gospel', 'worship', 'afrogospel']


In [14]:
from src.data_ingestion.tidy_json import tidy_artist_data

In [15]:
# Convert the raw artist payload into the record shape used for storage,
# tagging it with its source. Judging by the displayed result, the record is
# flat: followers becomes a plain count, genres a JSON-encoded string, and a
# collected_at timestamp is added.
test_record = tidy_artist_data(artist_data,source='spotify')

2025-12-29 10:07:07 - INFO - Successfully Spotify Artist Json tidy up


In [16]:
# Inspect the tidied record before writing it to the database.
test_record

{'artist_id': '1ukmGETCwXTbgrTrkRDnmn',
 'name': 'Nathaniel Bassey',
 'followers': 3699444,
 'popularity': 63,
 'genres': '["gospel", "worship", "afrogospel"]',
 'collected_at': '2025-12-29T15:06:52.041512+00:00',
 'source': 'spotify'}

In [17]:
from src.data_ingestion.ingession import save_artist

In [18]:
from src.storage.init_db import init_db
init_db()  # Create the tables first

# Then test the connection by listing every table in the SQLite catalog.
from src.storage.db import get_connection
conn = get_connection()
try:
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    # Should now show [('artists',), ('sqlite_sequence',), ('tracks',)].
    # sqlite_sequence is SQLite's own bookkeeping table; it is created
    # automatically when a table declares an AUTOINCREMENT column.
    print(cursor.fetchall())
finally:
    # Release the handle even if the query raises, so a failed run does not
    # leave the database file open in the long-lived kernel process.
    conn.close()

2025-12-29 10:07:07 - INFO - Attempting to create DB at C:\Users\Chidi\Documents\GitHub\Worship-Engagement-Prediction-System-for-Nigeria-Spotify-Driven-ML-App-\data\spotify_data.db
2025-12-29 10:07:07 - INFO - Created DB successfully at C:\Users\Chidi\Documents\GitHub\Worship-Engagement-Prediction-System-for-Nigeria-Spotify-Driven-ML-App-\data\spotify_data.db
2025-12-29 10:07:07 - INFO - Initializing DB connection at C:\Users\Chidi\Documents\GitHub\Worship-Engagement-Prediction-System-for-Nigeria-Spotify-Driven-ML-App-\data\spotify_data.db
2025-12-29 10:07:07 - INFO - DB Connection Successful


[('artists',), ('sqlite_sequence',), ('tracks',)]


In [19]:
# Persist the tidied artist record. Per the log output, save_artist opens its
# own DB connection, so no connection needs to be passed in here.
save_artist(test_record)

2025-12-29 10:07:07 - INFO - Attempting to save records to DB
2025-12-29 10:07:07 - INFO - Initializing DB connection at C:\Users\Chidi\Documents\GitHub\Worship-Engagement-Prediction-System-for-Nigeria-Spotify-Driven-ML-App-\data\spotify_data.db
2025-12-29 10:07:07 - INFO - DB Connection Successful
2025-12-29 10:07:07 - INFO - Successfully saved record to DB


In [20]:
# Read the artists table back to confirm the record saved above is present.
conn = get_connection()
try:
    cursor = conn.cursor()

    cursor.execute("SELECT artist_id, name, popularity, followers, collected_at FROM artists")
    for row in cursor.fetchall():
        print(row)
finally:
    # Close the connection on the error path too; otherwise a failed query
    # would leave the handle open for the lifetime of the kernel.
    conn.close()

2025-12-29 10:07:07 - INFO - Initializing DB connection at C:\Users\Chidi\Documents\GitHub\Worship-Engagement-Prediction-System-for-Nigeria-Spotify-Driven-ML-App-\data\spotify_data.db
2025-12-29 10:07:07 - INFO - DB Connection Successful


('1ukmGETCwXTbgrTrkRDnmn', 'Nathaniel Bassey', 63, 3699444, '2025-12-29T15:06:52.041512+00:00')


In [5]:
# Fetch track data for the same artist, passing "NG" as the second argument
# (presumably the market/country code — confirm against the helper).
# NOTE: despite the helper's name, the log line and the payload's single
# 'tracks' key indicate this returns the artist's top tracks, not playlists.
track_data = fetch_top_artist_playlists(artist_id, "NG")

2025-12-29 11:00:18 - INFO - Attempting to get Spotify access token
2025-12-29 11:00:19 - INFO - Successfully obtained access token
2025-12-29 11:00:19 - INFO - Fetching artist data for artist_id: 1ukmGETCwXTbgrTrkRDnmn
2025-12-29 11:00:20 - INFO - Successfully fetched top tracks data for 1ukmGETCwXTbgrTrkRDnmn


In [6]:
# Check which top-level keys the response carries before digging in.
print(track_data.keys())

dict_keys(['tracks'])


In [7]:
# Unwrap the collection of per-track objects from the response envelope.
tracks = track_data['tracks']

In [12]:
# Print the field names of every track object to verify that all entries
# share the same schema.
for track in tracks:
    print(track.keys())

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
dict_keys(['album', 'artists', 'available_markets', 