In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from credentials import cid, secret
import pandas as pd
import time
from datetime import datetime
from sqlalchemy import create_engine



In [3]:
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)



##### Setting up database connection

In [5]:
conns = 'postgresql://postgres:11223344@database-1.c10qo0hdky8j.eu-central-1.rds.amazonaws.com:5432/postgres'

db = create_engine(conns, encoding='latin1', echo=False)


In [7]:

query = "SELECT album_id FROM new_albums;"

existing_ids = pd.read_sql(query, db)

In [8]:
new_albums = pd.DataFrame()


for i in range(0,51,50):
    response = sp.new_releases(country="DE",limit=50, offset=i)
    print(response)
    albums = response['albums']
    for item in albums['items']:
        if item["id"] not in existing_ids.values:
            new_albums = new_albums.append({'album_id': item["id"],
                                'artist_name': item["artists"][0]["name"],
                                'album_name': item["name"],
                                'release_date': datetime.strptime((item["release_date"]),"%Y-%m-%d"),
                                'total_tracks': item["total_tracks"],
                                'release_date_precision': item["release_date_precision"],
                                'survey_date': datetime(*time.localtime(time.time())[0:3])
                                },
                                ignore_index=True)

new_albums

{'albums': {'href': 'https://api.spotify.com/v1/browse/new-releases?country=DE&offset=0&limit=50', 'items': [{'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2cFrymmkijnjDg9SS92EPM'}, 'href': 'https://api.spotify.com/v1/artists/2cFrymmkijnjDg9SS92EPM', 'id': '2cFrymmkijnjDg9SS92EPM', 'name': 'blackbear', 'type': 'artist', 'uri': 'spotify:artist:2cFrymmkijnjDg9SS92EPM'}], 'available_markets': ['AD', 'AE', 'AL', 'AR', 'AT', 'AU', 'BA', 'BE', 'BG', 'BH', 'BO', 'BR', 'BY', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HR', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS', 'IT', 'JO', 'JP', 'KW', 'KZ', 'LB', 'LI', 'LT', 'LU', 'LV', 'MA', 'MC', 'MD', 'ME', 'MK', 'MT', 'MX', 'MY', 'NI', 'NL', 'NO', 'NZ', 'OM', 'PA', 'PE', 'PH', 'PL', 'PS', 'PT', 'PY', 'QA', 'RO', 'RS', 'RU', 'SA', 'SE', 'SG', 'SI', 'SK', 'SV', 'TH', 'TN', 'TR', 'TW', 'UA', 'US', 'UY', 'VN', 'XK', 'ZA'], '

In [9]:
new_albums.to_sql("new_albums", db, if_exists='append')


In [10]:
##### Get the tracks from each album

query = "SELECT album_id, album_name, release_date, release_date_precision, survey_date, total_tracks FROM new_albums;"

album_ids = pd.read_sql(query, db)

0     0fxhOwMkj9lfSIAyTJVnfc
1     1uROBP2G4MP0O4w1v5Cpbg
2     3a17jUttRFcNPkcQfHcVnP
3     0y3vj8uCV9zNMznlmDMFe4
4     5ZQjqg9obFzyGuxGj0mjSi
               ...          
95    0RFBLO8iLNs6UB7tT1oBNu
96    2FFoPd5bHBr7D2kbxcHmfM
97    3KlZLvk9OzpJdHDvQsFF1I
98    1zFfHvbN6fDhmO0EeP8KvC
99    6ay4qFjg5VgZFqJmiGcIIm
Name: album_id, Length: 100, dtype: object

In [12]:

query = "SELECT track_id FROM new_songs;"

existing_song_ids = pd.read_sql(query, db)



In [13]:
song_ids = pd.DataFrame()

for id in album_ids["album_id"].values:
    response = sp.album_tracks(id, limit=50)
    tracks = response['items']
    for track in tracks:
        song_ids = song_ids.append({'track_id': track["id"]}, ignore_index=True)

In [236]:
new_songs = pd.DataFrame()

for id in song_ids.values:
    track = sp.track(id[0])
    if track["id"] not in existing_song_ids.values:
        new_songs = new_songs.append({'track_id': track["id"],
                            'new_entry': True,
                            'artist_name': track["album"]["artists"][0]["name"],
                            'album_name': track["album"]["name"],
                            'track_name': track["name"],
                            'release_date': datetime.strptime((track["album"]["release_date"]),"%Y-%m-%d"),
                            'release_date_precision': track["album"]["release_date_precision"],
                            'total_tracks': track["album"]["total_tracks"],
                            'survey_date': datetime(*time.localtime(time.time())[0:3]),
                            'explicit': track["explicit"],
                            'duration_ms': track["duration_ms"],
                            'track_number': track["track_number"],
                            'popularity': track["popularity"],
                            'disc_number': track["disc_number"]
                            },
                            ignore_index=True)
    else:
        new_songs = new_songs.append({'track_id': track["id"],
                            'new_entry': False,
                            'artist_name': track["album"]["artists"][0]["name"],
                            'album_name': track["album"]["name"],
                            'track_name': track["name"],
                            'release_date': datetime.strptime((track["album"]["release_date"]),"%Y-%m-%d"),
                            'release_date_precision': track["album"]["release_date_precision"],
                            'total_tracks': track["album"]["total_tracks"],
                            'survey_date': datetime(*time.localtime(time.time())[0:3]),
                            'explicit': track["explicit"],
                            'duration_ms': track["duration_ms"],
                            'track_number': track["track_number"],
                            'popularity': track["popularity"],
                            'disc_number': track["disc_number"]
                            },
                            ignore_index=True)


In [238]:
new_songs.to_sql("new_songs", db, if_exists='append')

##### Get Audio Analysis for each track

In [14]:
song_analysis = pd.DataFrame()

for id in song_ids.values:
    track = sp.audio_analysis(id[0])
    song_analysis = song_analysis.append(
            'num_samples': 4152979,
                           'duration': 188.34372,
            'sample_md5': '',
            'offset_seconds': 0,
            'window_seconds': 0,
            'analysis_sample_rate': 22050,
            'analysis_channels': 1,
            'end_of_fade_in': 0.22045,
             'start_of_fade_out': 181.53362, 'loudness': -7.106, 'tempo': 129.992, 'tempo_confidence': 0.518, 'time_signature': 1, 'time_signature_confidence': 0.461, 'key': 6, 'key_confidence': 0.648, 'mode': 0,
            'mode_confidence': 0.805
    )

{'meta': {'analyzer_version': '4.0.0', 'platform': 'Linux', 'detailed_status': 'OK', 'status_code': 0, 'timestamp': 1597972642, 'analysis_time': 15.34253, 'input_process': 'libvorbisfile L+R 44100->22050'}, 'track': {'num_samples': 4152979, 'duration': 188.34372, 'sample_md5': '', 'offset_seconds': 0, 'window_seconds': 0, 'analysis_sample_rate': 22050, 'analysis_channels': 1, 'end_of_fade_in': 0.22045, 'start_of_fade_out': 181.53362, 'loudness': -7.106, 'tempo': 129.992, 'tempo_confidence': 0.518, 'time_signature': 1, 'time_signature_confidence': 0.461, 'key': 6, 'key_confidence': 0.648, 'mode': 0, 'mode_confidence': 0.805, 'codestring': 'eJxVmolxJDkMBF0ZE_g__jt2mcXRShe3ESf29EMSQKEK4KjlrDvn-pTPXm3XeW_91Ns-tbRaym1nf-YYDHdft_S2P2u3z1p1zTrK9L7x6fX0Ndbm0XbKZ7Rb9xlnfeoq5bPG7rfMyq-Hmy8XGa_5aZU3n3tnG63UT2u8udc9x-m3f9ps5zMKc9rT4TqHrxXevNflm6WcTxtlrMsne-NDY6_Cb0y5Dz40Sissrq0Pnxv-Osaqk_uYJd9lqn3M-Rn9sMC7x2pjfcbhTfscZjH6_EzXVy-Pjj4-zPR87jrsxODzc_b1YQMrS2_tMzfvbfOescviV_5wyEIKP6y6F5u8722ts4PTR5n2G

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



KeyboardInterrupt: 