---

Data Collection

---

In [None]:
import base64
import os

import requests

# Spotify credentials (from the Spotify Developer Dashboard) are loaded at run
# time so that neither value is stored in the notebook itself:
#   * client ID     <- SPOTIFY_CLIENT_ID environment variable
#   * client secret <- data.txt
# NOTE(review): the original client_id assignment was not valid Python, so the
# intended source of the client ID is an assumption -- confirm.
client_id = os.environ.get('SPOTIFY_CLIENT_ID')
if not client_id:
    raise RuntimeError("Set the SPOTIFY_CLIENT_ID environment variable before running this cell")

with open("data.txt", "r") as f:
    # strip() removes surrounding whitespace (e.g. a trailing newline) that
    # would otherwise be encoded into the credential below.
    client_secret = f.read().strip()

# Spotify Accounts service endpoint for getting access tokens
token_url = 'https://accounts.spotify.com/api/token'

# Parameters for the token request (client-credentials grant)
data = {
    'grant_type': 'client_credentials'
}

# HTTP Basic Auth header: base64("<client_id>:<client_secret>")
client_credentials = f"{client_id}:{client_secret}"
base64_credentials = base64.b64encode(client_credentials.encode()).decode('utf-8')
headers = {
    'Authorization': f'Basic {base64_credentials}'
}

# Sending a POST request to the token endpoint; the timeout keeps a stalled
# connection from hanging the kernel indefinitely.
response = requests.post(token_url, data=data, headers=headers, timeout=10)

# Stop here on failure: every later cell needs access_token, so continuing
# would only surface as a NameError further down.
if response.status_code != 200:
    raise RuntimeError(f"Error: {response.status_code} - token request failed: {response.text}")

# Parsing the JSON response and extracting the access token
token_data = response.json()
access_token = token_data['access_token']

# The token itself is deliberately not printed so it is not saved in the
# notebook's output.
print("Access token acquired")

In [None]:
# Example search request: tracks by David Bowie.
# The endpoint is written without a trailing slash so it matches the other
# search request in this notebook.
api_url = "https://api.spotify.com/v1/search"

# Bearer header built from the access token; the later request cells reuse it.
header = {
    'Authorization': 'Bearer ' + access_token
}
param = {
    'q': 'artist:David Bowie',
    'type': 'track'
}

# The timeout keeps a stalled connection from hanging the kernel indefinitely.
resp = requests.get(api_url, params=param, headers=header, timeout=10)

print('GET Response Status Code:', resp.status_code)
print(resp.text)

In [None]:
# On a successful response, print the name of every track in the result.
if resp.status_code == 200:
    data = resp.json()

    found_tracks = data['tracks']['items']
    for entry in found_tracks:
        print(entry['name'])

In [None]:
# Request the browse-categories listing, reusing the bearer `header`
# from the earlier search cell.
api_url = "https://api.spotify.com/v1/browse/categories"
param = dict(country='US', type='regional')

resp = requests.get(api_url, headers=header, params=param)

print('GET Response Status Code:', resp.status_code, '\n')
print(resp.text)

In [None]:
# On a successful response, print the name of every category returned.
if resp.status_code == 200:
    data = resp.json()

    returned_categories = data['categories']['items']
    for category in returned_categories:
        print(category['name'])

---

Project Start

---

Question/Problem addressed:

How do Spotify's and Apple Music's top 100 tracks compare?

In [None]:
# ID of the playlist whose tracks are requested further down.
playlist_id = '5ABHKGoOzxkaa28ttQV9sE'

# Free-text playlist search, limited to 10 results.
api_url = "https://api.spotify.com/v1/search"
param = dict(
    q='Top 100 most streamed songs on Spotify',
    type='playlist',
    limit=10,
)

resp = requests.get(api_url, headers=header, params=param)

print('GET Response Status Code:', resp.status_code, '\n')
print(resp.text)

In [None]:
# Playlist ID found from the previous (playlist search) response. The URL is
# built from playlist_id, defined in the search cell, so the ID is written
# down in one place only.
api_url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"

# The timeout keeps a stalled connection from hanging the kernel indefinitely.
resp = requests.get(api_url, headers=header, timeout=10)

print('GET Response Status Code:', resp.status_code , '\n')
print(resp.text)

In [None]:
# Collect the track names of the playlist fetched in the previous cell.
spotifyTop100 = []

if resp.status_code == 200:
    data = resp.json()

    while True:
        for item in data['items']:
            track = item['track']
            # NOTE(review): the API is understood to return a null track for
            # entries that are no longer available -- confirm. Skipping them
            # avoids a TypeError on track['name'].
            if track is None:
                continue
            spotifyTop100.append(track['name'])

        # The listing is paginated: keep following the 'next' link until it
        # is empty so the list does not silently stop at the first page.
        next_url = data.get('next')
        if not next_url:
            break
        page_resp = requests.get(next_url, headers=header, timeout=10)
        page_resp.raise_for_status()
        data = page_resp.json()

print(spotifyTop100)

In [None]:
# Load the CSV that supplies the Apple Music data for the comparison

import pandas as pd
import numpy as np

# The file is decoded as latin1, and any row containing a missing value is
# discarded right after loading.
musicData2023 = pd.read_csv('spotify-2023.csv', encoding='latin1').dropna()

In [None]:
# Keep rows whose in_apple_charts value is above 0 and below 101, reduce the
# frame to name + chart value, and keep the first row for each chart value.
in_range = (musicData2023['in_apple_charts'] < 101) & (musicData2023['in_apple_charts'] > 0)
appleMusic = (
    musicData2023
    .loc[in_range, ['track_name', 'in_apple_charts']]
    .drop_duplicates(subset=['in_apple_charts'])
)

# Track names of the remaining rows, in frame order.
appleMusicTop100 = list(appleMusic['track_name'])

print(appleMusicTop100)

In [None]:
# Add up the streams column over every row of the dataset.
numStreamData = musicData2023[['streams']]

totalStreams = sum(numStreamData['streams'])

# Thousands separators make the total readable.
formatted_number = f'{totalStreams:,}'
print(formatted_number + ": Total streams of songs on Spotify charts")

In [None]:
# Release year and stream count for every track; previewed below.
yearVsStreams = musicData2023.loc[:, ['released_year', 'streams']]

yearVsStreams.head()

---

Analysis

---

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Titles from the Spotify list that also appear (exact string match) in the
# Apple Music list, in Spotify-list order.
commonTopSongs = [title for title in spotifyTop100 if title in appleMusicTop100]

print(commonTopSongs, '\n', len(commonTopSongs))

In [None]:
# Total streams per release year. The sum is computed from musicData2023
# rather than by reassigning yearVsStreams from itself, so re-running this
# cell always starts from the same input and gives the same result.
yearVsStreams = musicData2023.groupby('released_year')['streams'].sum()

In [None]:
# Show the per-release-year stream totals produced by the groupby/sum step.
print(yearVsStreams)

In [None]:
# Bar chart: one bar per release year, bar height = summed streams.
years = list(yearVsStreams.index)
streams = list(yearVsStreams)

# Fixed y-axis tick positions, labelled with thousands separators.
tick_values = [
    100_000_000,
    25_000_000_000,
    50_000_000_000,
    75_000_000_000,
    100_000_000_000,
    125_000_000_000,
]
tick_labels = [f'{tick:,}' for tick in tick_values]

plt.figure(figsize=(16, 8))
plt.bar(years, streams)
plt.title("Year of songs release vs number of Spotify streams (2023)")
plt.xlabel('Year of release')
plt.ylabel('Number of streams')
plt.yticks(tick_values, tick_labels)

plt.show()

In [None]:
# One row per distinct track name (first occurrence kept), holding the two
# chart columns that are compared in the following cells.
chart_columns = ['track_name', 'in_spotify_charts', 'in_apple_charts']
streamBattleData = musicData2023[chart_columns].drop_duplicates(subset=['track_name'])

trackNames = streamBattleData['track_name'].unique()

print(streamBattleData)

In [None]:
# Per-track difference: in_spotify_charts minus in_apple_charts.
spotifyValues = list(streamBattleData['in_spotify_charts'].values)
appleValues = list(streamBattleData['in_apple_charts'].values)

arrayDifference = np.subtract(np.array(spotifyValues), np.array(appleValues))

# Scatter of the first 200 tracks; the red line marks a difference of zero.
fig, ax = plt.subplots(figsize=(16, 8))

ax.scatter(trackNames[:200], arrayDifference[:200])
ax.axhline(y=0, color='r')

plt.show()

In [None]:
# Classify every track by the sign of (in_spotify_charts - in_apple_charts).
# "Greater" means any positive difference: the threshold is 0, not 1,
# otherwise a difference of exactly 1 would be reported as "lower" by the
# subtraction below.
# NOTE(review): the messages talk about "stream numbers" but the values come
# from the in_spotify_charts / in_apple_charts columns -- confirm the wording.
spotifyGreaterStreams = sum(1 for item in arrayDifference if item > 0)
equalStreams = sum(1 for item in arrayDifference if item == 0)

# Whatever is neither greater nor equal must be lower.
appleGreaterStreams = len(arrayDifference) - spotifyGreaterStreams - equalStreams

print(spotifyGreaterStreams , "of the most streamed songs on Spotify in 2023 have higher stream numbers than Apple Music")
print(appleGreaterStreams, "of the most streamed songs on Spotify in 2023 have lower stream numbers than Apple Music")
print(equalStreams, "songs that have equal streams on both platforms")


In [None]:
# Two-column frame (danceability first, bpm second) used by the scatter plot
# and the clustering cells; previewed below.
danceData = musicData2023.loc[:, ['danceability_%', 'bpm']]

danceData.head()

In [None]:
# Scatter of danceability against tempo for every track.
fig, ax = plt.subplots(figsize=(16, 8))

ax.scatter(danceData['bpm'], danceData['danceability_%'])

ax.set_title("Danceability of top Spotify songs")
ax.set_xlabel('BPM')
ax.set_ylabel('Danceability %')

plt.show()

In [None]:
from sklearn.cluster import KMeans

# Elbow method: fit KMeans for 1..8 clusters and record each model's inertia
# (within-cluster sum of squares).
wcss = []
for n_clusters in range(1, 9):
    km = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, random_state=0)
    wcss.append(km.fit(danceData).inertia_)

In [None]:
# Elbow plot: WCSS against the number of clusters. The x values are derived
# from len(wcss) so the plot stays aligned if the range of cluster counts
# tried above changes.
plt.plot(range(1, len(wcss) + 1), wcss, '-ro')
plt.title('Elbow')
plt.xlabel('Num Clusters')
plt.ylabel('WCSS')
plt.show()

In [None]:
# Final clustering with the chosen number of clusters; y_km holds the cluster
# label assigned to each row of danceData.
k = 3
km = KMeans(init='k-means++', n_clusters=k, random_state=0, n_init=10)
y_km = km.fit(danceData).labels_

In [None]:
# Give the danceability column a name that works with attribute access
# (danceData.dance), then show the resulting column labels.
danceData = danceData.rename({'danceability_%': 'dance'}, axis='columns')

print(danceData.columns)

In [None]:
# One scatter series per cluster label, then the cluster centres on top.
fig, ax = plt.subplots(figsize=(16, 8))

for cluster in range(k):
    members = danceData[y_km == cluster]
    ax.scatter(members.bpm, members.dance)

# danceData was selected with danceability first and bpm second, so column 1
# of the centres is plotted as x (BPM) and column 0 as y (danceability).
centers = km.cluster_centers_
ax.scatter(centers[:, 1], centers[:, 0], marker=',', s=50)

ax.set_title("Danceability of top Spotify songs")
ax.set_xlabel('BPM')
ax.set_ylabel('Danceability %')

plt.show()