## Importing the dataset ready:

In [1]:
# bringing some necessary libraries
import pandas as pd
import numpy
import json
import chardet

In [2]:
# Function to read local files
def read_txt(file_path):
    with open(file_path,'r') as f:
        text = f.read().strip()
    return text

# funtion to identify encoding type
def detect_encoding(file_path):
    with open(file_path, 'rb') as f:
        rawdata = f.read()
        result = chardet.detect(rawdata)
    return result['encoding']

# function to tabularly structure jsonfiles  
def load_json(path, encode):
    with open(path, encoding=encode) as f:
        decoded = json.load(f)
    df = pd.DataFrame(decoded)
    return df


In [3]:
# Finding encode of the file
encoding = detect_encoding(read_txt('path1.txt'))
print("Detected encoding:", encoding)

Detected encoding: utf-8


In [4]:
# importing files with our function
df1 = load_json(read_txt('path1.txt'),'utf-8')
df2 = load_json(read_txt('path2.txt'),'utf-8')

# creating a combined dataframe
df_spotify = pd.concat([df1, df2])

df_spotify

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2023-02-14 18:41,Roberto Carlos,Cama y Mesa - Cama e Mesa,21721
1,2023-02-15 13:16,Kanye West,Everything I Am,227788
2,2023-02-15 13:19,J Dilla,Welcome To The Show,10709
3,2023-02-15 13:19,Kanye West,Good Morning,195019
4,2023-02-15 13:22,Maroon 5,Maps,189939
...,...,...,...,...
4198,2024-02-15 18:08,Rick and Morty,Don't Look Back (feat. Kotomi & Ryan Elder) [F...,207000
4199,2024-02-15 18:12,Sting,What Could Have Been feat. Ray Chen (from the ...,213474
4200,2024-02-15 18:15,Skid Row,18 and Life,229960
4201,2024-02-15 18:20,Linkin Park,Not Alone,252840


## Requesting data from WEB API

In [5]:
# Some usuful libs to extract data from spotify API
import spotipy
import requests
import urllib.parse
from spotipy.oauth2 import SpotifyClientCredentials

In [6]:
# Setting up API Credentials
cid = read_txt('client_id.txt')
csecret = read_txt('client_secret.txt')

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=cid, client_secret=csecret))


In [7]:
# Defining a function to retrive data from the Web API
def get_track_info(artist_name, track_name):
    query = f'artist:{artist_name} track:{track_name}'
    
    # Check if the track info is already cached
    if query in track_cache:
        return track_cache[query]

    try:
        # Make request to Spotify API
        result = sp.search(q=query, type='track', limit=1)
        if result['tracks']['items']:
            track_info = result['tracks']['items'][0]
            
            # Extract desired attributes
            album_name = track_info['album']['name']
            image_url = track_info['album']['images'][0]['url']
            release_date = track_info['album']['release_date']
            popularity = track_info['popularity']
            acousticness = track_info.get('acousticness', None)
            liveness = track_info.get('liveness', None)
            danceability = track_info.get('danceability', None)
            speechiness = track_info.get('speechiness', None)
            valence = track_info.get('valence', None)
            duration_ms = track_info.get('duration_ms', None)
            explicit = track_info.get('explicit', None)

            # Cache the track info
            track_cache[query] = (album_name, image_url, release_date, popularity, acousticness, liveness, danceability, speechiness, valence, duration_ms, explicit)
            return album_name, image_url, release_date, popularity, acousticness, liveness, danceability, speechiness, valence, duration_ms, explicit
    except Exception as e:
        print(f"Error retrieving track info for '{query}': {e}")
    
    # Return None if track info couldn't be retrieved
    return None, None, None, None, None, None, None, None, None, None, None

# Initialize track cache
track_cache = {}

# Apply the function to each row in the DataFrame
df_spotify['albumName'], df_spotify['image_url'], df_spotify['release_date'], df_spotify['popularity'], df_spotify['acousticness'], df_spotify['liveness'], df_spotify['danceability'], df_spotify['speechiness'], df_spotify['valence'],df_spotify['duration_ms'], df_spotify['explicit'] = zip(*df_spotify.apply(lambda row: get_track_info(row['artistName'], row['trackName']), axis=1))
df_spotify

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))', 'limit': 1, 'offset': 0, 'type': 'track', 'market': None} returned 400 due to Bad request.


Error retrieving track info for 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))': http status: 400, code:-1 - https://api.spotify.com/v1/search?q=artist%3ASidonie+track%3AFascinados+%28feat.+Joan+Manuel+Serrat%2C+Leiva%2C+Vetusta+Morla%2C+Iv%C3%A1n+Ferreiro%2C+Loquillo%2C+Zahara%2C+Dani+Martin%2C+Albert+Pla%2C+Mikel+%28Izal%29%2C+Noni+%28Lori+Meyers%29%2C+Santi+Balmes%2C+Xoel+L%C3%B3pez%2C+Anni+B+Sweet%2C+Jeanette%2C+Carlos+Sadness%2C+Nina+%28Morgan%29%2C+Juan+Alberto+%28Ni%C3%B1os+Mutantes%29%2C+Miri+Ros%2C+

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))', 'limit': 1, 'offset': 0, 'type': 'track', 'market': None} returned 400 due to Bad request.


Error retrieving track info for 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))': http status: 400, code:-1 - https://api.spotify.com/v1/search?q=artist%3ASidonie+track%3AFascinados+%28feat.+Joan+Manuel+Serrat%2C+Leiva%2C+Vetusta+Morla%2C+Iv%C3%A1n+Ferreiro%2C+Loquillo%2C+Zahara%2C+Dani+Martin%2C+Albert+Pla%2C+Mikel+%28Izal%29%2C+Noni+%28Lori+Meyers%29%2C+Santi+Balmes%2C+Xoel+L%C3%B3pez%2C+Anni+B+Sweet%2C+Jeanette%2C+Carlos+Sadness%2C+Nina+%28Morgan%29%2C+Juan+Alberto+%28Ni%C3%B1os+Mutantes%29%2C+Miri+Ros%2C+

HTTP Error for GET to https://api.spotify.com/v1/search with Params: {'q': 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))', 'limit': 1, 'offset': 0, 'type': 'track', 'market': None} returned 400 due to Bad request.


Error retrieving track info for 'artist:Sidonie track:Fascinados (feat. Joan Manuel Serrat, Leiva, Vetusta Morla, Iván Ferreiro, Loquillo, Zahara, Dani Martin, Albert Pla, Mikel (Izal), Noni (Lori Meyers), Santi Balmes, Xoel López, Anni B Sweet, Jeanette, Carlos Sadness, Nina (Morgan), Juan Alberto (Niños Mutantes), Miri Ros, Javiera Mena, Jorge Martí (La Habitación Roja), Rafa Val (Viva Suecia), Marc (Dorian), Alondra Bentley, Abraham Boba, Carlangas (Novedades Carminha), La Bien Querida, Martí Perarnau IV (Mucho), Nita (Fuel Fandango) & Shuarma (Elefantes))': http status: 400, code:-1 - https://api.spotify.com/v1/search?q=artist%3ASidonie+track%3AFascinados+%28feat.+Joan+Manuel+Serrat%2C+Leiva%2C+Vetusta+Morla%2C+Iv%C3%A1n+Ferreiro%2C+Loquillo%2C+Zahara%2C+Dani+Martin%2C+Albert+Pla%2C+Mikel+%28Izal%29%2C+Noni+%28Lori+Meyers%29%2C+Santi+Balmes%2C+Xoel+L%C3%B3pez%2C+Anni+B+Sweet%2C+Jeanette%2C+Carlos+Sadness%2C+Nina+%28Morgan%29%2C+Juan+Alberto+%28Ni%C3%B1os+Mutantes%29%2C+Miri+Ros%2C+

Unnamed: 0,endTime,artistName,trackName,msPlayed,albumName,image_url,release_date,popularity,acousticness,liveness,danceability,speechiness,valence,duration_ms,explicit
0,2023-02-14 18:41,Roberto Carlos,Cama y Mesa - Cama e Mesa,21721,Pra Sempre Em Espanhol - Vol. 2,https://i.scdn.co/image/ab67616d0000b273034a90...,2007,67.0,,,,,,195080.0,False
1,2023-02-15 13:16,Kanye West,Everything I Am,227788,Graduation,https://i.scdn.co/image/ab67616d0000b27326f7f1...,2007-09-11,78.0,,,,,,227893.0,True
2,2023-02-15 13:19,J Dilla,Welcome To The Show,10709,Donuts,https://i.scdn.co/image/ab67616d0000b27383bb78...,2006-02-07,55.0,,,,,,71666.0,False
3,2023-02-15 13:19,Kanye West,Good Morning,195019,Graduation,https://i.scdn.co/image/ab67616d0000b27326f7f1...,2007-09-11,77.0,,,,,,195093.0,True
4,2023-02-15 13:22,Maroon 5,Maps,189939,V,https://i.scdn.co/image/ab67616d0000b273442b53...,2014-09-02,88.0,,,,,,189960.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4198,2024-02-15 18:08,Rick and Morty,Don't Look Back (feat. Kotomi & Ryan Elder) [F...,207000,,,,,,,,,,,
4199,2024-02-15 18:12,Sting,What Could Have Been feat. Ray Chen (from the ...,213474,Arcane League of Legends (Soundtrack from the ...,https://i.scdn.co/image/ab67616d0000b273d10169...,2021-11-20,66.0,,,,,,213474.0,False
4200,2024-02-15 18:15,Skid Row,18 and Life,229960,Skid Row,https://i.scdn.co/image/ab67616d0000b273bb6398...,1989-01-24,64.0,,,,,,229960.0,False
4201,2024-02-15 18:20,Linkin Park,Not Alone,252840,Not Alone,https://i.scdn.co/image/ab67616d0000b2734d6da9...,2011-10-21,50.0,,,,,,252840.0,False


## Let's do some feature engineering

In [8]:
# One of the artist whose name has japanese kajis. Let's replace that!
df_spotify['artistName'] = df_spotify['artistName'].apply(lambda x: x.replace("アトラスサウンドチーム", "Atlus"))

# Transforming milliseconds played into minutes
df_spotify['minPlayed'] = round(df_spotify['msPlayed']/60000,2)
df_spotify['songLength'] = round(df_spotify['duration_ms']/60000,2)

# Extracting streaming hours from the timestamp
df_spotify['endTime'] = pd.to_datetime(df_spotify['endTime'])
df_spotify['hourActive'] = df_spotify['endTime'].dt.floor('h').dt.hour

# Obtaining the days of the week and labeling them accordingly
df_spotify['day_of_week'] = df_spotify['endTime'].dt.dayofweek
day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
df_spotify['day_of_week'] = df_spotify['day_of_week'].map(lambda x: day_names[x])

# Breaking down the date
df_spotify['day'] = df_spotify['endTime'].dt.day
df_spotify['month'] = df_spotify['endTime'].dt.month
df_spotify['year'] = df_spotify['endTime'].dt.year

#saving output
df_spotify.to_csv('spotifydata.csv', index=False)
df_spotify

Unnamed: 0,endTime,artistName,trackName,msPlayed,albumName,image_url,release_date,popularity,acousticness,liveness,...,valence,duration_ms,explicit,minPlayed,songLength,hourActive,day_of_week,day,month,year
0,2023-02-14 18:41:00,Roberto Carlos,Cama y Mesa - Cama e Mesa,21721,Pra Sempre Em Espanhol - Vol. 2,https://i.scdn.co/image/ab67616d0000b273034a90...,2007,67.0,,,...,,195080.0,False,0.36,3.25,18,Tue,14,2,2023
1,2023-02-15 13:16:00,Kanye West,Everything I Am,227788,Graduation,https://i.scdn.co/image/ab67616d0000b27326f7f1...,2007-09-11,78.0,,,...,,227893.0,True,3.80,3.80,13,Wed,15,2,2023
2,2023-02-15 13:19:00,J Dilla,Welcome To The Show,10709,Donuts,https://i.scdn.co/image/ab67616d0000b27383bb78...,2006-02-07,55.0,,,...,,71666.0,False,0.18,1.19,13,Wed,15,2,2023
3,2023-02-15 13:19:00,Kanye West,Good Morning,195019,Graduation,https://i.scdn.co/image/ab67616d0000b27326f7f1...,2007-09-11,77.0,,,...,,195093.0,True,3.25,3.25,13,Wed,15,2,2023
4,2023-02-15 13:22:00,Maroon 5,Maps,189939,V,https://i.scdn.co/image/ab67616d0000b273442b53...,2014-09-02,88.0,,,...,,189960.0,False,3.17,3.17,13,Wed,15,2,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4198,2024-02-15 18:08:00,Rick and Morty,Don't Look Back (feat. Kotomi & Ryan Elder) [F...,207000,,,,,,,...,,,,3.45,,18,Thu,15,2,2024
4199,2024-02-15 18:12:00,Sting,What Could Have Been feat. Ray Chen (from the ...,213474,Arcane League of Legends (Soundtrack from the ...,https://i.scdn.co/image/ab67616d0000b273d10169...,2021-11-20,66.0,,,...,,213474.0,False,3.56,3.56,18,Thu,15,2,2024
4200,2024-02-15 18:15:00,Skid Row,18 and Life,229960,Skid Row,https://i.scdn.co/image/ab67616d0000b273bb6398...,1989-01-24,64.0,,,...,,229960.0,False,3.83,3.83,18,Thu,15,2,2024
4201,2024-02-15 18:20:00,Linkin Park,Not Alone,252840,Not Alone,https://i.scdn.co/image/ab67616d0000b2734d6da9...,2011-10-21,50.0,,,...,,252840.0,False,4.21,4.21,18,Thu,15,2,2024
