In [None]:
### Importing libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'darkgrid' style to the plots drawn later in this notebook
sns.set_style('darkgrid')

In [None]:
### Setting pandas options to display at most 10 rows
### Change according to your need
pd.set_option('display.max_rows', 10)

In [None]:
### Load the Spotify streaming-history export into a DataFrame
DATASET_DIR = os.path.join('datasets', 'MyData')
file_name = 'StreamingHistory0.json'

# Build the full path once, then hand it to pandas
history_path = os.path.join(DATASET_DIR, file_name)
data = pd.read_json(history_path)

In [None]:
### Preview the first five rows of the loaded data
data.head(5)

In [None]:
### Print a concise summary of `data` (columns, non-null counts, dtypes)
data.info()

# Analysis Part

In [None]:
### Total listening time: sum the milliseconds played, then express it in minutes
MS_PER_MINUTE = 60000

total_msPlayed = data['msPlayed'].sum()
total_minutesPlayed = total_msPlayed / MS_PER_MINUTE
print(f'You listened {round(total_minutesPlayed,2)} minutes of total playback.')

In [None]:
### Finding the track with the longest single play
# idxmax() returns the index *label* of the row with the largest msPlayed, and
# .loc looks rows up by label. The previous argmax() returned a *position* that
# was then used as a label, which is only correct while `data` keeps its
# default 0..n-1 index (e.g. it breaks after filtering or sorting).
indexMaxDurationListened = data['msPlayed'].idxmax()
longest_play = data.loc[indexMaxDurationListened]

print('Your maximum duration listened track is ', longest_play['trackName'], 'by ', longest_play['artistName'], '.')

In [None]:
### Counting number of times you played a song or listened to an artist
# value_counts() returns the counts sorted from most to least frequent.
# rename_axis() + reset_index(name=...) name both resulting columns explicitly.
# The previous rename({'index': ..., '<column>': 'playedFrequency'}) relied on
# the labels produced by pandas < 2.0; from pandas 2.0 on, reset_index() yields
# '<column>' / 'count' instead, so that mapping renamed the *name* column to
# 'playedFrequency' and the lookups below raised KeyError.
song_counts = (
    data['trackName']
    .value_counts()
    .rename_axis('trackName')
    .reset_index(name='playedFrequency')
)
artist_counts = (
    data['artistName']
    .value_counts()
    .rename_axis('artistName')
    .reset_index(name='playedFrequency')
)

# Row 0 holds the most frequent entry because the counts are sorted descending
top_song = song_counts.iloc[0]
top_artist = artist_counts.iloc[0]

print('Your most frequently played Song is',  top_song['trackName'], '\nYou heard it',top_song['playedFrequency'],'times')
print('Your most frequently played Artist is',  top_artist['artistName'], '\nYou heard it',top_artist['playedFrequency'],'times')

In [None]:
### Top 10 Songs
print('Your top 10 Songs:')
# The first column of song_counts holds the song names, most played first.
# Take the first 10 rows and relabel them 1..10 so the index reads as a rank.
# The previous iloc[:11].reindex(range(1, 11)) *selected* labels 1-10, which
# dropped the most played song (label 0) and showed ranks 2-11 instead.
top_10_songs = song_counts.iloc[:10, 0]
top_10_songs.index = pd.RangeIndex(1, len(top_10_songs) + 1)
print(top_10_songs)

In [None]:
### Top Songs By User
n = int(input('How many top song of yours you want: '))
# Take the first n rows of the song-name column (most played first) and relabel
# them 1..n as ranks. The previous iloc[:n + 1].reindex(range(1, n + 1))
# selected labels 1..n, which skipped the most played song (label 0).
# max(n, 0) keeps a negative n from slicing relative to the end of the table.
top_n_songs = song_counts.iloc[:max(n, 0), 0]
top_n_songs.index = pd.RangeIndex(1, len(top_n_songs) + 1)
print(top_n_songs)

In [None]:
### Top Artist By User
n = int(input('How many top artist of yours you want: '))
# Take the first n rows of the artist-name column (most played first) and
# relabel them 1..n as ranks. The previous iloc[:n + 1].reindex(range(1, n + 1))
# selected labels 1..n, which skipped the most played artist (label 0).
# max(n, 0) keeps a negative n from slicing relative to the end of the table.
top_n_artists = artist_counts.iloc[:max(n, 0), 0]
top_n_artists.index = pd.RangeIndex(1, len(top_n_artists) + 1)
print(top_n_artists)

In [None]:
### The ten most played artists (first column of artist_counts, first ten rows)
artist_name_column = artist_counts.iloc[:, 0]
top_10_artists = artist_name_column.head(10)
print(top_10_artists)

# Visualization

In [None]:
### Bar plot of top 10 songs

MAX_LABEL_CHARS = 16   # longest song name shown in full on the x axis


def _shorten_label(name, limit=MAX_LABEL_CHARS):
    """Return `name` unchanged if it fits in `limit` characters, else cut it and add '...'."""
    # Only add the ellipsis when something was actually cut off; previously
    # every name got "..." appended, even ones shorter than the limit.
    return name if len(name) <= limit else name[:limit] + "..."


## Generating truncated song names
top_songs = song_counts.iloc[:10, :]
song_names = top_songs.trackName.apply(_shorten_label)

## Generating plot
fig, ax = plt.subplots(figsize = (18, 8))
sns.barplot(x = 'trackName', y = 'playedFrequency', data = top_songs, ax = ax)
# Fix the tick positions (one per bar) before replacing their labels, so the
# custom labels cannot drift away from the bars they describe.
ax.set_xticks(range(len(song_names)))
ax.set_xticklabels(song_names, rotation = 30, fontsize = 11)
ax.set_xlabel('Song name', fontsize =  15)
ax.set_ylabel('Times played', fontsize =  15)
ax.set_title('Top 10 Songs', fontsize = 20);

In [None]:
### Pie chart: share of plays among the ten most played artists

# Slice sizes come from the second column of artist_counts (first ten rows);
# slice labels are the artist names collected in top_10_artists.
slice_sizes = artist_counts.iloc[:10, 1]
label_style = {"fontsize": 13}

fig, ax = plt.subplots(figsize = (14, 10))
ax.pie(
    x = slice_sizes,
    labels = top_10_artists,
    autopct = '%.2f%%',      # percentage with two decimals on every slice
    startangle = 90,         # first slice starts at the top ...
    counterclock = False,    # ... and slices proceed clockwise
    textprops = label_style,
)
ax.set_title('Top 10 Artists', fontsize = 18);

In [None]:
### Handling date field in dataset
# Work on a copy so the raw `data` frame stays untouched for the other cells
d_data = data.copy()

# Parse the 'YYYY-MM-DD HH:MM' timestamps in one vectorised call and keep only
# the calendar date, so that all plays from the same day share one key.
d_data['endTime'] = pd.to_datetime(d_data['endTime'], format = '%Y-%m-%d %H:%M').dt.date

# Milliseconds -> minutes (the column keeps its original name 'msPlayed')
d_data['msPlayed'] = d_data['msPlayed'] / 60000

# Total minutes per day. Only msPlayed is summed: summing the whole frame would
# also "add up" the text columns (string concatenation on pandas >= 2.0).
# groupby() sorts by its key by default, so the result is already in date order;
# the earlier sort_values(...).reset_index(...) call was dropped because its
# result was never assigned and therefore had no effect.
datewise_listening = d_data.groupby(by = 'endTime')['msPlayed'].sum().reset_index()
datewise_listening

In [None]:
### Line plot of minutes listened per date

# NOTE(review): only every second row of datewise_listening is plotted
# (presumably to thin out the line) - confirm this is intended.
thinned_listening = datewise_listening.iloc[::2, :]

fig, ax = plt.subplots(figsize = (20, 8))
sns.lineplot(x = 'endTime', y = 'msPlayed', data = thinned_listening, ax = ax)
ax.set_title('Datewise Listening', fontsize = 20)
ax.set_xlabel('Date', fontsize =  15)
ax.set_ylabel('Minutes', fontsize =  15);