## Making the Imports

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()
import plotly.graph_objects as go
import plotly.express as px

## Loading The DataFrames

In [27]:
songs_by_time = pd.read_csv('MyData\songs_by_time.csv')
songs_raw = pd.read_csv('MyData\songs_raw.csv')
songs_by_artists = pd.read_csv('MyData\songs_by_artists.csv')

## 1. What Fraction of Time is Spent on Listening to Spotify?

In [28]:
#Total Time
total_time_minutes= songs_raw['Minutes Played'].sum()
total_time_days = total_time_minutes/(60*24)

#Total Days of listening to Spotify
first_date = pd.to_datetime(songs_raw['Date'].iloc[0])
last_date =pd.to_datetime(songs_raw['Date'].iloc[-1])
number_of_days = (last_date-first_date).days

#Perecentage Calculation
percent = (total_time_days/number_of_days)*100

#Result
print(f"I've spent {np.round(percent, 2)}% of my time on Spotify!")

I've spent 11.01% of my time on Spotify!


### Result
It turns out I have used 11.01% time of my day listening to Spotify. That is a 2.5 hours daily average!

## 2. Number of Songs and Total Songs Played

In [29]:
total_played = songs_raw.shape[0]
start_date = songs_raw['Date'].iloc[0]
final_date = songs_raw['Date'].iloc[-1]
print(f"I've played {total_played} songs from {start_date} to {final_date}!")

I've played 13550 songs from 06 Jan 2021 to 21 Sep 2021!


In [30]:
indi_songs = len(songs_raw.groupby('Song').sum())
print(f"I've played {indi_songs} different songs from {start_date} to {final_date}!")

I've played 599 different songs from 06 Jan 2021 to 21 Sep 2021!


### Result
A. Total Songs Played = <b>13550</b> <br>
B Individual Songs Played = <b>599</b> <br>
This means that on average a song has been played about 22 times!

## 3. Day Wise Summary

In [31]:
#Day Wise Usage
day_wise = songs_raw.groupby('Date').sum().sort_values(by='Minutes Played', ascending=False)
day_wise['Hours Played'] = day_wise['Minutes Played']/60

#Rounding the numbers to two decimal places
day_wise['Hours Played'] = day_wise['Hours Played'].apply(lambda x: np.round(x,2))
day_wise['Minutes Played'] = day_wise['Minutes Played'].apply(lambda x: np.round(x,2))

In [32]:
#Top 10 Days with maximum number of hours
day_wise.head(10)

Unnamed: 0_level_0,Minutes Played,Hours Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
20 Feb 2021,496.43,8.27
23 Jan 2021,462.33,7.71
06 Feb 2021,457.45,7.62
23 Mar 2021,448.84,7.48
25 Jan 2021,446.96,7.45
25 Feb 2021,444.57,7.41
18 Jan 2021,439.94,7.33
08 Jan 2021,412.15,6.87
21 Feb 2021,394.75,6.58
17 Mar 2021,376.28,6.27


In [33]:
#Bottom 10 Days with maximum number of hours
day_wise.tail(10)

Unnamed: 0_level_0,Minutes Played,Hours Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
27 May 2021,31.78,0.53
12 Jul 2021,27.92,0.47
26 Aug 2021,26.79,0.45
20 Jun 2021,26.25,0.44
28 Apr 2021,24.98,0.42
23 Aug 2021,24.77,0.41
28 May 2021,24.47,0.41
02 May 2021,23.22,0.39
30 Apr 2021,18.33,0.31
23 Apr 2021,17.57,0.29


In [34]:
#Making a plot showing Day Wise Streaming Time in minutes and Day
day_wise['Minutes Played'].iplot(color='red',
title='Day Wise Streaming Time', yTitle = 'Number of Minutes', xTitle = 'Days', width = 3)

In [35]:
#Making a plot showing Day Wise Streaming Time in Hours and Day
day_wise['Hours Played'].iplot(color='green',
title='Day Wise Streaming Time', yTitle = 'Number of Hours', xTitle = 'Days', width = 3)

In [36]:
day_wise.to_csv('MyData\day_wise.csv')

In [37]:
day_wise=pd.read_csv('MyData\day_wise.csv')

In [38]:
index = pd.to_datetime(day_wise['Date']).sort_values().index

In [39]:
day_wise_sorted = day_wise.reindex(index)

In [40]:
#Making a barplot showing Day Wise Streaming Time and that date
day_wise_sorted.iplot(kind = 'bar', color='green', x= 'Date', y='Hours Played',
title='Day Wise Streaming Time', yTitle = 'Number of Hours', xTitle = 'Days')

In [41]:
#Making a lineplot showing Day Wise Streaming Time in Hours and that date
day_wise_sorted.iplot(kind = 'line', color='green', x= 'Date', y='Hours Played',
title='Day Wise Streaming Time', yTitle = 'Number of Hours', xTitle = 'Days', width = 3)

In [42]:
#Making a barplot showing Day Wise Streaming Time and that date
day_wise_sorted.iplot(kind = 'bar', color='blue', x= 'Date', y='Minutes Played',
title='Day Wise Streaming Time', yTitle = 'Number of Minutes', xTitle = 'Days')

In [43]:
#Making a lineplot showing Day Wise Streaming Time in Hours and that date
day_wise_sorted.iplot(kind = 'line', color='blue', x= 'Date', y='Minutes Played',
title='Day Wise Streaming Time', yTitle = 'Number of Minutes', xTitle = 'Days', width = 2)

### Result
1. At my prime days I've used Spotify upto <b>8 hours</b> in a day. <br>
2. At my worst days the time has reduced to about half an hour.

## 4. Favourite Artists

In [44]:
# Making Hours Played column
songs_by_artists['Hours Played'] = songs_by_artists['Minutes Played']/60

In [45]:
#Making a percentage column
total_time = songs_by_artists['Minutes Played'].sum()
songs_by_artists['Percent of Overall Time (in %)']= songs_by_artists['Minutes Played'].apply(lambda x:np.round(x*100/total_time, 2))

In [46]:
#Rounding the numbers to two decimal places
songs_by_artists['Minutes Played'] = songs_by_artists['Minutes Played'].apply(lambda x: np.round(x,2))
songs_by_artists['Hours Played'] = songs_by_artists['Hours Played'].apply(lambda x: np.round(x,2))

In [47]:
# Top 10 Artists
songs_by_artists.head(10)

Unnamed: 0,Artist,Minutes Played,Hours Played,Percent of Overall Time (in %)
0,Taylor Swift,19419.25,323.65,47.61
1,Céline Dion,5278.13,87.97,12.94
2,Ed Sheeran,3249.73,54.16,7.97
3,Shawn Mendes,2141.92,35.7,5.25
4,Alan Walker,2025.89,33.76,4.97
5,Lady Gaga,1183.59,19.73,2.9
6,Maren Morris,1129.92,18.83,2.77
7,Camila Cabello,1067.44,17.79,2.62
8,Dua Lipa,819.79,13.66,2.01
9,Christina Perri,666.5,11.11,1.63


In [48]:
#Making a plot showing Artist Wise Streaming Time in Minutes
songs_by_artists.iplot(color='green', x= 'Artist', y = 'Minutes Played',
title='Minutes Played and Artists', yTitle = 'Number of Minutes', xTitle = 'Artists', width = 2)

In [49]:
#Making a plot showing Artist Wise Streaming Time in Hours
songs_by_artists.iplot(color='red', x= 'Artist', y = 'Hours Played',
title='Hours Played and Artists', yTitle = 'Number of Hours', xTitle = 'Artists', width = 2)

In [50]:
#Making a plot showing Artist Wise Streaming Time in percent
songs_by_artists.iplot(color='red', x= 'Artist', y = 'Percent of Overall Time (in %)',
title='Percentage Played and Artists', yTitle = 'Percent of Overall Time (in %)', xTitle = 'Artists', width = 2)

In [51]:
#Pie chart is for only those artist who have Percent of Overall Time (in %) greater than 0.1%
import plotly.express as px
df = songs_by_artists[songs_by_artists['Percent of Overall Time (in %)']>0.1]
fig = px.pie(df, values='Hours Played', names='Artist', title='Pie Chart of Favourite Authors')
fig.show()

### Result
A. <b>Taylor Swift</b> is my favourite artist. (Why am I not surprised!😂) <br>
B. What is surprising is that I've listened to Taylor <b>47.8%</b> of time! <br>
C. My top 10 artists are:
<ol>
<li> <b>Taylor Swift</b> </li> 
<li> <b>Céline Dion</b> </li> 
<li> <b>Ed Sheeran</b> </li> 
<li> <b>Shawn Mendes</b> </li> 
<li> <b>Alan Walker</b> </li> 
<li> <b>Lady Gaga</b> </li> 
<li> <b>Maren Morris</b> </li> 
<li> <b>Camila Cabello</b> </li> 
<li> <b>Dua Lipa</b> </li> 
<li> <b>Christina Perri</b> </li> 
</ol>

## 5. Song Wise

In [52]:
#Rounding off to two digits after decimal
songs_by_time['Minutes Played'] = songs_by_time['Minutes Played'].apply(lambda x:np.round(x,2)) 

In [53]:
#Cleaninng the song name
songs_by_time['Song'] = songs_by_time['Song'].apply(lambda x:x.split('(')[0])
songs_by_time['Song'] = songs_by_time['Song'].apply(lambda x:x.split('-')[0])

### Plotting the top n songs

In [54]:
#Making a plot showing songs Wise Streaming Time in Minutes
def plot_n_songs(n):
    songs_by_time[0:n].iplot(color='green', x= 'Song', y = 'Minutes Played',
    title=f'Top {n} Songs ', yTitle = 'Number of Minutes', xTitle = 'Song', width = 2)
# plt.xticks(rotation=90)

In [55]:
#Top 10
plot_n_songs(10)

In [56]:
#Top 20
plot_n_songs(20)

In [57]:
#Top 50
plot_n_songs(50)

In [58]:
import plotly.express as px
def pie_for_n(n):
    df = songs_by_time[0:n]
    fig = px.pie(df, values='Minutes Played', names='Song', title=f'Pie Chart of Favourite {n} Songs')
    fig.show()

In [59]:
# Pie Chart for favourite 10 songs
pie_for_n(10)

In [60]:
# Pie Chart for favourite 20 songs
pie_for_n(20)

In [61]:
# Pie Chart for favourite 50 songs
pie_for_n(50)

In [62]:
#Top 50 Songs
top_fifty = songs_by_time[0:50]
#Adding Hour Column
top_fifty['Hours Played'] = np.round(top_fifty['Minutes Played']/60,2)

In [63]:
top_fifty[0:20]

Unnamed: 0,Song,Minutes Played,Hours Played
0,willow,1257.92,20.97
1,evermore,1190.79,19.85
2,exile,1124.17,18.74
3,august,1068.4,17.81
4,Enchanted,1019.12,16.99
5,You All Over Me,995.28,16.59
6,Let's Talk About Love,901.29,15.02
7,Untouchable,856.45,14.27
8,Afterglow,832.94,13.88
9,Delicate,831.24,13.85


### Result
A. willow is my most streamed song. Not surprising! <br>
B. Now comes the interseting fact, willow has been streamed for 1258 minutes. That is 21 Hours! <br>
C. Top 10 songs are:
<ol>
<li> <b>willow</b> </li> 
<li> <b>evermore</b> </li> 
<li> <b>exile</b> </li> 
<li> <b>august</b> </li> 
<li> <b>Enchanted</b> </li> 
<li> <b>You All Over Me</b> </li> 
<li> <b>Let's Talk About Love</b> </li> 
<li> <b>Untouchable</b> </li> 
<li> <b>Afterglow</b> </li> 
<li> <b>Delicate</b> </li> 

</ol>

## 6. Number of Times a song is played?

In [64]:
#Creating a DataFrame containing the number of times a song has been played
df = songs_raw.groupby('Song').count().sort_values(by='Date', ascending=False)
songs_played = df.drop(['Artist', 'Date'], axis =1 )
songs_played=songs_played.reset_index()
songs_played.columns = ['Song', 'Number of Times']
songs_played = songs_played[songs_played['Number of Times']>2]

In [65]:
#Cleaning the Song Name
songs_played['Song'] = songs_played['Song'].apply(lambda x:x.split('(')[0])
songs_played['Song'] = songs_played['Song'].apply(lambda x:x.split('-')[0])

In [66]:
songs_played[songs_played['Number of Times']>5]

Unnamed: 0,Song,Number of Times
0,willow,408
1,You All Over Me,319
2,Afterglow,316
3,august,287
4,exile,287
...,...,...
131,Stitches,6
132,Breathe,6
133,I'll Never Love,6
134,The A Team,6


In [67]:
songs_played[songs_played['Number of Times']>5].iplot(kind = 'bar', color='violet', y= 'Number of Times', x='Song',
title='Song and Times Played', yTitle = 'Songs', xTitle = 'Number of Time Played')

In [68]:
#A dictionary containing number of songs played at least a number of times
num_songs = {}

for n in range(0,350,10):
    if n==0:
        num_songs[5] = len(songs_played[songs_played['Number of Times']>5])
    else:
        number = len(songs_played[songs_played['Number of Times']>n])
        num_songs[n] = number
# for n in range(50,400,50):
#     number = len(songs_played[songs_played['Number of Times']>n])
#     num_songs[n] = number

#Creating a DataFrame for the same
songs_number = pd.DataFrame(num_songs, index = num_songs.keys())[0:1].transpose()
songs_number = songs_number.reset_index()
songs_number.columns =['Maximum Number of Time Played', 'Numbers of Songs']

In [69]:
# songs_number

In [70]:
#Making a barplot showing number of times a song is played with number of songs
songs_number.iplot(kind = 'bar', color='blue', x= 'Maximum Number of Time Played', y='Numbers of Songs',
title='Maximum Number of Times a Song is Played', yTitle = 'Numbers of Songs', xTitle = 'Maximum Number of Time Played')

In [72]:
df = songs_number[songs_number['Numbers of Songs']>1]
fig = px.pie(df, names='Maximum Number of Time Played', values='Numbers of Songs', title='Number of Songs Played a Maximum Given Times')
fig.show()

In [73]:
songs_played.head(10)

Unnamed: 0,Song,Number of Times
0,willow,408
1,You All Over Me,319
2,Afterglow,316
3,august,287
4,exile,287
5,evermore,279
6,Shape of You,260
7,The Bones,246
8,champagne problems,244
9,Darkside,241


### Results
A. <b>willow</b> is the most streamed song which is streamed 400+ times! <br>
B. There are 76 songs streamed over 50 times. <br>
C. 55 songs streamed over 100 times. <br>
D. 36 songs streamed over 150 times. <br>
E. 16 songs streamed over 200 times. <br>
F. 7 songs streamed over 250 times. <br>
G. 3 songs streamed over 300 times.

## 7. Number of Songs per Day?

In [74]:
#Not ignoring any songs
songs_raw.groupby('Date').count()['Minutes Played'].mean()

52.316602316602314

In [75]:
#Ignoring songs which has been played less than 1.5 minutes
songs_g1 = songs_raw[songs_raw['Minutes Played']>1.5]

In [76]:
songs_raw.shape[0] - songs_g1.shape[0]

2811

In [77]:
average = songs_g1.groupby('Date').count()['Minutes Played'].mean()

In [78]:
print(f"Average number of songs is {np.int(average)}!")

Average number of songs is 41!


### Result
Average number of songs is <b>41<b>!