In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import json

import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

import plotly.graph_objects as go

In [None]:
# Read the first streaming-history export into a list of play records.
with open('StreamingHistory0.json', encoding='utf8') as history_file:
    data = json.load(history_file)

# Peek at the first five records to confirm the structure.
print(data[:5])

In [None]:
def extract_json_value(column_name):
    """Return the value stored under ``column_name`` in every record of the global ``data``."""
    return [record[column_name] for record in data]


# Build the frame in a single constructor call; the dict order fixes the column order.
history_1 = pd.DataFrame({
    'artist_name': extract_json_value('artistName'),
    'end_time': extract_json_value('endTime'),
    'ms_played': extract_json_value('msPlayed'),
    'track_name': extract_json_value('trackName'),
})

In [None]:
# Summarise the columns, dtypes and non-null counts of the first history frame.
history_1.info()

In [None]:
# Empty placeholder frame that is combined with history_1 in later cells.
# NOTE(review): presumably meant to hold a second export file — confirm.
history_2 = pd.DataFrame()

In [None]:
# Stack both history frames into one.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
history = pd.concat([history_1, history_2])

In [None]:
# Export the first rows of the combined history to an Excel file as a sample.
history.head().to_excel('hist.xlsx')

In [None]:
# Parse the timestamp strings into datetime values.
history = history.assign(end_time=pd.to_datetime(history['end_time']))
history.head()

In [None]:
# Convert milliseconds to minutes (60000 ms per minute) and drop the raw column.
history = (
    history
    .assign(minutes_played=history['ms_played'].divide(60000))
    .drop(columns='ms_played')
)
history.head()

In [None]:
# Number of plays per artist, largest first, limited to the top 20.
plays_per_artist = history.groupby('artist_name')['track_name'].count()
most_played_artists_by_count = plays_per_artist.sort_values(ascending=False).head(20)

print('The most popular artists by number of times their songs were played were: \n\n{}'.format(most_played_artists_by_count))

In [None]:
# Bar colours running from dark red through near-white to dark blue.
# NOTE(review): 11 colours are supplied while up to 20 bars are plotted —
# confirm how the remaining bars should be coloured.
colors = ['RGB(103, 0, 31)','RGB(178, 24, 43)','RGB(214, 96, 77)','RGB(244, 165, 130)','RGB(253, 219, 199)',
          'RGB(247, 247, 247)','RGB(209, 229, 240)','RGB(146, 197, 222)','RGB(67, 147, 195)','RGB(33, 102, 172)',
          'RGB(5, 48, 97)']


layout = go.Layout(
    title='Popularity Of Artists By Number Of Times Their Song Was Played',
    yaxis=dict(
        # `titlefont` is deprecated; the axis-title font is set through `title.font`.
        title=dict(text="Number of Times Played", font=dict(size=15)),
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2),
    xaxis=dict(title="Artist Name"))


# One bar per artist; the bar height is the play count.
fig = go.Figure(data=[go.Bar(x=most_played_artists_by_count.index,
                             y=most_played_artists_by_count,
                             textposition='auto',
                             opacity=1,
                             marker_color=colors)],
                layout=layout)

fig.show()


In [None]:
# Split the timestamp into a calendar-date column and a time-of-day column,
# then discard the original timestamp.
history = (
    history
    .assign(day=history['end_time'].dt.date, time=history['end_time'].dt.time)
    .drop(columns='end_time')
)
history.head()

In [None]:
# Total minutes played per calendar day.
# Only `minutes_played` is summed: since pandas 2.0 a bare .sum() no longer
# drops non-numeric columns, and the `time` column holds datetime.time
# objects, which cannot be added together.
day = history.groupby('day', as_index=False)['minutes_played'].sum()
day.head()

In [None]:
# Line chart of total minutes played per calendar day.
# The trace is named `day_trace` rather than `data` so it does not overwrite
# the JSON records that `extract_json_value` reads from the global `data`;
# otherwise re-running the history-building cell fails.
day_trace = go.Scatter(
    x=day['day'],
    y=day['minutes_played'],
    mode="lines+markers",
    marker=dict(color='rgba(80, 26, 80, 0.8)'),
    text=day['day'])

layout = dict(title='Days vs Minutes played',
              xaxis=dict(title='Days', ticklen=5, zeroline=False),
              yaxis=dict(title='Minutes Played', ticklen=5, zeroline=False)
             )
fig = go.Figure(data=day_trace, layout=layout)
# Save an HTML copy of the figure before displaying it.
fig.write_html('day.html')
fig.show()

In [None]:
# Fresh copy of the combined history, labelled with the weekday of each play.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
date = pd.concat([history_1, history_2])
date['minutes_played'] = date['ms_played'].divide(60000)
date = date.drop(columns='ms_played')

# Weekday name (e.g. "Monday") derived from the raw timestamp strings.
date['day'] = pd.DatetimeIndex(date['end_time']).day_name()

date.head()

In [None]:
# Total minutes played per weekday.
# Only `minutes_played` is summed: since pandas 2.0 a bare .sum() also
# "sums" the text columns by concatenating every string in the group.
date = date.groupby('day', as_index=False)['minutes_played'].sum()
date.head(6)

In [None]:
# Pie chart: share of total listening minutes per weekday.
fig = px.pie(
    date,
    names="day",
    values="minutes_played",
    color_discrete_sequence=px.colors.sequential.RdBu,
)

fig.show()

In [None]:
# Fresh copy of the combined history reduced to artist, track and minutes played.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
artist = pd.concat([history_1, history_2])
artist['minutes_played'] = artist['ms_played'].divide(60000)
artist = artist.drop(columns=['ms_played', 'end_time'])

artist.head()

In [None]:
# One row per distinct (artist, track) pair, keeping only the identifying columns.
# Deduplicating on the pair rather than the title alone stops a title shared
# by two artists from being credited to only one of them.
# The non-inplace drop avoids mutating the result of drop_duplicates in
# place, which triggers SettingWithCopyWarning.
artist_1 = (
    artist
    .drop_duplicates(subset=["artist_name", "track_name"])
    .drop(columns="minutes_played")
)
artist_1

In [None]:
# Count the rows per artist; every non-key column becomes a count.
artist_1 = artist_1.groupby(by='artist_name', as_index=False).count()
artist_1

In [None]:
# The per-artist count of track rows is the number of unique songs.
column_names = {"track_name": "unique_songs"}
artist_1 = artist_1.rename(columns=column_names)
artist_1.head()

In [None]:
# Total minutes played per artist.
# Only `minutes_played` is summed: since pandas 2.0 a bare .sum() also
# concatenates every track-name string in the group.
time = artist.groupby('artist_name', as_index=False)['minutes_played'].sum()

time.head()

In [None]:
# Join unique-song counts with listening time, then keep the 15 artists
# with the most unique songs.
top_artist = (
    artist_1
    .merge(time, on='artist_name')
    .sort_values(by='unique_songs', ascending=False)
    .head(15)
)
top_artist

In [None]:
# Bubble chart: minutes played per artist, marker size = number of unique songs.
fig = px.scatter(
    top_artist,
    x="artist_name",
    y="minutes_played",
    size="unique_songs",
    title='Total amount of time spent listening to each artist',
    color_discrete_sequence=px.colors.sequential.RdBu,
)

fig.show()


In [None]:
# Fresh copy of the combined history with play time expressed in minutes.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
song = pd.concat([history_1, history_2])
song['minutes_played'] = song['ms_played'].divide(60000)
song = song.drop(columns='ms_played')
song

In [None]:
# Total minutes played per track title, longest first.
# Only `minutes_played` is summed: since pandas 2.0 a bare .sum() keeps the
# text columns (concatenating their strings), and the resulting `artist_name`
# column then collides with the one in `song_artist` when the two are merged.
song = (
    song
    .groupby('track_name', as_index=False)['minutes_played']
    .sum()
    .sort_values(by='minutes_played', ascending=False)
)
song

In [None]:

# Track-to-artist lookup built from the combined history, sorted by title (descending).
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
song_artist = (
    pd.concat([history_1, history_2])
    .sort_values(by='track_name', ascending=False)
    .drop(columns=['end_time', 'ms_played'])
)
song_artist.head(10)

In [None]:
# Keep the first row seen for each track title.
song_artist = song_artist.drop_duplicates(subset="track_name")
song_artist


In [None]:

# Attach the artist to each track's total minutes, longest-played first.
song = (
    song_artist
    .merge(song, on='track_name')
    .sort_values(by='minutes_played', ascending=False)
)
song

In [None]:
# Rank tracks by minutes played and renumber the rows 0..n-1.
song = song.sort_values(by='minutes_played', ascending=False).reset_index(drop=True)

# NOTE(review): hand-picked rank positions to exclude; they are tied to this
# particular dataset — confirm what these rows are before reusing.
excluded_ranks = [1, 3, 4, 5, 7, 8, 10, 28, 40, 49]
songs = song.drop(index=excluded_ranks).head(50)
songs

In [None]:
# Renumber the remaining rows so the index is contiguous again.
songs = songs.reset_index(drop=True)
songs

In [None]:
# Bar chart of minutes played per track, coloured by artist.
fig = px.bar(
    songs,
    x="track_name",
    y="minutes_played",
    color="artist_name",
    title='Most Listened To Songs',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.show()

In [None]:
# Load the per-song table used by the audio-feature cells below.
# NOTE(review): 'unicode_escape' is an unusual choice of file encoding — confirm it matches how the CSV was written.
my_songs = pd.read_csv("appusongs.csv", encoding= 'unicode_escape')
my_songs.head()

In [None]:
# Columns removed from the first-rows view of the song table.
unused_columns = ['key', 'mode', 'liveness', 'valence', 'tempo', 'duration_ms',
                  'time_signature', 'track_id', 'album', 'artist']

# A single non-inplace drop avoids mutating a slice of `my_songs` in place
# (which triggers SettingWithCopyWarning) and replaces ten separate calls.
# NOTE(review): the name says "top 5" but six rows are kept — confirm which is intended.
top_5 = my_songs.head(6).drop(columns=unused_columns)
top_5

In [None]:
import plotly.graph_objects as go

# Radar axes, in the order used by every value list below.
categories = ['danceability','energy',
              'loudness', 'speechiness', 'instrumentalness']

# Audio-feature values per song, ordered like `categories`.
# NOTE(review): the values are hard-coded, presumably copied from the song
# table shown earlier — confirm they still match the data.
song_features = {
    'The Nights': [0.527, 0.835, -5.298, 0.0433, 0.0000],
    'A Thousand Years': [0.421, 0.407, -7.445, 0.0267, 0.000961],
    'On my way': [0.509, 0.689, -4.929, 0.1020, 0.00000],
    'Radioactive': [0.448, 0.784, -3.686, 0.0627, 0.000108],
    'Phir se Ud Chala': [0.647, 0.656, -8.632, 0.0354, 0.000163],
}

fig = go.Figure()

# One filled polygon per song (insertion order of the dict is preserved).
for song_name, feature_values in song_features.items():
    fig.add_trace(go.Scatterpolar(
        r=feature_values,
        theta=categories,
        fill='toself',
        name=song_name
    ))

fig.update_layout(
    # Five songs are plotted, so the title says 5 rather than 3.
    title="Diversity in audio features of top 5 songs",
    polar=dict(
        radialaxis=dict(
            visible=True,
            # The range must cover the negative loudness values as well as the 0..1 features.
            range=[-10, 1]
        )),
    showlegend=True)
fig.write_image('Songdiversity.jpeg')
fig.show()

In [None]:

# Column means of the numeric features.
# numeric_only=True is required since pandas 2.0, where mean() no longer
# silently skips text columns and raises on them instead.
my_songs.mean(axis=0, numeric_only=True)

# One-row table of average feature values.
# NOTE(review): the numbers are hard-coded, presumably copied from the mean()
# output above — confirm they still match the data.
avg_my_songs = pd.DataFrame({"danceability":[0.589500], 
                             "energy":[0.660060],
                             "key":[4.420000],
                             "loudness":[-6.293780],
                             "mode":[0.500000],
                             "speechiness":[0.080354],
                             "instrumentalness":[0.022151],
                             "liveness":[0.206788],
                             "valence":[0.453488],
                             "tempo":[127.639420],
                             "duration_ms":[235277.380000],
                             "time_signature":[3.940000]})
avg_my_songs

In [None]:
# Valence values of all songs; also read by the counting cell that follows.
v = my_songs['valence']

# The trace is named `valence_hist` rather than `data` so the plot does not
# overwrite the global `data` that `extract_json_value` reads.
valence_hist = go.Histogram(
    x=v,
    opacity=0.75,
    name="song",
    marker=dict(color='rgba(171, 50, 96, 0.6)'))
layout = go.Layout(barmode='overlay',
                   title='Song Mood',
                   xaxis=dict(title='Happiness-->'),
                   yaxis=dict(title='Count'),
)
fig = go.Figure(data=valence_hist, layout=layout)
fig.show()

In [None]:
# Bucket the valence values with vectorised comparisons.
less_count = ((v >= 0) & (v < 0.5)).sum()
middle_count = ((v >= 0.5) & (v < 0.6)).sum()
# Everything that falls in neither bucket above is counted as "more".
more_count = len(v) - less_count - middle_count

print("Less than 0.5: ", less_count) 
print("More than 0.6: ", more_count)
print("Between 0.5 and 0.6: ", middle_count)

In [None]:

# Two-set Venn diagram with equal-sized circles, saved to test.png.
from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
from matplotlib_venn import venn3, venn3_circles
from matplotlib import pyplot as plt
%matplotlib inline

# NOTE(review): the subset sizes (28, 15, 7) are hard-coded, presumably taken
# from the valence counts printed in the previous cell — confirm.
venn2_unweighted(subsets = (28, 15, 7), set_labels = ('Low Spirit', 'High Spirit'), set_colors=('firebrick','skyblue'), alpha = 0.5);
plt.savefig('test.png')

In [None]:
# Box plot of the tempo column; shown inline and also saved as HTML.
fig = px.box(my_songs, y="tempo")
fig.show()
fig.write_html('tempo.html')

In [None]:
# Load the second listener's song table for the comparison cells below.
# NOTE(review): 'unicode_escape' is an unusual choice of file encoding — confirm it matches how the CSV was written.
sr = pd.read_csv('sreelekshmi_songs.csv', encoding= 'unicode_escape')
sr.head()

In [None]:
# Column means of the numeric features.
# numeric_only=True is required since pandas 2.0, where mean() no longer
# silently skips text columns and raises on them instead.
sr.mean(axis=0, numeric_only=True)

In [None]:
# One-row table of average feature values, tagged with the listener's name.
# NOTE(review): the numbers are hard-coded and several are divided by 100,
# presumably to rescale percentages to 0..1 — confirm against the source table.
avg_sr = pd.DataFrame([{
    "danceability": 56.77/100,
    "energy": 44.12/100,
    "key": 4.588235,
    "loudness": -9.802235,
    "mode": 0.901961,
    "speechiness": 4.0767/100,
    "instrumentalness": 0.006681,
    "liveness": 17.4045/100,
    "valence": 54.60/100,
    "tempo": 114.163,
    "duration_ms": 263142.215686,
    "time_signature": 3.725,
}]).assign(Who='Sreelekshmy')

avg_sr

In [None]:
# Tag the averages with the listener's name so both tables can be stacked.
avg_my_songs = avg_my_songs.assign(Who='Appu')
avg_my_songs

In [None]:
# Stack both listeners' averages into one table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
average = pd.concat([avg_sr, avg_my_songs])
average

In [None]:
# Save the combined averages. The extension must be a real Excel one
# ('xlsx'): pandas picks the writer engine from it and fails on 'xslx'.
average.to_excel('song12.xlsx', index=True)
# You have to convert this Excel file to CSV manually, with one column for the
# feature, one for me and one for your friend's name (the later cells read
# the columns `Feature`, `Appu` and `Sreelekshmy`).

In [None]:
# Load the manually prepared comparison table.
# NOTE(review): song12.csv is not written by this notebook; it has to be created by hand from the exported Excel file.
average_comp = pd.read_csv('song12.csv', encoding= 'unicode_escape')
average_comp

In [None]:

# Remove three rows from the comparison table, chosen by position.
# NOTE(review): the positions are hard-coded and presumably match the row order of the manually built song12.csv — confirm.
# Drop the row whose label sits at position 9 of the loaded table.
average_compare = average_comp.drop(average_comp.index[9])
# Drop the row whose label sits at position 2 of the result ...
average_compare = average_compare.drop(average_compare.index[2])
# ... then the row that has moved into position 2 after the previous drop.
compare_features = average_compare.drop(average_compare.index[2])
compare_features

In [None]:
# NOTE(review): replace() is called with only the values to match and no
# replacement value, so the intent is unclear; this value-less form is
# believed to behave differently across pandas versions — confirm what
# should happen to 'acousticness' and pass an explicit value.
compare_features = compare_features.replace(['acousticness'])

In [None]:
# Grouped bar chart comparing the two listeners feature by feature.
trace1 = go.Bar(x=compare_features['Feature'],
                y=compare_features['Sreelekshmy'],
                marker=dict(color='skyblue', opacity=1),
                name="Sreelekshmy")
trace2 = go.Bar(x=compare_features['Feature'],
                y=compare_features['Appu'],
                marker=dict(color='darksalmon', opacity=1),
                name="Appu")

layout = go.Layout(title="My Music Preferences VS Sreelekshmy's",
                   xaxis=dict(title="Features"),
                   barmode="group")

# The traces are passed directly instead of via a global named `data`, so the
# plot does not overwrite the `data` that `extract_json_value` reads.
fig = go.Figure(data=[trace1, trace2], layout=layout)

fig.show()
fig.write_image('preference.jpeg')


In [None]:
# Average loudness per listener.
# NOTE(review): the values are hard-coded, presumably copied from the averages tables above — confirm.
loudness = pd.DataFrame(
    [('Appu', -6.293780), ('Sreelekshmy', -9.802235)],
    columns=["Who", "Loudness"],
)
loudness

In [None]:
# Bar chart of average loudness per listener; shown inline and saved as an image.
fig = px.bar(
    x=loudness['Who'],
    y=loudness['Loudness'],
    labels={"y": "Loudness Index", "x": " "},
    color_discrete_sequence=px.colors.sequential.RdBu,
    title="Loudness Preference",
)

fig.show()
fig.write_image('loudness.jpeg')

In [None]:

# Average tempo per listener.
# NOTE(review): the values are hard-coded, presumably copied from the averages tables above — confirm.
tempo = pd.DataFrame(
    [('Appu', 127.639420), ('Sreelekshmy', 114.163000)],
    columns=["Who", "Tempo"],
)
tempo

In [None]:
# Bar chart of average tempo per listener.
# The x axis is read from `tempo` itself (not from the unrelated `loudness`
# frame), so names and values always come from the same rows.
fig = px.bar(x=tempo['Who'], y=tempo['Tempo'],
             labels={"y": "Tempo Index",
                     "x": " "},
             color_discrete_sequence=px.colors.sequential.RdBu, title="Tempo Preference")

fig.show()

