# Visualisation Final Project

In [1]:
import matplotlib.pyplot as plt 
import pandas as pd 
import numpy as np 
from mpl_toolkits.mplot3d import *
import ipywidgets as widgets
from ipywidgets import interact
import mplcursors
import matplotlib.animation as animation
from IPython.display import HTML

In [2]:
# Show every column and every row when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
# Load the Spotify/YouTube table and the four yearly TikTok tables
# (2019 is an Excel workbook, the other years are CSV files).
youtube_spotify = pd.read_csv('Spotify_Youtube.csv')
tiktok_19 = pd.read_excel('TikTok_songs_2019.xlsx')
tiktok_20 = pd.read_csv('TikTok_songs_2020.csv')
tiktok_21 = pd.read_csv('TikTok_songs_2021.csv')
tiktok_22 = pd.read_csv('TikTok_songs_2022.csv')

# Stack the yearly tables in chronological order under a fresh 0..n-1 index.
tiktok = pd.concat(
    [tiktok_19, tiktok_20, tiktok_21, tiktok_22],
    ignore_index=True,
    sort=False,
)
tiktok.columns

Index(['track_name', 'artist_name', 'artist_pop', 'album', 'track_pop',
       'danceability', 'energy', 'loudness', 'mode', 'key', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'time_signature', 'duration_ms'],
      dtype='object')

In [4]:
# Join the Spotify/YouTube rows with the TikTok rows on the song title
# (pd.merge defaults to an inner join, so only titles present in both remain).
# NOTE(review): the key is the title alone, so unrelated songs that happen to
# share a title are matched too — confirm this is acceptable.
tracks = pd.merge(youtube_spotify, tiktok, left_on="Track", right_on="track_name")

# Drop these TikTok-side columns from the merged frame.
tracks = tracks.drop(
    columns=[
        'duration_ms', 'artist_name', 'instrumentalness', 'liveness', 'valence',
        'tempo', 'track_name', 'album', 'danceability', 'energy', 'loudness',
        'mode', 'key', 'speechiness', 'acousticness',
    ]
)

# The former `tracks.sort_values(by='Stream')` statement was removed: its result
# was never assigned, so it had no effect on `tracks` or on which duplicate is kept.

# Keep one row per (Track, Album) pair, then one row per (Track, Artist) pair;
# in both passes the last occurrence in merge order is the one retained.
tracks = tracks.drop_duplicates(subset=['Track', 'Album'], keep="last")
tracks = tracks.drop_duplicates(subset=['Track', 'Artist'], keep="last")

In [5]:
# Build the list of audio-feature column names that the plots compare:
# take every column of `tracks` after the first seven and discard the ones
# listed below. `list.remove` raises ValueError if a listed column is absent.
NON_FEATURE_COLUMNS = (
    'Description', 'Channel', 'Comments', 'Url_youtube', 'Title',
    'official_video', 'Stream', 'artist_pop', 'track_pop', 'Views', 'Likes',
    'Licensed', 'time_signature', 'Tempo', 'Duration_ms', 'Key', 'Loudness',
)

features = tracks.columns[7:].tolist()
for column in NON_FEATURE_COLUMNS:
    features.remove(column)

## Task 1

In [6]:
# Only a subset of the available features is plotted here: tempo, duration,
# loudness and key are left out.
# One fill colour per box, in the order of the plotted features.
colors = [
    '#ff0000',
    '#ff8700',
    '#be0aff',
    '#25a244',
    '#a1ff0a',
    '#deff0a',
    '#147df5',
]

# Marker style of the mean point drawn inside each box.
meanpointprops = {
    'markeredgecolor': 'black',
    'markerfacecolor': 'white',
}

def f(Size):
    """Draw, side by side, the feature box plots of the top songs of each platform.

    For TikTok, Spotify and YouTube (left to right) the source table is sorted
    in descending order by its ranking column (`track_pop`, `Stream`, `Views`),
    rows containing any missing value are dropped, and the first `Size` rows
    are summarised with one box per entry of the module-level `features` list.

    Parameters
    ----------
    Size : int
        Number of top-ranked songs taken from each platform.
    """
    # (panel title, source frame, ranking column, feature column names).
    # The TikTok table uses the lower-case spelling of the feature names.
    platforms = [
        ('TikTok', tiktok, 'track_pop', [feature.lower() for feature in features]),
        ('Spotify', youtube_spotify, 'Stream', features),
        ('YouTube', youtube_spotify, 'Views', features),
    ]
    tick_labels = [feature.capitalize() for feature in features]

    fig = plt.figure(figsize=(14, 5))
    axes = fig.subplots(nrows=1, ncols=3)

    for ax, (platform, frame, rank_by, attributes) in zip(axes, platforms):
        top_songs = frame.sort_values(by=rank_by, ascending=False).dropna().head(Size)

        # One sample per feature, in the same order as the tick labels.
        samples = [top_songs[feature] for feature in attributes]
        bplot = ax.boxplot(
            samples,
            patch_artist=True,
            medianprops=dict(color='white'),
            showmeans=True,
            meanprops=meanpointprops,
        )
        for box, box_color in zip(bplot['boxes'], colors):
            box.set_facecolor(box_color)
        for mean_marker in bplot['means']:
            mean_marker.set_color('white')

        ax.set_xticklabels(tick_labels, rotation=90)
        ax.set_title(platform)
        ax.yaxis.grid(alpha=0.3)

        ax.set_yticks(np.arange(0, 1.05, 0.1))
        ax.set_ylabel('Value', size=12)
        ax.set_xlabel('Features', size=12)

    fig.suptitle('Features distribution among the top '+str(Size)+ ' songs on the various platforms')

    
# The slider picks how many top songs feed each box plot. The trailing ';'
# keeps the cell from echoing the function object that interact returns.
interact(f, Size=widgets.IntSlider(min=1, max=len(tracks), step=1, value=10));

interactive(children=(IntSlider(value=10, description='Size', max=494, min=1), Output()), _dom_classes=('widge…

<function __main__.f(Size)>

Come mostriamo chi è primo nell'ordine?

## Task 3

Siamo indecisi tra due opzioni per aggiungere le visualizzazioni a questo task.

## Task 4

In [7]:
#tracks

In [8]:
###
def f(Songs):
    """Draw one pie chart per platform with the album types of its top songs.

    Each source table is sorted in descending order by its ranking column
    (`track_pop` on `tracks` for TikTok, `Stream` and `Likes` on
    `youtube_spotify` for Spotify and YouTube), rows containing any missing
    value are dropped, and the `Album_type` values of the first `Songs` rows
    are counted.

    Parameters
    ----------
    Songs : int
        Number of top-ranked songs considered on each platform.
    """
    # (label used in the panel title, source frame, ranking column), left to right.
    platforms = [
        ('TikTok', tracks, 'track_pop'),
        ('Spotify', youtube_spotify, 'Stream'),
        ('Youtube', youtube_spotify, 'Likes'),
    ]

    # Count album types separately for every platform, so a type that appears
    # on only one of them is neither lost nor drawn as an empty wedge elsewhere.
    counts = {}
    for platform, frame, rank_by in platforms:
        # head() keeps the best-ranked song and returns exactly `Songs` rows.
        top_songs = frame.sort_values(by=rank_by, ascending=False).dropna().head(Songs)
        counts[platform] = top_songs['Album_type'].value_counts()

    # Give every album type the same colour in all three pies.
    all_types = sorted(set().union(*(type_counts.index for type_counts in counts.values())))
    palette = {album_type: 'C' + str(i) for i, album_type in enumerate(all_types)}

    fig = plt.figure(figsize=(9, 5))
    fig.suptitle('Album type distribution for top songs')
    axes = fig.subplots(nrows=1, ncols=3)
    fig.tight_layout()

    for ax, (platform, type_counts) in zip(axes, counts.items()):
        ax.pie(
            type_counts.values,
            colors=[palette[album_type] for album_type in type_counts.index],
            autopct='%1.1f%%',
        )
        ax.set_title('Album type for the top '+str(Songs)+' songs on '+platform, size=7)
        ax.legend(labels=list(type_counts.index))
    
# The initial value must lie inside [min, max]; the previous value=1 was below
# min=2 and was silently clamped to 2 by the widget.
interact(f, Songs=widgets.IntSlider(min=2, max=len(tracks), step=1, value=2));

interactive(children=(IntSlider(value=2, description='Songs', max=494, min=2), Output()), _dom_classes=('widge…

## Task 2

Per YouTube e Spotify ci serve un solo valore per ogni feature: ha senso mostrare la media di ogni feature o la somma?

In [9]:
from math import pi 

def f(Platform):
    """Draw a 2x5 grid of radar charts, one per top artist of the chosen platform.

    Songs are grouped by artist and every column is averaged per artist. The
    artists are ranked in descending order by the mean of the platform's
    ranking column (`artist_pop` for TikTok, `Views` for YouTube, `Stream`
    otherwise) and the first ten are plotted with their mean feature values.

    Parameters
    ----------
    Platform : str
        'TikTok' or 'YouTube'; any other value is treated as Spotify.
    """
    num_artists = 10
    # Radar axes, in plotting order; also used as the axis labels.
    categories = ['danceability', 'energy', 'speechiness', 'acousticness',
                  'instrumentalness', 'liveness', 'valence']

    if Platform == 'TikTok':
        dataset, artist_col, rank_col = tiktok, 'artist_name', 'artist_pop'
        feature_cols = categories
    else:
        dataset, artist_col = youtube_spotify, 'Artist'
        rank_col = 'Views' if Platform == 'YouTube' else 'Stream'
        # The Spotify/YouTube table capitalises the feature column names.
        feature_cols = [name.capitalize() for name in categories]

    # Select the needed columns by name before averaging: averaging every
    # column fails on text columns in recent pandas, and picking the features
    # by position afterwards depended on the exact column layout.
    top_artists = (
        dataset.groupby(artist_col)[feature_cols + [rank_col]]
        .mean()
        .sort_values(by=rank_col, ascending=False)
        .head(num_artists)[feature_cols]
    )

    # One colour per rank.
    color = ['#ff0000','#ff8700','#ffd300','#deff0a','#a1ff0a','#25a244','#0aefff','#147df5','#be0aff','brown']

    # Angle of each feature axis; the first angle is repeated to close the polygon.
    N = len(categories)
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    fig, axs = plt.subplots(nrows=2, ncols=5, figsize=(22, 9), subplot_kw=dict(projection='polar'))
    fig.suptitle(Platform+' Top Artists features')

    # zip stops at the shorter sequence, so fewer than ten artists is not an error.
    ranked_rows = zip(axs.flat, color, top_artists.iterrows())
    for rank, (ax, artist_color, (artist, row)) in enumerate(ranked_rows, start=1):
        values = row.tolist()
        values += values[:1]

        # One labelled axis per feature.
        ax.set_xticks(angles[:-1], categories, color='grey', size=11)

        # Radial labels along the 0-degree direction, radius fixed to [0, 1.01].
        ax.set_rlabel_position(0)
        ax.set_ylim(0, 1.01)

        # Outline and filled area of the artist's feature polygon.
        ax.plot(angles, values, linewidth=1, linestyle='solid', color=artist_color)
        ax.fill(angles, values, artist_color, alpha=0.7)

        ax.set_title(str(rank)+'.'+str(artist))

        # Text keyword arguments are only valid together with explicit labels
        # (they were ignored before and are rejected by current matplotlib),
        # so only the tick positions are set.
        ax.set_rticks(np.arange(0, 1.1, 0.5))

    # Hide the panels that received no artist.
    for ax in list(axs.flat)[len(top_artists):]:
        ax.set_visible(False)

    # Spacing between the subplots.
    fig.subplots_adjust(wspace=0.5, hspace=0.1)
    
# Dropdown choosing the platform. The former `step=1, value=1` arguments matched
# no parameter of `f` and produced no widget, so they are dropped.
interact(f, Platform=['TikTok', 'Spotify', 'YouTube']);

interactive(children=(Dropdown(description='Platform', options=('TikTok', 'Spotify', 'YouTube'), value='TikTok…

## Task 5

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
def f(RankedBy):
    """Draw a mirrored bar chart of YouTube views versus likes and comments.

    `tracks` is sorted in descending order by `RankedBy`, rows containing any
    missing value are dropped and the first 30 songs are plotted, one
    horizontal bar group per video title. Views grow to the left of the
    centre line on the bottom x-axis; likes and comments are stacked and grow
    to the right on a second x-axis placed at the top.

    Parameters
    ----------
    RankedBy : str
        Column of `tracks` used for the ranking ('Views', 'Likes' or 'Comments').
    """
    Songs = 30

    color_comments = '#9A031E'
    color_likes = '#E36414'
    color_views = '#4361ee'

    # head() keeps the best-ranked song and returns exactly `Songs` rows.
    top_songs = tracks.sort_values(by=RankedBy, ascending=False).dropna().head(Songs)

    x = top_songs['Title'].to_list()
    y1 = np.array(top_songs['Likes'].to_list())
    y2 = np.array(top_songs['Views'].to_list())
    y3 = np.array(top_songs['Comments'].to_list())

    fig = plt.figure(figsize=(9, 7))
    ax = fig.add_subplot()
    ax2 = ax.twiny()

    # Likes and comments are stacked on the top axis, views use the bottom axis.
    ax2.barh(x, y1, label='Likes', color=color_likes)
    ax2.barh(x, y3, left=y1, label='Comments', color=color_comments)
    ax.barh(x, y2, label='Views', color=color_views)

    # Symmetric limits put zero in the middle of both axes.
    max_value = abs(y2).max()
    ax.set_xlim(-max_value-1000000000, max_value+1000000000)

    # The engagement bars are stacked, so their extent is likes + comments.
    max_value2 = abs(y1 + y3).max()
    ax2.set_xlim(-max_value2-10000000, max_value2+10000000)

    # White centre line separating the two halves.
    ax.axvline(x=0, color='white', linewidth=5)

    ax.set_title("Engagement and views on top "+str(Songs)+' songs on YouTube ranked by '+RankedBy,loc='left')
    ax.set_xlabel("Values")

    # Tick colours tell the reader which axis belongs to which series.
    ax.tick_params(axis='x', colors=color_views)
    ax2.tick_params(axis='x', colors=color_likes)

    ax.legend(loc='upper left')
    ax2.legend()

    # Best-ranked song at the top; the views axis is flipped so its bars grow leftwards.
    ax.invert_yaxis()
    ax.invert_xaxis()

    # Ticks only on the positive side of each axis.
    ax2.set_xticks(np.arange(0, max_value2+10000000, 10000000))
    ax.set_xticks(np.arange(0, max_value+100000000*7, 1000000000))

    ax.xaxis.grid(alpha=0.4, linestyle='--')
    ax2.xaxis.grid(alpha=0.4, linestyle='--')
    ax.set_ymargin(0.15)
    ax.yaxis.grid()

# Dropdown choosing the YouTube metric by which the songs are ranked.
interact(
    f,
    RankedBy=['Views', 'Likes', 'Comments'],
);

interactive(children=(Dropdown(description='RankedBy', options=('Views', 'Likes', 'Comments'), value='Views'),…