# Data Analysis and Visualizations
---

In [1]:
import pandas as pd
import sqlite3
import seaborn as sns
import matplotlib.pyplot as plt 
plt.style.use('fivethirtyeight')

# Open the project's SQLite database. The path is relative, so it is resolved
# against the current working directory.
DB_PATH = '../../data/sql_db/spotify_data.db'
db_conn = sqlite3.connect(DB_PATH)

# helper that runs a SQL query and returns the result as a pandas DataFrame
def Q(query, db=db_conn, params=None):
    """Run a SQL query and return the result as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL text to execute.
    db : connection, optional
        Connection handed to ``pd.read_sql``. Defaults to the notebook-wide
        ``db_conn`` (bound once, when this function is defined).
    params : sequence or mapping, optional
        Values bound to placeholders in ``query`` so variable inputs do not
        have to be formatted into the SQL string. ``None`` (the default)
        runs the query exactly as written.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_sql`` for the query.
    """
    return pd.read_sql(query, db, params=params)

## Check to see if all of our tables loaded.

In [12]:
# List every table recorded in the database's schema catalogue.
list_tables_sql = '''
SELECT name
FROM sqlite_master
WHERE type = 'table';
  '''
Q(list_tables_sql)

Unnamed: 0,name
0,Artist
1,Album
2,Track
3,Track_Feature


### As we can see from the output above, all 4 tables loaded correctly.

## Create views
---

## Top songs by artist in terms of duration_ms:

In [36]:
# Remove any earlier copy of the view so that the CREATE VIEW statement can be
# run again without failing on an existing name.
drop_ms_view_sql = '''DROP VIEW IF EXISTS top_5_songs_ms_view;'''
db_conn.execute(drop_ms_view_sql)

<sqlite3.Cursor at 0x1cf29917e30>

In [38]:
# Build a view holding each artist's longest tracks. DENSE_RANK gives tracks
# with equal durations the same rank, so an artist can contribute more than
# five rows when durations tie.
db_conn.execute('''
CREATE VIEW top_5_songs_ms_view AS
WITH top_songs_by_artist_cte AS
-- subquery for cte, want necessary columns and additional column that ranks songs by duration grouped by artist
(SELECT ar.artist_name, t.song_name, a.album_name, t.duration_ms,
    DENSE_RANK() OVER (PARTITION BY ar.artist_name ORDER BY t.duration_ms DESC) as song_duration_rank_by_artist
FROM Artist ar
-- join necessary tables together
LEFT JOIN Album a
    ON a.artist_id = ar.artist_id
LEFT JOIN Track t
    ON a.album_id = t.album_id)
-- the cte is left unordered; the final ORDER BY below decides the row order
-- query the cte for top 5 longest songs by artist
SELECT *
FROM top_songs_by_artist_cte
-- only want songs ranked in the top 5
WHERE song_duration_rank_by_artist BETWEEN 1 AND 5
ORDER BY artist_name ASC, song_duration_rank_by_artist ASC;
''')

<sqlite3.Cursor at 0x1cf2a6d3960>

In [32]:
# Read the duration view back to confirm it was created and populated.
preview_ms_view_sql = '''
SELECT *
FROM top_5_songs_ms_view;
'''
Q(preview_ms_view_sql)

Unnamed: 0,artist_name,song_name,album_name,duration_ms,song_duration_rank_by_artist
0,Baby Keem,range brothers (with Kendrick Lamar),The Melodic Blue,316733,1
1,Baby Keem,trademark usa,The Melodic Blue,270670,2
2,Baby Keem,lost souls,The Melodic Blue,269727,3
3,Baby Keem,lost souls (with Brent Faiyaz),The Melodic Blue,269727,3
4,Baby Keem,scars,The Melodic Blue,266010,4
...,...,...,...,...,...
111,Travis Scott,3500 (feat. Future & 2 Chainz),Rodeo (Expanded Edition),461840,1
112,Travis Scott,Ok Alright (feat. ScHoolboy Q),Rodeo (Expanded Edition),417693,2
113,Travis Scott,Oh My Dis Side (feat. Quavo),Rodeo (Expanded Edition),351253,3
114,Travis Scott,Maria I'm Drunk (feat. Justin Bieber & Young T...,Rodeo (Expanded Edition),349933,4


### As we can see from the output above, our view creation was a success. Now to create the 2nd view.

## Top artists in the database by # of followers:

In [41]:
# Remove any earlier copy of the followers view before it is (re)created.
drop_followers_view_sql = '''DROP VIEW IF EXISTS top_10_artist_num_followers_view;'''
db_conn.execute(drop_followers_view_sql)

<sqlite3.Cursor at 0x1cf2a74d0a0>

In [42]:
# Define a view of the ten Artist rows with the largest follower counts.
create_followers_view_sql = '''
CREATE VIEW top_10_artist_num_followers_view AS
SELECT *
FROM Artist
ORDER BY followers DESC
LIMIT 10;
'''
db_conn.execute(create_followers_view_sql)

<sqlite3.Cursor at 0x1cf2a74d420>

In [43]:
# Read the followers view back to confirm its contents.
preview_followers_view_sql = '''
SELECT *
FROM top_10_artist_num_followers_view;
'''
Q(preview_followers_view_sql)

Unnamed: 0,artist_id,artist_name,external_url,genre,image_url,followers,popularity,type,artist_uri
0,3TVXtAsR1Inumwj472S9r4,Drake,https://open.spotify.com/artist/3TVXtAsR1Inumw...,canadian hip hop,https://i.scdn.co/image/ab6761610000e5eb429338...,66995809,95,artist,spotify:artist:3TVXtAsR1Inumwj472S9r4
1,1Xyo4u8uXC1ZmMpatF05PJ,The Weeknd,https://open.spotify.com/artist/1Xyo4u8uXC1ZmM...,canadian contemporary r&b,https://i.scdn.co/image/ab6761610000e5ebb5f9e2...,49629059,94,artist,spotify:artist:1Xyo4u8uXC1ZmMpatF05PJ
2,246dkjvS1zLTtiykXe5h60,Post Malone,https://open.spotify.com/artist/246dkjvS1zLTti...,dfw rap,https://i.scdn.co/image/ab6761610000e5ebb894ef...,38142353,89,artist,spotify:artist:246dkjvS1zLTtiykXe5h60
3,4MCBfE4596Uoi2O4DtmEMz,Juice WRLD,https://open.spotify.com/artist/4MCBfE4596Uoi2...,chicago rap,https://i.scdn.co/image/ab6761610000e5eb1908e1...,25652049,89,artist,spotify:artist:4MCBfE4596Uoi2O4DtmEMz
4,3fMbdgg4jU18AjLCKBhRSm,Michael Jackson,https://open.spotify.com/artist/3fMbdgg4jU18Aj...,pop,https://i.scdn.co/image/ab6761610000e5eba2a0b9...,24129716,82,artist,spotify:artist:3fMbdgg4jU18AjLCKBhRSm
5,0Y5tJX1MQlPlqiwlOH1tJY,Travis Scott,https://open.spotify.com/artist/0Y5tJX1MQlPlqi...,hip hop,https://i.scdn.co/image/ab6761610000e5ebe707b8...,21019843,87,artist,spotify:artist:0Y5tJX1MQlPlqiwlOH1tJY
6,5K4W6rqBFWDnAN6FQUkS6x,Kanye West,https://open.spotify.com/artist/5K4W6rqBFWDnAN...,chicago rap,https://i.scdn.co/image/ab6761610000e5eb867008...,18214609,91,artist,spotify:artist:5K4W6rqBFWDnAN6FQUkS6x
7,6l3HvQ5sa6mXTsMTB19rO5,J. Cole,https://open.spotify.com/artist/6l3HvQ5sa6mXTs...,conscious hip hop,https://i.scdn.co/image/ab6761610000e5ebadd503...,17879851,85,artist,spotify:artist:6l3HvQ5sa6mXTsMTB19rO5
8,4O15NlyKLIASxsJ0PrXPfz,Lil Uzi Vert,https://open.spotify.com/artist/4O15NlyKLIASxs...,melodic rap,https://i.scdn.co/image/ab6761610000e5eb30122c...,13770423,85,artist,spotify:artist:4O15NlyKLIASxsJ0PrXPfz
9,1RyvyyTE3xzB2ZywiAwp0i,Future,https://open.spotify.com/artist/1RyvyyTE3xzB2Z...,atl hip hop,https://i.scdn.co/image/ab6761610000e5eb24e41f...,12133503,88,artist,spotify:artist:1RyvyyTE3xzB2ZywiAwp0i


### As we can see from the output above, we successfully created a view with the top 10 artists by number of followers. Now to create the 3rd view.

## Top songs by artist in terms of tempo:

In [55]:
# Remove any earlier copy of the tempo view before it is (re)created.
drop_tempo_view_sql = '''DROP VIEW IF EXISTS top_5_songs_tempo_view;'''
db_conn.execute(drop_tempo_view_sql)

<sqlite3.Cursor at 0x1cf2a77a030>

In [56]:
# Define a view of each artist's highest-tempo tracks: the CTE ranks tracks by
# tempo within an artist, and the outer query keeps ranks 1 through 5.
create_tempo_view_sql = '''
CREATE VIEW top_5_songs_tempo_view AS
WITH top_tempo_cte AS
-- subquery for cte, need to rank songs by tempo
(SELECT ar.artist_name, t.song_name, tf.tempo,
    DENSE_RANK() OVER (PARTITION BY ar.artist_name ORDER BY tf.tempo DESC) as tempo_rank_by_artist
FROM Artist ar
-- join necessary tables together
LEFT JOIN Album a 
    ON a.artist_id = ar.artist_id
LEFT JOIN Track t 
    ON a.album_id = t.album_id
LEFT JOIN Track_Feature tf
    ON t.track_id = tf.track_id)
-- query the cte for top 5 tracks with highest tempo
SELECT *
FROM top_tempo_cte
WHERE tempo_rank_by_artist BETWEEN 1 AND 5
ORDER BY artist_name ASC, tempo_rank_by_artist ASC;
'''
db_conn.execute(create_tempo_view_sql)

<sqlite3.Cursor at 0x1cf2a77ace0>

In [61]:
# Read the tempo view back to confirm it was created and populated.
preview_tempo_view_sql = '''
SELECT *
FROM top_5_songs_tempo_view;
'''
Q(preview_tempo_view_sql)

Unnamed: 0,artist_name,song_name,tempo,tempo_rank_by_artist
0,Baby Keem,no sense,180.013,1
1,Baby Keem,Xmen,170.030,2
2,Baby Keem,lost souls,170.022,3
3,Baby Keem,lost souls (with Brent Faiyaz),169.994,4
4,Baby Keem,Miss Charlotte,161.989,5
...,...,...,...,...
102,Travis Scott,the ends,171.853,1
103,Travis Scott,outside,165.906,2
104,Travis Scott,Motorcycle Patches,162.867,3
105,Travis Scott,HOUSTONFORNICATION,160.042,4


### The view creation was a success. Now let's work on our 4th view.

## Top 10 Artists with most explicit tracks:

In [63]:
# Remove any earlier copy of the explicit-tracks view before it is (re)created.
drop_explicit_view_sql = '''DROP VIEW IF EXISTS top_10_explicit_artist_view;'''
db_conn.execute(drop_explicit_view_sql)

<sqlite3.Cursor at 0x1cf2aad98f0>

In [71]:
# Define a view that sums the `explicit` column over each artist's tracks and
# keeps the ten artists with the largest totals.
create_explicit_view_sql = '''
CREATE VIEW top_10_explicit_artist_view AS
SELECT ar.artist_name, SUM(t.explicit) as num_explicit_tracks
FROM Artist ar
-- join necessary tables together
LEFT JOIN Album a 
    ON a.artist_id = ar.artist_id
LEFT JOIN Track t 
    ON a.album_id = t.album_id
GROUP BY ar.artist_name
ORDER BY num_explicit_tracks DESC
LIMIT 10;
'''
db_conn.execute(create_explicit_view_sql)

<sqlite3.Cursor at 0x1cf2b917340>

In [72]:
# Read the explicit-tracks view back to confirm its contents.
preview_explicit_view_sql = '''
SELECT *
FROM top_10_explicit_artist_view;
'''
Q(preview_explicit_view_sql)

Unnamed: 0,artist_name,num_explicit_tracks
0,Lil Uzi Vert,145
1,Gunna,134
2,Lil Baby,124
3,Juice WRLD,119
4,NAV,97
5,J. Cole,96
6,Drake,95
7,Post Malone,86
8,Travis Scott,66
9,Playboi Carti,58


In [None]:
# Show the column metadata SQLite reports for the Track_Feature table.
track_feature_info_sql = '''
PRAGMA table_info(Track_Feature);
'''
Q(track_feature_info_sql)

In [57]:
# List every view stored in the database. The catalogue is queried as
# `sqlite_master`, the same name the table listing uses; the `sqlite_schema`
# alias only exists in newer SQLite releases.
Q('''
SELECT name
FROM sqlite_master
WHERE type = 'view';
''')

Unnamed: 0,name
0,top_5_songs_ms_view
1,top_10_artist_num_followers_view
2,top_5_songs_tempo_view


In [None]:
# TODO: add the next query here. The Q(...) call is left out until then because
# an empty SQL string produces no result set, which makes pd.read_sql raise and
# stops a "Run All" before the commit/close cell is reached.

In [None]:
# Commit any pending changes, then close the database connection. The close
# sits in a `finally` clause so the connection is released even if the commit
# raises.
try:
    db_conn.commit()
finally:
    db_conn.close()