In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
import numpy as np
import pandas as pd
import os
%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
FOLDER = "figures"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, FOLDER)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
    
# Set columns view to max
pd.set_option('display.max_columns', None)

In [2]:
file = 'Hot100.csv'
df = pd.read_csv(file)
df

Unnamed: 0,Song_Performer,Song,Performer,Genre,Track_Duration_s,Track_Album,Danceability,Key,Mode,Valence,Tempo,Time_Sign,Date,Rank,Last_Week,Peak_Rank,Weeks_On_Board,Year,Key_Sign
0,'03 Bonnie & Clyde | Jay-Z Featuring Beyonce K...,'03 Bonnie & Clyde,Jay-Z Featuring Beyonce Knowles,"['east coast hip hop', 'hip hop', 'pop rap', '...",205.560,The Blueprint 2 The Gift & The Curse,,,,,,,2020-12-07,8,11.0,8,7,2020.0,NA NA
1,'65 Love Affair | Paul Davis,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,Radio Hits Of the '80s,0.647,D,minor,0.952,155.697,4.0,2019-06-05,9,6.0,6,15,2019.0,D minor
2,"'98 Thug Paradise | Tragedy, Capone, Infinite",'98 Thug Paradise,"Tragedy, Capone, Infinite",['english indie rock'],,,,,,,,,2019-10-17,99,91.0,86,5,2019.0,NA NA
3,'Round We Go | Big Sister,'Round We Go,Big Sister,[],,,,,,,,,2019-04-06,91,,91,1,2019.0,NA NA
4,'til I Can Make It On My Own | Tammy Wynette,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,The Essential Tammy Wynette,0.450,G,major,0.146,141.148,4.0,2019-05-01,100,,100,1,2019.0,G major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24245,Zombie | Bad Wolves,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,Zombie,0.448,D,minor,0.190,77.093,4.0,2020-04-28,74,67.0,54,6,2020.0,D minor
24246,Zoo York | Lil Tjay Featuring Fivio Foreign & ...,Zoo York,Lil Tjay Featuring Fivio Foreign & Pop Smoke,,,,,,,,,,2020-05-23,65,,65,1,2020.0,NA NA
24247,Zoom | Future,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,FUTURE,0.852,A,major,0.627,150.945,4.0,2020-03-11,99,,99,1,2020.0,A major
24248,Zorba The Greek | Herb Alpert & The Tijuana Brass,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,!!!Going Places!!!,0.531,F,major,0.192,82.107,4.0,2019-01-29,24,33.0,24,6,2019.0,F major


In [3]:
print(df.dtypes)

Song_Performer       object
Song                 object
Performer            object
Genre                object
Track_Duration_s    float64
Track_Album          object
Danceability        float64
Key                  object
Mode                 object
Valence             float64
Tempo               float64
Time_Sign           float64
Date                 object
Rank                  int64
Last_Week           float64
Peak_Rank             int64
Weeks_On_Board        int64
Year                float64
Key_Sign             object
dtype: object


In [4]:
print(df.shape)

(24250, 19)


In [5]:
print(df.isnull().sum())

Song_Performer         0
Song                   0
Performer              0
Genre               1305
Track_Duration_s    4422
Track_Album         4428
Danceability        4483
Key                 4483
Mode                4483
Valence             4483
Tempo               4483
Time_Sign           4483
Date                  26
Rank                   0
Last_Week           5524
Peak_Rank              0
Weeks_On_Board         0
Year                  26
Key_Sign               0
dtype: int64


In [6]:
df1 = df.drop(['Track_Album', 'Key', 'Mode', 'Tempo', 'Last_Week', 'Year', 'Time_Sign', 'Song_Performer'], axis=1)
df1

Unnamed: 0,Song,Performer,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign
0,'03 Bonnie & Clyde,Jay-Z Featuring Beyonce Knowles,"['east coast hip hop', 'hip hop', 'pop rap', '...",205.560,,,2020-12-07,8,8,7,NA NA
1,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,2019-06-05,9,6,15,D minor
2,'98 Thug Paradise,"Tragedy, Capone, Infinite",['english indie rock'],,,,2019-10-17,99,86,5,NA NA
3,'Round We Go,Big Sister,[],,,,2019-04-06,91,91,1,NA NA
4,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,2019-05-01,100,100,1,G major
...,...,...,...,...,...,...,...,...,...,...,...
24245,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,2020-04-28,74,54,6,D minor
24246,Zoo York,Lil Tjay Featuring Fivio Foreign & Pop Smoke,,,,,2020-05-23,65,65,1,NA NA
24247,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,2020-03-11,99,99,1,A major
24248,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,2019-01-29,24,24,6,F major


In [7]:
df1 = df1.dropna()
df1

Unnamed: 0,Song,Performer,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign
1,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,2019-06-05,9,6,15,D minor
4,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,2019-05-01,100,100,1,G major
6,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,2019-03-16,90,90,1,B minor
7,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,2020-04-03,63,59,10,A minor
8,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,2019-10-21,31,31,16,F major
...,...,...,...,...,...,...,...,...,...,...,...
24244,Zip Code,The Five Americans,"['bubblegum pop', 'classic garage rock', 'frea...",175.040,0.393,0.927,2019-09-23,58,36,7,A major
24245,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,2020-04-28,74,54,6,D minor
24247,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,2020-03-11,99,99,1,A major
24248,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,2019-01-29,24,24,6,F major


In [8]:
df1 = df1.reset_index(drop=True)
df1

Unnamed: 0,Song,Performer,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,2019-06-05,9,6,15,D minor
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,2019-05-01,100,100,1,G major
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,2019-03-16,90,90,1,B minor
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,2020-04-03,63,59,10,A minor
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,2019-10-21,31,31,16,F major
...,...,...,...,...,...,...,...,...,...,...,...
19636,Zip Code,The Five Americans,"['bubblegum pop', 'classic garage rock', 'frea...",175.040,0.393,0.927,2019-09-23,58,36,7,A major
19637,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,2020-04-28,74,54,6,D minor
19638,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,2020-03-11,99,99,1,A major
19639,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,2019-01-29,24,24,6,F major


In [9]:
df1["Date"] = pd.to_datetime(df1["Date"]).dt.strftime("%Y%m%d")

In [10]:
df1["Date"] = df1["Date"].astype(str).astype(int)

In [11]:
print(df1["Date"].dtypes)

int32


In [12]:
df1.dtypes

Song                 object
Performer            object
Genre                object
Track_Duration_s    float64
Danceability        float64
Valence             float64
Date                  int32
Rank                  int64
Peak_Rank             int64
Weeks_On_Board        int64
Key_Sign             object
dtype: object

In [13]:
file2 = 'Hot 100 Audio Features.xlsx'
audiofeat_df = pd.read_excel(file2)

In [14]:
audiofeat_df

Unnamed: 0,SongID,Performer,Song,spotify_genre,spotify_track_id,spotify_track_preview_url,spotify_track_duration_ms,spotify_track_explicit,spotify_track_album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,spotify_track_popularity
0,-twistin'-White Silver SandsBill Black's Combo,Bill Black's Combo,-twistin'-White Silver Sands,[],,,,,,,,,,,,,,,,,,
1,¿Dònde Està Santa Claus? (Where Is Santa Claus...,Augie Rios,¿Dònde Està Santa Claus? (Where Is Santa Claus?),['novelty'],,,,,,,,,,,,,,,,,,
2,......And Roses And RosesAndy Williams,Andy Williams,......And Roses And Roses,"['adult standards', 'brill building pop', 'eas...",3tvqPPpXyIgKrm4PR9HCf0,https://p.scdn.co/mp3-preview/cef4883cfd1e0e53...,166106.0,0.0,The Essential Andy Williams,0.154,0.185,5.0,-14.063,1.0,0.0315,0.91100,0.000267,0.1120,0.150,83.969,4.0,38.0
3,...And Then There Were DrumsSandy Nelson,Sandy Nelson,...And Then There Were Drums,"['rock-and-roll', 'space age pop', 'surf music']",1fHHq3qHU8wpRKHzhojZ4a,,172066.0,0.0,Compelling Percussion,0.588,0.672,11.0,-17.278,0.0,0.0361,0.00256,0.745000,0.1450,0.801,121.962,4.0,11.0
4,...Baby One More TimeBritney Spears,Britney Spears,...Baby One More Time,"['dance pop', 'pop', 'post-teen pop']",3MjUtNVVq3C8Fn0MP3zhXa,https://p.scdn.co/mp3-preview/da2134a161f1cb34...,211066.0,0.0,...Baby One More Time (Digital Deluxe Version),0.759,0.699,0.0,-5.745,0.0,0.0307,0.20200,0.000131,0.4430,0.907,92.960,4.0,77.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29498,Zoo YorkLil Tjay Featuring Fivio Foreign & Pop...,Lil Tjay Featuring Fivio Foreign & Pop Smoke,Zoo York,,,,,,,,,,,,,,,,,,,
29499,ZoomFuture,Future,Zoom,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",2IG6Te7JyvrtqhFeOF7le4,https://p.scdn.co/mp3-preview/cb8fde6edc08e70a...,278429.0,1.0,FUTURE,0.852,0.438,9.0,-7.673,1.0,0.4260,0.01450,0.000000,0.2630,0.627,150.945,4.0,51.0
29500,ZoomLil' Boosie Featuring Yung Joc,Lil' Boosie Featuring Yung Joc,Zoom,"['baton rouge rap', 'deep southern trap']",,,,,,,,,,,,,,,,,,
29501,Zorba The GreekHerb Alpert & The Tijuana Brass,Herb Alpert & The Tijuana Brass,Zorba The Greek,"['adult standards', 'easy listening', 'lounge']",3WLEVNohakzZmMpN5W7mHK,https://p.scdn.co/mp3-preview/1841a4034ba42fc0...,264853.0,0.0,!!!Going Places!!!,0.531,0.642,5.0,-12.702,1.0,0.3230,0.15400,0.279000,0.0584,0.192,82.107,4.0,35.0


In [15]:
df_merged = pd.merge(df1, audiofeat_df, on='Song')
df_merged

Unnamed: 0,Song,Performer_x,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign,SongID,Performer_y,spotify_genre,spotify_track_id,spotify_track_preview_url,spotify_track_duration_ms,spotify_track_explicit,spotify_track_album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,spotify_track_popularity
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,'65 Love AffairPaul Davis,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",5nBp8F6tekSrnFg8G2Pvug,https://p.scdn.co/mp3-preview/a701445830ecacfb...,219813.0,0.0,Radio Hits Of the '80s,0.647,0.686,2.0,-4.247,0.0,0.0274,0.43200,0.000006,0.1330,0.952,155.697,4.0,40.0
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,'til I Can Make It On My OwnTammy Wynette,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",0aJHZYjwbfTmeyUWF7zGxI,https://p.scdn.co/mp3-preview/73a24c43d2b48dc5...,182080.0,0.0,The Essential Tammy Wynette,0.450,0.294,7.0,-12.022,1.0,0.0318,0.83200,0.000035,0.1080,0.146,141.148,4.0,31.0
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,'Til My Baby Comes HomeLuther Vandross,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",2R97RZWUx4vAFbMMtmb5Ss,https://p.scdn.co/mp3-preview/c54184ec2fc7e8bd...,332226.0,0.0,The Night I Fell In Love,0.804,0.714,11.0,-6.714,0.0,0.1830,0.05670,0.000006,0.0253,0.802,139.663,4.0,29.0
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,'Til Summer Comes AroundKeith Urban,Keith Urban,"['australian country', 'contemporary country',...",1CKmI1IQjVEVB3F7VmJmM3,,331466.0,0.0,Defying Gravity,0.570,0.629,9.0,-7.608,0.0,0.0331,0.59300,0.000136,0.7700,0.308,127.907,4.0,42.0
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,'Til You Do Me RightAfter 7,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",3kGMziz884MLV1oCwrarmN,,295000.0,0.0,Reflections,0.612,0.542,5.0,-7.563,1.0,0.0264,0.07810,0.000000,0.0763,0.433,76.744,4.0,42.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24106,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,ZombieBad Wolves,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",1vNoA9F5ASnlBISFekDmg3,https://p.scdn.co/mp3-preview/4978147ac417f060...,254805.0,0.0,Zombie,0.448,0.826,2.0,-3.244,0.0,0.0319,0.00756,0.000000,0.1170,0.190,77.093,4.0,74.0
24107,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,ZoomFuture,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",2IG6Te7JyvrtqhFeOF7le4,https://p.scdn.co/mp3-preview/cb8fde6edc08e70a...,278429.0,1.0,FUTURE,0.852,0.438,9.0,-7.673,1.0,0.4260,0.01450,0.000000,0.2630,0.627,150.945,4.0,51.0
24108,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,ZoomLil' Boosie Featuring Yung Joc,Lil' Boosie Featuring Yung Joc,"['baton rouge rap', 'deep southern trap']",,,,,,,,,,,,,,,,,,
24109,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,Zorba The GreekHerb Alpert & The Tijuana Brass,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",3WLEVNohakzZmMpN5W7mHK,https://p.scdn.co/mp3-preview/1841a4034ba42fc0...,264853.0,0.0,!!!Going Places!!!,0.531,0.642,5.0,-12.702,1.0,0.3230,0.15400,0.279000,0.0584,0.192,82.107,4.0,35.0


In [16]:
df_merged = df_merged.drop(['valence','tempo','time_signature','spotify_track_popularity'], axis=1)
df_merged

Unnamed: 0,Song,Performer_x,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign,SongID,Performer_y,spotify_genre,spotify_track_id,spotify_track_preview_url,spotify_track_duration_ms,spotify_track_explicit,spotify_track_album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,'65 Love AffairPaul Davis,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",5nBp8F6tekSrnFg8G2Pvug,https://p.scdn.co/mp3-preview/a701445830ecacfb...,219813.0,0.0,Radio Hits Of the '80s,0.647,0.686,2.0,-4.247,0.0,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,'til I Can Make It On My OwnTammy Wynette,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",0aJHZYjwbfTmeyUWF7zGxI,https://p.scdn.co/mp3-preview/73a24c43d2b48dc5...,182080.0,0.0,The Essential Tammy Wynette,0.450,0.294,7.0,-12.022,1.0,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,'Til My Baby Comes HomeLuther Vandross,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",2R97RZWUx4vAFbMMtmb5Ss,https://p.scdn.co/mp3-preview/c54184ec2fc7e8bd...,332226.0,0.0,The Night I Fell In Love,0.804,0.714,11.0,-6.714,0.0,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,'Til Summer Comes AroundKeith Urban,Keith Urban,"['australian country', 'contemporary country',...",1CKmI1IQjVEVB3F7VmJmM3,,331466.0,0.0,Defying Gravity,0.570,0.629,9.0,-7.608,0.0,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,'Til You Do Me RightAfter 7,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",3kGMziz884MLV1oCwrarmN,,295000.0,0.0,Reflections,0.612,0.542,5.0,-7.563,1.0,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24106,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,ZombieBad Wolves,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",1vNoA9F5ASnlBISFekDmg3,https://p.scdn.co/mp3-preview/4978147ac417f060...,254805.0,0.0,Zombie,0.448,0.826,2.0,-3.244,0.0,0.0319,0.00756,0.000000,0.1170
24107,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,ZoomFuture,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",2IG6Te7JyvrtqhFeOF7le4,https://p.scdn.co/mp3-preview/cb8fde6edc08e70a...,278429.0,1.0,FUTURE,0.852,0.438,9.0,-7.673,1.0,0.4260,0.01450,0.000000,0.2630
24108,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,ZoomLil' Boosie Featuring Yung Joc,Lil' Boosie Featuring Yung Joc,"['baton rouge rap', 'deep southern trap']",,,,,,,,,,,,,,
24109,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,Zorba The GreekHerb Alpert & The Tijuana Brass,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",3WLEVNohakzZmMpN5W7mHK,https://p.scdn.co/mp3-preview/1841a4034ba42fc0...,264853.0,0.0,!!!Going Places!!!,0.531,0.642,5.0,-12.702,1.0,0.3230,0.15400,0.279000,0.0584


In [17]:
df_merged = df_merged.drop(['SongID',
                            'Performer_y','spotify_genre',
                            'spotify_track_id','spotify_track_preview_url',
                            'spotify_track_duration_ms','spotify_track_explicit',
                            'key','mode'], axis=1)

In [18]:
df_merged = df_merged.rename(columns={'Performer_x': 'Performer'})

In [19]:
df_merged = df_merged.drop(['danceability'], axis=1)
df_merged

Unnamed: 0,Song,Performer,Genre,Track_Duration_s,Danceability,Valence,Date,Rank,Peak_Rank,Weeks_On_Board,Key_Sign,spotify_track_album,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,Radio Hits Of the '80s,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,The Essential Tammy Wynette,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,The Night I Fell In Love,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,Defying Gravity,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,Reflections,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24106,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,Zombie,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
24107,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,FUTURE,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
24108,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,,,,,,,
24109,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,!!!Going Places!!!,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [20]:
df_merged.columns = df_merged.columns.str.lower()
df_merged

Unnamed: 0,song,performer,genre,track_duration_s,danceability,valence,date,rank,peak_rank,weeks_on_board,key_sign,spotify_track_album,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,Radio Hits Of the '80s,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,The Essential Tammy Wynette,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,The Night I Fell In Love,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,Defying Gravity,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,Reflections,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24106,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,Zombie,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
24107,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,FUTURE,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
24108,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,,,,,,,
24109,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,!!!Going Places!!!,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [21]:
final_df = df_merged.dropna()
final_df

Unnamed: 0,song,performer,genre,track_duration_s,danceability,valence,date,rank,peak_rank,weeks_on_board,key_sign,spotify_track_album,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,Radio Hits Of the '80s,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,The Essential Tammy Wynette,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,The Night I Fell In Love,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,Defying Gravity,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,Reflections,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24105,Zip Code,The Five Americans,"['bubblegum pop', 'classic garage rock', 'frea...",175.040,0.393,0.927,20190923,58,36,7,A major,The Five Americans (Rerecorded Versions),0.594,-5.986,0.0267,0.03940,0.000000,0.0479
24106,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,Zombie,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
24107,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,FUTURE,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
24109,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,!!!Going Places!!!,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [22]:
print(final_df.isnull().sum())

song                   0
performer              0
genre                  0
track_duration_s       0
danceability           0
valence                0
date                   0
rank                   0
peak_rank              0
weeks_on_board         0
key_sign               0
spotify_track_album    0
energy                 0
loudness               0
speechiness            0
acousticness           0
instrumentalness       0
liveness               0
dtype: int64


In [23]:
final_df = final_df.reset_index(drop=True)
final_df

Unnamed: 0,song,performer,genre,track_duration_s,danceability,valence,date,rank,peak_rank,weeks_on_board,key_sign,spotify_track_album,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,"['album rock', 'bubblegum pop', 'country rock'...",219.813,0.647,0.952,20190605,9,6,15,D minor,Radio Hits Of the '80s,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,"['country', 'country dawn', 'nashville sound']",182.080,0.450,0.146,20190501,100,100,1,G major,The Essential Tammy Wynette,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,"['funk', 'motown', 'neo soul', 'new jack swing...",332.226,0.804,0.802,20190316,90,90,1,B minor,The Night I Fell In Love,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,"['australian country', 'contemporary country',...",331.466,0.570,0.308,20200403,63,59,10,A minor,Defying Gravity,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,"['funk', 'neo soul', 'new jack swing', 'quiet ...",295.000,0.612,0.433,20191021,31,31,16,F major,Reflections,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23577,Zip Code,The Five Americans,"['bubblegum pop', 'classic garage rock', 'frea...",175.040,0.393,0.927,20190923,58,36,7,A major,The Five Americans (Rerecorded Versions),0.594,-5.986,0.0267,0.03940,0.000000,0.0479
23578,Zombie,Bad Wolves,"['alternative metal', 'metal', 'post-grunge']",254.805,0.448,0.190,20200428,74,54,6,D minor,Zombie,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
23579,Zoom,Future,"['atl hip hop', 'hip hop', 'pop rap', 'rap', '...",278.429,0.852,0.627,20200311,99,99,1,A major,FUTURE,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
23580,Zorba The Greek,Herb Alpert & The Tijuana Brass,"['adult standards', 'easy listening', 'lounge']",264.853,0.531,0.192,20190129,24,24,6,F major,!!!Going Places!!!,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [24]:
print(final_df.dtypes)

song                    object
performer               object
genre                   object
track_duration_s       float64
danceability           float64
valence                float64
date                     int32
rank                     int64
peak_rank                int64
weeks_on_board           int64
key_sign                object
spotify_track_album     object
energy                 float64
loudness               float64
speechiness            float64
acousticness           float64
instrumentalness       float64
liveness               float64
dtype: object


In [25]:
print(final_df.shape)

(23582, 18)


In [26]:
print(final_df.isnull().sum())

song                   0
performer              0
genre                  0
track_duration_s       0
danceability           0
valence                0
date                   0
rank                   0
peak_rank              0
weeks_on_board         0
key_sign               0
spotify_track_album    0
energy                 0
loudness               0
speechiness            0
acousticness           0
instrumentalness       0
liveness               0
dtype: int64


In [27]:
# Clear up Genre Column by importing 10k mtv music artist csv files
# merge all four csv files into one csv then merge with final_df and 
# replace original genre column w/ new one

f1 = pd.read_csv('10000-MTV-Music-Artists-page-1.csv')
f2 = pd.read_csv('10000-MTV-Music-Artists-page-2.csv')
f3 = pd.read_csv('10000-MTV-Music-Artists-page-3.csv')
f4 = pd.read_csv('10000-MTV-Music-Artists-page-4.csv')

In [28]:
# Merge all vertically
merged_mtv = pd.concat([f1,f2,f3,f4], axis=0)
merged_mtv

Unnamed: 0,name,facebook,twitter,website,genre,mtv
0,Adele,http://www.facebook.com/9770929278,http://www.twitter.com/officialadele,,Pop,http://www.mtv.com/artists/adele/biography
1,Joey + Rory,http://www.facebook.com/15044507815,http://www.twitter.com/joeyandrory,,Country,http://www.cmt.com/artists/joey-rory/biography
2,Draaco Aventura,http://www.facebook.com/856796091053581,http://www.twitter.com/DraacoAventura,http://www.bandpage.com/draacoaventura,Pop Latino,http://www.mtv.com/artists/draaco-aventura/bio...
3,Justin Bieber,http://www.facebook.com/309570926875,http://www.twitter.com/justinbieber,http://www.justinbiebermusic.com,Pop,http://www.mtv.com/artists/justin-bieber/biogr...
4,Peer van Mladen,http://www.facebook.com/264487966,http://www.twitter.com/Predrag_Jugovic,http://pejaintergroup.eu/Peer_van_Mladen.html,House,http://www.mtv.com/artists/peer-van-mladen/bio...
...,...,...,...,...,...,...
1012,Hermann Kopp,,,,,http://www.mtv.com/artists/hermann-kopp/biography
1013,Greg West - Ultimate Aldean,,,http://www.ultimatealdean.com,Country,http://www.cmt.com/artists/greg-west-ultimate-...
1014,J-Starr the Prince,http://www.facebook.com/179080192291541,http://twitter.com/Jstarr_Prince,http://www.jstarrtheprince.com,Hip-Hop/Rap,http://www.mtv.com/artists/j-starr-the-prince/...
1015,Cardiknox,http://www.facebook.com/Cardiknox,http://www.twitter.com/cardiknox,http://www.cardiknox.com,Electronic,http://www.mtv.com/artists/cardiknox/biography


In [29]:
# Drop unecessary columns
merged_mtv = merged_mtv.drop(['facebook', 'twitter', 
                              'website', 'mtv'], axis=1)

In [30]:
merged_mtv = merged_mtv.rename(columns={'name': 'performer'})
merged_mtv.reset_index(drop=True)

Unnamed: 0,performer,genre
0,Adele,Pop
1,Joey + Rory,Country
2,Draaco Aventura,Pop Latino
3,Justin Bieber,Pop
4,Peer van Mladen,House
...,...,...
8339,Hermann Kopp,
8340,Greg West - Ultimate Aldean,Country
8341,J-Starr the Prince,Hip-Hop/Rap
8342,Cardiknox,Electronic


In [32]:
final_df = final_df.drop(['genre'], axis=1)
final_df

Unnamed: 0,song,performer,track_duration_s,danceability,valence,date,rank,peak_rank,weeks_on_board,key_sign,spotify_track_album,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,219.813,0.647,0.952,20190605,9,6,15,D minor,Radio Hits Of the '80s,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,182.080,0.450,0.146,20190501,100,100,1,G major,The Essential Tammy Wynette,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,332.226,0.804,0.802,20190316,90,90,1,B minor,The Night I Fell In Love,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,331.466,0.570,0.308,20200403,63,59,10,A minor,Defying Gravity,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,295.000,0.612,0.433,20191021,31,31,16,F major,Reflections,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23577,Zip Code,The Five Americans,175.040,0.393,0.927,20190923,58,36,7,A major,The Five Americans (Rerecorded Versions),0.594,-5.986,0.0267,0.03940,0.000000,0.0479
23578,Zombie,Bad Wolves,254.805,0.448,0.190,20200428,74,54,6,D minor,Zombie,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
23579,Zoom,Future,278.429,0.852,0.627,20200311,99,99,1,A major,FUTURE,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
23580,Zorba The Greek,Herb Alpert & The Tijuana Brass,264.853,0.531,0.192,20190129,24,24,6,F major,!!!Going Places!!!,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [33]:
final_df = final_df.drop(['spotify_track_album'], axis=1)
final_df

Unnamed: 0,song,performer,track_duration_s,danceability,valence,date,rank,peak_rank,weeks_on_board,key_sign,energy,loudness,speechiness,acousticness,instrumentalness,liveness
0,'65 Love Affair,Paul Davis,219.813,0.647,0.952,20190605,9,6,15,D minor,0.686,-4.247,0.0274,0.43200,0.000006,0.1330
1,'til I Can Make It On My Own,Tammy Wynette,182.080,0.450,0.146,20190501,100,100,1,G major,0.294,-12.022,0.0318,0.83200,0.000035,0.1080
2,'Til My Baby Comes Home,Luther Vandross,332.226,0.804,0.802,20190316,90,90,1,B minor,0.714,-6.714,0.1830,0.05670,0.000006,0.0253
3,'Til Summer Comes Around,Keith Urban,331.466,0.570,0.308,20200403,63,59,10,A minor,0.629,-7.608,0.0331,0.59300,0.000136,0.7700
4,'Til You Do Me Right,After 7,295.000,0.612,0.433,20191021,31,31,16,F major,0.542,-7.563,0.0264,0.07810,0.000000,0.0763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23577,Zip Code,The Five Americans,175.040,0.393,0.927,20190923,58,36,7,A major,0.594,-5.986,0.0267,0.03940,0.000000,0.0479
23578,Zombie,Bad Wolves,254.805,0.448,0.190,20200428,74,54,6,D minor,0.826,-3.244,0.0319,0.00756,0.000000,0.1170
23579,Zoom,Future,278.429,0.852,0.627,20200311,99,99,1,A major,0.438,-7.673,0.4260,0.01450,0.000000,0.2630
23580,Zorba The Greek,Herb Alpert & The Tijuana Brass,264.853,0.531,0.192,20190129,24,24,6,F major,0.642,-12.702,0.3230,0.15400,0.279000,0.0584


In [35]:
# Export to final_df to csv
final_df.to_csv('final_df.csv', index=False)