In [1]:
"""
Pandas demonstration of basic capabilities for the DE team
"""
import pandas as pd


In [2]:
# create simple DF
simple_df = pd.DataFrame({"header": ['val 1.1','val 1.2'], "header 1": ['val 2.1', 'val 2.2']})
simple_df

Unnamed: 0,header,header 1
0,val 1.1,val 2.1
1,val 1.2,val 2.2


In [3]:
# create simple DF with indexed strings
simple_indexed_df = pd.DataFrame({"header": ['val 1.1','val 1.2'], "header 1": ['val 2.1', 'val 2.2']},index=['index 1', 'index 2'])
simple_indexed_df

Unnamed: 0,header,header 1
index 1,val 1.1,val 2.1
index 2,val 1.2,val 2.2


In [4]:
# read csv file
df = pd.read_csv('./spotify-2023.csv', encoding='latin-1')
df.head(10)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,Major,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Minor,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,Major,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Minor,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Minor,81,56,48,21,0,8,33


In [5]:
# size information
df.shape

(953, 24)

In [6]:
# get one column by name
df.track_name.head(10)

0    Seven (feat. Latto) (Explicit Ver.)
1                                   LALA
2                                vampire
3                           Cruel Summer
4                         WHERE SHE GOES
5                               Sprinter
6                        Ella Baila Sola
7                               Columbia
8                               fukumean
9                        La Bebe - Remix
Name: track_name, dtype: object

In [7]:
# second way get one column by name + first element
df['track_name'][0]

'Seven (feat. Latto) (Explicit Ver.)'

In [8]:
# get information by index
df.iloc[0]

track_name              Seven (feat. Latto) (Explicit Ver.)
artist(s)_name                             Latto, Jung Kook
artist_count                                              2
released_year                                          2023
released_month                                            7
released_day                                             14
in_spotify_playlists                                    553
in_spotify_charts                                       147
streams                                           141381703
in_apple_playlists                                       43
in_apple_charts                                         263
in_deezer_playlists                                      45
in_deezer_charts                                         10
in_shazam_charts                                        826
bpm                                                     125
key                                                       B
mode                                    

In [9]:
# get information by index
df.loc[0]

track_name              Seven (feat. Latto) (Explicit Ver.)
artist(s)_name                             Latto, Jung Kook
artist_count                                              2
released_year                                          2023
released_month                                            7
released_day                                             14
in_spotify_playlists                                    553
in_spotify_charts                                       147
streams                                           141381703
in_apple_playlists                                       43
in_apple_charts                                         263
in_deezer_playlists                                      45
in_deezer_charts                                         10
in_shazam_charts                                        826
bpm                                                     125
key                                                       B
mode                                    

In [10]:
# The above works well if the indices match
df_loc_example = df.head(5)
df_loc_example.index = [1,2,3,4,5]

In [11]:
df_loc_example.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
1,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
2,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
3,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
4,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
5,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6


In [12]:
df_loc_example.iloc[0] # work correctly

track_name              Seven (feat. Latto) (Explicit Ver.)
artist(s)_name                             Latto, Jung Kook
artist_count                                              2
released_year                                          2023
released_month                                            7
released_day                                             14
in_spotify_playlists                                    553
in_spotify_charts                                       147
streams                                           141381703
in_apple_playlists                                       43
in_apple_charts                                         263
in_deezer_playlists                                      45
in_deezer_charts                                         10
in_shazam_charts                                        826
bpm                                                     125
key                                                       B
mode                                    

In [13]:
#df_loc_example.loc[0] # doesn't work, we don't have `0` index

In [14]:
# we can take column as well
df.iloc[:, 0]

0      Seven (feat. Latto) (Explicit Ver.)
1                                     LALA
2                                  vampire
3                             Cruel Summer
4                           WHERE SHE GOES
                      ...                 
948                           My Mind & Me
949              Bigger Than The Whole Sky
950                   A Veces (feat. Feid)
951                          En La De Ella
952                                  Alone
Name: track_name, Length: 953, dtype: object

In [15]:
# it can be range
df.iloc[0:3, 0]

0    Seven (feat. Latto) (Explicit Ver.)
1                                   LALA
2                                vampire
Name: track_name, dtype: object

In [16]:
# it can be list 
df.iloc[[0, 1, 2], 0]

0    Seven (feat. Latto) (Explicit Ver.)
1                                   LALA
2                                vampire
Name: track_name, dtype: object

In [17]:
# like tail
df.iloc[-5:]

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
948,My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,...,144,A,Major,60,24,39,57,0,8,3
949,Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,...,166,F#,Major,42,7,24,83,1,12,6
950,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,92,C#,Major,80,81,67,4,0,8,6
951,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,97,C#,Major,82,67,77,8,0,12,5
952,Alone,Burna Boy,1,2022,11,4,782,2,96007391,27,...,90,E,Minor,61,32,67,15,0,11,5


In [18]:
# we can use column as index
df_loc_example.set_index('track_name',inplace = True,append = True, drop = True)

In [19]:
df_loc_example.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
Unnamed: 0_level_1,track_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,263,...,125,B,Major,80,89,83,31,0,8,4
2,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,126,...,92,C#,Major,71,61,74,7,0,10,4
3,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,207,...,138,F,Major,51,32,53,17,0,31,6
4,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
5,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,133,...,144,A,Minor,65,23,80,14,63,11,6


In [20]:
df_no_index = pd.read_csv('./spotify-2023.csv', index_col= 'track_name', encoding='latin-1' ) 
df_no_index

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,263,...,125,B,Major,80,89,83,31,0,8,4
LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,126,...,92,C#,Major,71,61,74,7,0,10,4
vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,207,...,138,F,Major,51,32,53,17,0,31,6
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,133,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,13,...,144,A,Major,60,24,39,57,0,8,3
Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,0,...,166,F#,Major,42,7,24,83,1,12,6
A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,0,...,92,C#,Major,80,81,67,4,0,8,6
En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,26,...,97,C#,Major,82,67,77,8,0,12,5


In [21]:
# and now we can call it by name :)
df_no_index.loc['LALA']

artist(s)_name          Myke Towers
artist_count                      1
released_year                  2023
released_month                    3
released_day                     23
in_spotify_playlists           1474
in_spotify_charts                48
streams                   133716286
in_apple_playlists               48
in_apple_charts                 126
in_deezer_playlists              58
in_deezer_charts                 14
in_shazam_charts                382
bpm                              92
key                              C#
mode                          Major
danceability_%                   71
valence_%                        61
energy_%                         74
acousticness_%                    7
instrumentalness_%                0
liveness_%                       10
speechiness_%                     4
Name: LALA, dtype: object

In [22]:
# sometimes we cant use df.column call, in our case with filter
df_no_index.loc[df_no_index["artist(s)_name"] == 'Taylor Swift']

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
I Can See You (Taylorï¿½ï¿½ï¿½s Version) (From The,Taylor Swift,1,2023,7,7,516,38,52135248,73,119,...,123,F#,Major,69,82,76,6,0,6,3
Anti-Hero,Taylor Swift,1,2022,10,21,9082,56,999748277,242,142,...,97,E,Major,64,51,63,12,0,19,5
Blank Space,Taylor Swift,1,2014,1,1,11434,53,1355959075,154,123,...,96,F,Major,75,57,68,9,0,13,6
Style,Taylor Swift,1,2014,1,1,7830,42,786181836,94,111,...,95,D,Major,60,48,79,0,0,12,4
cardigan,Taylor Swift,1,2020,7,24,7923,29,812019557,106,112,...,130,,Minor,61,53,58,55,0,27,4
Karma,Taylor Swift,1,2022,10,21,3818,23,404562836,37,55,...,90,G#,Major,64,10,62,7,0,48,7
Enchanted (Taylor's Version),Taylor Swift,1,2023,7,7,148,24,39578178,32,93,...,82,G#,Major,51,22,53,1,0,15,3
Back To December (Taylor's Version),Taylor Swift,1,2023,7,7,139,17,39228929,16,72,...,142,D,Major,50,20,64,1,0,12,3
Donï¿½ï¿½ï¿½t Bl,Taylor Swift,1,2017,11,8,4875,23,685032533,19,45,...,136,A,Minor,62,19,53,11,0,6,4


In [23]:
# much simpler version
df_no_index[df_no_index["artist(s)_name"] == 'Taylor Swift']

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
I Can See You (Taylorï¿½ï¿½ï¿½s Version) (From The,Taylor Swift,1,2023,7,7,516,38,52135248,73,119,...,123,F#,Major,69,82,76,6,0,6,3
Anti-Hero,Taylor Swift,1,2022,10,21,9082,56,999748277,242,142,...,97,E,Major,64,51,63,12,0,19,5
Blank Space,Taylor Swift,1,2014,1,1,11434,53,1355959075,154,123,...,96,F,Major,75,57,68,9,0,13,6
Style,Taylor Swift,1,2014,1,1,7830,42,786181836,94,111,...,95,D,Major,60,48,79,0,0,12,4
cardigan,Taylor Swift,1,2020,7,24,7923,29,812019557,106,112,...,130,,Minor,61,53,58,55,0,27,4
Karma,Taylor Swift,1,2022,10,21,3818,23,404562836,37,55,...,90,G#,Major,64,10,62,7,0,48,7
Enchanted (Taylor's Version),Taylor Swift,1,2023,7,7,148,24,39578178,32,93,...,82,G#,Major,51,22,53,1,0,15,3
Back To December (Taylor's Version),Taylor Swift,1,2023,7,7,139,17,39228929,16,72,...,142,D,Major,50,20,64,1,0,12,3
Donï¿½ï¿½ï¿½t Bl,Taylor Swift,1,2017,11,8,4875,23,685032533,19,45,...,136,A,Minor,62,19,53,11,0,6,4


In [24]:
# can use multiple filters as well  
df_no_index.loc[(df_no_index["artist(s)_name"] == 'Taylor Swift') & (df_no_index.released_year == 2019)]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
All Of The Girls You Loved Before,Taylor Swift,1,2019,8,23,1282,6,185240616,26,6,...,96,D,Major,72,40,47,71,0,13,4


In [25]:
# can use multiple filters as well  
df_no_index.loc[(df_no_index.released_year == 2020) | (df_no_index.released_year == 2019)]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
Angels Like You,Miley Cyrus,1,2020,11,27,3372,19,570515054,65,48,...,122,F,Major,67,49,64,10,0,10,3
Blinding Lights,The Weeknd,1,2019,11,29,43899,69,3703895074,672,199,...,171,C#,Major,50,38,80,0,0,9,7
cardigan,Taylor Swift,1,2020,7,24,7923,29,812019557,106,112,...,130,,Minor,61,53,58,55,0,27,4
Heat Waves,Glass Animals,1,2020,6,28,22543,63,2557975762,386,144,...,81,B,Major,76,53,53,44,0,9,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Callaita,"Bad Bunny, Tainy",2,2019,5,31,9539,15,1304313953,162,116,...,176,D,Major,61,24,62,60,0,24,31
San Lucas,Kevin Kaarl,1,2019,11,7,407,1,244891912,5,0,...,92,G,Major,58,27,36,86,0,9,3
Caile,Luar La L,1,2020,12,18,1494,2,273914335,17,12,...,122,,Major,70,46,76,30,0,9,45
Mary On A Cross,Ghost,1,2019,9,13,2668,2,387080183,38,266,...,130,B,Major,47,56,90,0,0,10,4


In [26]:
# can use list filters as well  
df_no_index.loc[df_no_index.released_year.isin([2019, 2020])]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
Angels Like You,Miley Cyrus,1,2020,11,27,3372,19,570515054,65,48,...,122,F,Major,67,49,64,10,0,10,3
Blinding Lights,The Weeknd,1,2019,11,29,43899,69,3703895074,672,199,...,171,C#,Major,50,38,80,0,0,9,7
cardigan,Taylor Swift,1,2020,7,24,7923,29,812019557,106,112,...,130,,Minor,61,53,58,55,0,27,4
Heat Waves,Glass Animals,1,2020,6,28,22543,63,2557975762,386,144,...,81,B,Major,76,53,53,44,0,9,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Callaita,"Bad Bunny, Tainy",2,2019,5,31,9539,15,1304313953,162,116,...,176,D,Major,61,24,62,60,0,24,31
San Lucas,Kevin Kaarl,1,2019,11,7,407,1,244891912,5,0,...,92,G,Major,58,27,36,86,0,9,3
Caile,Luar La L,1,2020,12,18,1494,2,273914335,17,12,...,122,,Major,70,46,76,30,0,9,45
Mary On A Cross,Ghost,1,2019,9,13,2668,2,387080183,38,266,...,130,B,Major,47,56,90,0,0,10,4


In [27]:
df_no_index.loc[df_no_index.released_year.notnull()]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,263,...,125,B,Major,80,89,83,31,0,8,4
LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,126,...,92,C#,Major,71,61,74,7,0,10,4
vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,207,...,138,F,Major,51,32,53,17,0,31,6
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,133,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,13,...,144,A,Major,60,24,39,57,0,8,3
Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,0,...,166,F#,Major,42,7,24,83,1,12,6
A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,0,...,92,C#,Major,80,81,67,4,0,8,6
En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,26,...,97,C#,Major,82,67,77,8,0,12,5


In [28]:
# modern version
df_no_index[df_no_index.released_year.isna() == False]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,263,...,125,B,Major,80,89,83,31,0,8,4
LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,126,...,92,C#,Major,71,61,74,7,0,10,4
vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,207,...,138,F,Major,51,32,53,17,0,31,6
Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,207,...,170,A,Major,55,58,72,11,0,11,15
WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,133,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,13,...,144,A,Major,60,24,39,57,0,8,3
Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,0,...,166,F#,Major,42,7,24,83,1,12,6
A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,0,...,92,C#,Major,80,81,67,4,0,8,6
En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,26,...,97,C#,Major,82,67,77,8,0,12,5


In [29]:
df_no_index.loc[df_no_index.released_year.isnull()]

Unnamed: 0_level_0,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,in_apple_charts,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [30]:
# describe
df_no_index.streams.describe()

count           953
unique          949
top       723894473
freq              2
Name: streams, dtype: object

In [31]:
# uniq amount artists
df_no_index.artist_count.unique()

array([2, 1, 3, 8, 4, 5, 6, 7], dtype=int64)

In [32]:
# count with group by
df_no_index['artist(s)_name'].value_counts()

artist(s)_name
Taylor Swift                 34
The Weeknd                   22
Bad Bunny                    19
SZA                          19
Harry Styles                 17
                             ..
Karol G, Ovy On The Drums     1
Coolio, L.V.                  1
Kordhell                      1
Kenia OS                      1
Feid, Sech, Jhayco            1
Name: count, Length: 645, dtype: int64

In [33]:
df.head(10)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,Major,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Minor,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,Major,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Minor,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Minor,81,56,48,21,0,8,33


In [34]:
# can simply create table name - values
df['artist(s)_name'] + " presents: " + df['track_name']

0      Latto, Jung Kook presents: Seven (feat. Latto)...
1                             Myke Towers presents: LALA
2                       Olivia Rodrigo presents: vampire
3                    Taylor Swift presents: Cruel Summer
4                     Bad Bunny presents: WHERE SHE GOES
                             ...                        
948                  Selena Gomez presents: My Mind & Me
949     Taylor Swift presents: Bigger Than The Whole Sky
950    Feid, Paulo Londra presents: A Veces (feat. Feid)
951           Feid, Sech, Jhayco presents: En La De Ella
952                            Burna Boy presents: Alone
Length: 953, dtype: object

In [35]:
# group by block
# count by each year
df.groupby('released_year').released_year.count()

released_year
1930      1
1942      1
1946      1
1950      1
1952      1
1957      2
1958      3
1959      2
1963      3
1968      1
1970      2
1971      1
1973      1
1975      2
1979      1
1982      2
1983      1
1984      4
1985      2
1986      2
1987      1
1991      2
1992      1
1994      1
1995      2
1996      1
1997      1
1998      1
1999      5
2000      4
2002      6
2003      2
2004      4
2005      1
2007      1
2008      2
2010      7
2011     10
2012     10
2013     13
2014     13
2015     11
2016     18
2017     23
2018     10
2019     36
2020     37
2021    119
2022    402
2023    175
Name: released_year, dtype: int64

In [36]:
df.groupby('released_year').bpm.max()

released_year
1930    130
1942     96
1946    139
1950    143
1952    140
1957    175
1958    152
1959    134
1963    202
1968    116
1970    148
1971    147
1973     80
1975    102
1979     95
1982    151
1983    117
1984    115
1985    112
1986    180
1987    125
1991    117
1992     92
1994    150
1995     92
1996     88
1997    144
1998    156
1999    173
2000    147
2002    171
2003    148
2004    172
2005     93
2007    140
2008    138
2010    164
2011    162
2012    206
2013    160
2014    160
2015    180
2016    186
2017    172
2018    150
2019    176
2020    186
2021    206
2022    200
2023    204
Name: bpm, dtype: int64

In [37]:
df.groupby('released_year').apply(lambda df: df['artist(s)_name'].iloc[0])

released_year
1930                              Styrx, utku INC, Thezth
1942    Bing Crosby, John Scott Trotter & His Orchestr...
1946                                        Nat King Cole
1950                    Frank Sinatra, B. Swanson Quartet
1952                                            Burl Ives
1957                                          Bobby Helms
1958                                           Brenda Lee
1959                                          Dean Martin
1963                                        Andy Williams
1968                         Creedence Clearwater Revival
1970                                      Josï¿½ï¿½ Felic
1971    John Lennon, The Harlem Community Choir, The P...
1973                                            Aerosmith
1975                                            Vance Joy
1979                                       Paul McCartney
1982                                        Musical Youth
1983                                           The Police


In [38]:
# group by list
df.groupby(['released_year', 'released_month']).apply(lambda df: df['artist(s)_name'].iloc[0])

released_year  released_month
1930           1                                           Styrx, utku INC, Thezth
1942           1                 Bing Crosby, John Scott Trotter & His Orchestr...
1946           11                                                    Nat King Cole
1950           1                                 Frank Sinatra, B. Swanson Quartet
1952           1                                                         Burl Ives
                                                       ...                        
2023           3                                                       Myke Towers
               4                                         Bad Bunny, Grupo Frontera
               5                                                         Bad Bunny
               6                                                    Olivia Rodrigo
               7                                                  Latto, Jung Kook
Length: 162, dtype: object

In [39]:
df.groupby('artist(s)_name').released_year.agg([max, min, len])

Unnamed: 0_level_0,max,min,len
artist(s)_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
(G)I-DLE,2023,2022,2
"21 Savage, Gunna",2022,2022,1
"24kgoldn, Iann Dior",2020,2020,1
50 Cent,2002,2002,1
"A$AP Rocky, Metro Boomin, Roisee",2023,2023,1
...,...,...,...
j-hope,2022,2022,1
"j-hope, J. Cole",2023,2023,1
sped up 8282,1997,1997,1
"sped up nightcore, ARIZONATEARS, Lil Uzi Vert",2023,2023,1


In [40]:
# dont forget to combine it
df.groupby('artist(s)_name').released_year.agg([max, min, len]).loc['teto']

max    2022
min    2022
len       1
Name: teto, dtype: int64

In [41]:
# sorting
df.groupby('artist(s)_name').released_year.agg([max, min, len]).sort_values(by = 'min')

Unnamed: 0_level_0,max,min,len
artist(s)_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Styrx, utku INC, Thezth",1930,1930,1
"Bing Crosby, John Scott Trotter & His Orchestra, Ken Darby Singers",1942,1942,1
Nat King Cole,1959,1946,2
"Frank Sinatra, B. Swanson Quartet",1950,1950,1
Burl Ives,1952,1952,1
...,...,...,...
"Grupo Marca Registrada, Grupo Frontera",2023,2023,1
"Sam Smith, Calvin Harris, Jessie Reyez",2023,2023,1
"Gorillaz, Bad Bunny",2023,2023,1
"Israel & Rodolffo, Mari Fernandez",2023,2023,1


In [42]:
df.dtypes

track_name              object
artist(s)_name          object
artist_count             int64
released_year            int64
released_month           int64
released_day             int64
in_spotify_playlists     int64
in_spotify_charts        int64
streams                 object
in_apple_playlists       int64
in_apple_charts          int64
in_deezer_playlists     object
in_deezer_charts         int64
in_shazam_charts        object
bpm                      int64
key                     object
mode                    object
danceability_%           int64
valence_%                int64
energy_%                 int64
acousticness_%           int64
instrumentalness_%       int64
liveness_%               int64
speechiness_%            int64
dtype: object

In [60]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 953 entries, 0 to 952
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   track_name            953 non-null    object
 1   artist(s)_name        953 non-null    object
 2   artist_count          953 non-null    int64 
 3   released_year         953 non-null    int64 
 4   released_month        953 non-null    int64 
 5   released_day          953 non-null    int64 
 6   in_spotify_playlists  953 non-null    int64 
 7   in_spotify_charts     953 non-null    int64 
 8   streams               953 non-null    object
 9   in_apple_playlists    953 non-null    int64 
 10  in_apple_charts       953 non-null    int64 
 11  in_deezer_playlists   953 non-null    object
 12  in_deezer_charts      953 non-null    int64 
 13  in_shazam_charts      903 non-null    object
 14  bpm                   953 non-null    int64 
 15  key                   858 non-null    ob

In [43]:
df.head(10)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,Major,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Minor,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,Major,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Minor,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Minor,81,56,48,21,0,8,33


In [44]:
df_1 = df.in_apple_playlists.astype('float64')

In [45]:
df.head(10)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,Major,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Minor,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,Major,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Minor,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Minor,81,56,48,21,0,8,33


In [46]:
df_1.head(10)

0     43.0
1     48.0
2     94.0
3    116.0
4     84.0
5     67.0
6     34.0
7     25.0
8     60.0
9     49.0
Name: in_apple_playlists, dtype: float64

In [47]:
# try to find nulls
df[pd.isnull(df.in_spotify_charts)]

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%


In [48]:
# replase nan to -1
df.in_spotify_charts.fillna("-1")

0      147
1       48
2      113
3      100
4       50
      ... 
948      0
949      0
950      0
951      0
952      2
Name: in_spotify_charts, Length: 953, dtype: int64

In [49]:
df['mode'].replace("Major", "M")

0          M
1          M
2          M
3          M
4      Minor
       ...  
948        M
949        M
950        M
951        M
952    Minor
Name: mode, Length: 953, dtype: object

In [50]:
df_test = df['mode'].replace("Major", "M")

In [51]:
df.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6


In [52]:
df['mode'] = df['mode'].replace("Major", "M")
df['mode'] = df['mode'].replace("Minor", "Mi")

In [53]:
df.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,M,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,M,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,M,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,M,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Mi,65,23,80,14,63,11,6


In [54]:
df_concat_1 = df.head(10)
df_concat_2 = df.head(10)

In [55]:
# concat
pd.concat([df_concat_1, df_concat_2])

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,M,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,M,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,M,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,M,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Mi,65,23,80,14,63,11,6
5,Sprinter,"Dave, Central Cee",2,2023,6,1,2186,91,183706234,67,...,141,C#,M,92,66,58,19,0,8,24
6,Ella Baila Sola,"Eslabon Armado, Peso Pluma",2,2023,3,16,3090,50,725980112,34,...,148,F,Mi,67,83,76,48,0,8,3
7,Columbia,Quevedo,1,2023,7,7,714,43,58149378,25,...,100,F,M,67,26,71,37,0,11,4
8,fukumean,Gunna,1,2023,5,15,1096,83,95217315,60,...,130,C#,Mi,85,22,62,12,0,28,9
9,La Bebe - Remix,"Peso Pluma, Yng Lvcas",2,2023,3,17,2953,44,553634067,49,...,170,D,Mi,81,56,48,21,0,8,33


In [56]:
df_concat_2 = df_concat_2.rename(columns={'released_year': 'test_year'})

In [57]:
df_concat_2.head()

Unnamed: 0,track_name,artist(s)_name,artist_count,test_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,M,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,M,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,M,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,M,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Mi,65,23,80,14,63,11,6


In [58]:
# join 
df_concat_1.join(df_concat_2, lsuffix='track_name', rsuffix='track_name').dtypes

track_nametrack_name              object
artist(s)_nametrack_name          object
artist_counttrack_name             int64
released_year                      int64
released_monthtrack_name           int64
released_daytrack_name             int64
in_spotify_playliststrack_name     int64
in_spotify_chartstrack_name        int64
streamstrack_name                 object
in_apple_playliststrack_name       int64
in_apple_chartstrack_name          int64
in_deezer_playliststrack_name     object
in_deezer_chartstrack_name         int64
in_shazam_chartstrack_name        object
bpmtrack_name                      int64
keytrack_name                     object
modetrack_name                    object
danceability_%track_name           int64
valence_%track_name                int64
energy_%track_name                 int64
acousticness_%track_name           int64
instrumentalness_%track_name       int64
liveness_%track_name               int64
speechiness_%track_name            int64
track_nametrack_