In [1]:
import pandas as pd
from collections import Counter

In [2]:
data = pd.read_csv('spotify_songs.csv')
data["track_album_release_date"] = pd.to_datetime(data["track_album_release_date"])

In [3]:
data.head()

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,6f807x0ima9a1j3VPbc7VN,I Don't Care (with Justin Bieber) - Loud Luxur...,Ed Sheeran,66,2oCs0DGTsRO98Gh5ZSl2Cx,I Don't Care (with Justin Bieber) [Loud Luxury...,2019-06-14,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,6,-2.634,1,0.0583,0.102,0.0,0.0653,0.518,122.036,194754
1,0r7CVbZTWZgbTCYdfa2P31,Memories - Dillon Francis Remix,Maroon 5,67,63rPSO264uRjW1X5E6cWv6,Memories (Dillon Francis Remix),2019-12-13,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,11,-4.969,1,0.0373,0.0724,0.00421,0.357,0.693,99.972,162600
2,1z1Hg7Vb0AhHDiEmnDE79l,All the Time - Don Diablo Remix,Zara Larsson,70,1HoSmj2eLcsrR0vE9gThr4,All the Time (Don Diablo Remix),2019-07-05,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,-3.432,0,0.0742,0.0794,2.3e-05,0.11,0.613,124.008,176616
3,75FpbthrwQmzHlBJLuGdC7,Call You Mine - Keanu Silva Remix,The Chainsmokers,60,1nqYsOef1yKKuGOVchbsk6,Call You Mine - The Remixes,2019-07-19,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,7,-3.778,1,0.102,0.0287,9e-06,0.204,0.277,121.956,169093
4,1e8PAfcKUYoKkxPhrHqw4x,Someone You Loved - Future Humans Remix,Lewis Capaldi,69,7m7vv9wlQ4i0LFuJiE2zsQ,Someone You Loved (Future Humans Remix),2019-03-05,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,-4.672,1,0.0359,0.0803,0.0,0.0833,0.725,123.976,189052


In [12]:
genres = set(data["playlist_genre"])

c = Counter(list(data["track_name"]))

result = {}

for genre in genres:
  tracks = data[data["playlist_genre"] == genre]

  print(f"{len(tracks)} tracks in {genre}")

  years = set(release_date.year for release_date in data["track_album_release_date"])

  genre_best_df = pd.DataFrame(columns=data.columns)

  for year in years:
    year_tracks = tracks[[release.year == year for release in tracks["track_album_release_date"]]]

    if len(year_tracks) == 0:
      continue

    sorted_tracks = year_tracks.sort_values(by="track_popularity", ascending=False)

    best_track = sorted_tracks.iloc[0]
  
    print(f"{genre:10}, {year:4d}, {best_track['track_name']:60}\r", end="")

    genre_best_df = genre_best_df.append(best_track)

  stats = Counter(list(genre_best_df["track_artist"]))

  best = [item[0] for item in stats.most_common()[:1]]

  result[genre] = best

  print()


print(result)

6043 tracks in edm
edm       , 2020, Yummy                                                        Heldens & Gregor Salto Vocal Mix Edit
5507 tracks in pop
pop       , 2020, Yummy                                                       
5155 tracks in latin
latin     , 2020, Yummy                                                       
4951 tracks in rock
rock      , 2020, Woods                                                       
5431 tracks in r&b
r&b       , 2020, Yummy                                                       er
5746 tracks in rap
rap       , 2020, Life Is Good (feat. Drake)                                  
{'edm': ['Gino Soccio'], 'pop': ['Coldplay'], 'latin': ['Gloria Estefan'], 'rock': ['AC/DC'], 'r&b': ['Mariah Carey'], 'rap': ['Eminem']}


In [5]:
def most_popular_entry(artist_name):
  return data[data["track_artist"] == artist_name].sort_values(by="track_popularity", ascending=False).iloc[0]

In [13]:
popular_artists = ["Gloria Estefan", "AC/DC", "Eminem", "Gino Soccio", "Coldplay", "Mariah Carey"]

populars_df = pd.DataFrame(columns=data.columns)

for artist in popular_artists:
  print(artist)
  populars_df = populars_df.append(most_popular_entry(artist))
  print("Appended...")
  
print("--Finished--")

populars_df.to_csv("popular_entries.csv", index=False)

Gloria Estefan
Appended...
AC/DC
Appended...
Eminem
Appended...
Gino Soccio
Appended...
Coldplay
Appended...
Mariah Carey
Appended...
--Finished--


In [14]:
populars_df

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
17974,4aMT5LHe8A2uIc11H8Cx2m,Conga,Gloria Estefan,60,2YJ81wckAY1scqgIsFNVBW,Exitos de gloria estefan,1990-09-25,This Is Gloria Estefan,37i9dQZF1DXarsfY89zXC6,latin,...,2,-10.202,1,0.0464,0.16,0.0583,0.0563,0.882,122.219,255107
15155,08mG3Y1vljYA6bvDt4Wqkj,Back In Black,AC/DC,83,6mUdeDZCsExyJLMdAfDuwh,Back In Black,1980-07-25,"Classic Rock 70s 80s 90s, Rock Classics - 70s ...",3Ho3iO0iJykgEQNbjB2sic,rock,...,9,-5.678,1,0.047,0.011,0.00965,0.0828,0.763,188.386,255493
8759,4xkOaSrkexMciUUogZKVTS,'Till I Collapse,Eminem,83,2cWBwpqMsDJC1ZUwz813lo,The Eminem Show,2002-05-26,Gangster Rap | 100 % Gangster,2e0d7otcM1oyecfi3zZPWk,rap,...,1,-3.237,1,0.186,0.0622,0.0,0.0816,0.1,171.447,297787
31657,4lteGaLzU5veJOe6wnOvz4,Dancer,Gino Soccio,50,71NzsQgmjQ5rIm33Hpjv2V,Outline,1979-01-01,House/Electro/Progressive/Disco/Lofi/Synthwave,42jbIN5Zj0UUPdtmqxQRKF,edm,...,11,-9.576,0,0.0884,0.0183,0.552,0.0311,0.389,122.106,508120
3264,75JFxkI2RXiU7L9VXzMkle,The Scientist,Coldplay,83,0RHX9XECH8IVI3LNgWDpmQ,A Rush of Blood to the Head,2002-08-08,Mix ElectroPop//ElectroHouse// DeepHouse 2020,23swqzpOZwW1NhPiZ7iyFI,pop,...,5,-7.224,1,0.0243,0.727,1.7e-05,0.11,0.212,146.448,309600
25788,0bYg9bo50gSsH3LtXe2SQn,All I Want for Christmas Is You,Mariah Carey,90,61ulfFSmmxMhc2wCdmdMkN,Merry Christmas,1994-11-01,Christmas Soul,6FZYc2BvF7tColxO8PBShV,r&b,...,7,-7.462,1,0.0386,0.164,0.0,0.0708,0.346,150.277,241107
