In [2]:
import ast

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
%matplotlib inline
import seaborn as sns
from scipy import stats

import warnings

# Silences every warning category for the rest of the session
# (including pandas warnings such as SettingWithCopyWarning).
warnings.filterwarnings('ignore')

# Global matplotlib text sizing: identical tick-label size on both axes,
# slightly larger axis labels with a little padding.
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=10)
plt.rc('axes', labelsize=12, labelpad=5)

In [8]:
# Load the song table; the CSV's unnamed first column holds the saved
# row index, so it is used as the DataFrame index.
df = pd.read_csv(
    'drake_songs.csv',
    index_col='Unnamed: 0',
)

In [9]:
# Display the loaded frame (pandas truncates long frames to the first and last rows).
df

Unnamed: 0,title,album,release_date,featured_artists,producer_artists,writer_artists,genius_track_id,genius_album_id
0,0 to 100 / The Catch Up,#FYM12,2014-06-01,[],"['Chester Hansen', 'Vinylz', 'Ging', 'Boi-1da'...","['Chester Hansen', 'Ging', 'Nineteen85', 'Drak...",156640,914226.0
1,10 Bands,If You’re Reading This It’s Too Late,2015-02-13,[],"['Ging', 'Sevn Thomas', 'Boi-1da']","['Sevn Thomas', 'Ging', 'Boi-1da', 'Quentin Mi...",703738,119674.0
2,1Xtra Freestyle,Tim Westwood I Freestyles,2010-06-11,[],['Tim Westwood'],['Drake'],421444,883007.0
3,2,<single>,,[],[],[],2457033,
4,2011 Juno Awards In Toronto,<single>,2011-03-26,[],[],[],214614,
...,...,...,...,...,...,...,...,...
601,You Only Live Twice,Certified Lover Boy,2021-09-03,"['Rick Ross', 'Lil Wayne']","['Brian “B-Nasty” Reid', 'Bink!']","['Brian “B-Nasty” Reid', 'Bink!', 'Rick Ross',...",7165563,585647.0
602,You & The 6,If You’re Reading This It’s Too Late,2015-02-13,[],"['Vinylz', 'Allen Ritter', '!llmind', '40', 'B...","['Majid Jordan', 'Jenna Andrews', 'Jordan Ullm...",703755,119674.0
603,Zane Lowe Interview,<single>,,['Zane Lowe'],[],[],2464103,
604,Zodiac Sign,<single>,,['Jessie Reyez'],[],"['Jessie Reyez', 'Drake']",5362793,


In [84]:
# Show the dtype pandas assigned to each column.
df.dtypes

title                object
album                object
release_date         object
featured_artists     object
producer_artists     object
writer_artists       object
genius_track_id       int64
genius_album_id     float64
dtype: object

In [82]:
def _parse_artist_list(raw):
    """Convert one stringified artist list from the CSV into a list of names.

    The cells hold text such as "['Rick Ross', 'Lil Wayne']". The text is
    parsed as a Python literal so names containing commas or apostrophes
    survive intact, and every name is whitespace-stripped (a plain
    ``split(',')`` leaves a leading space on every name after the first).

    Parameters
    ----------
    raw : str or list
        The cell value. A value that is already a list is returned unchanged,
        which makes this cell safe to re-run.

    Returns
    -------
    list of str
        The artist names. An empty list is returned as ``['']`` to keep the
        sentinel the original string-splitting produced, because later cells
        test ``artists[0] != ''``.
    """
    if isinstance(raw, list):
        return raw
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        # Not a valid Python literal: fall back to the legacy best-effort split.
        parsed = raw.strip('[]').replace("'", "").split(',')
    if not isinstance(parsed, (list, tuple)):
        # A bare scalar literal (e.g. "'Drake'") is treated as a one-item list.
        parsed = [parsed]
    names = [str(name).strip() for name in parsed]
    return names or ['']


for column in ['featured_artists', 'producer_artists', 'writer_artists']:
    df[column] = df[column].apply(_parse_artist_list)

In [83]:
# Display 10 randomly chosen rows; no random_state is passed, so the rows differ on every run.
df.sample(10)

Unnamed: 0,title,album,release_date,featured_artists,producer_artists,writer_artists,genius_track_id,genius_album_id
210,I Guess It’s Fuck Me,Her Loss,2022-11-04,[],[TheLoudPack],"[Drake, IsThisShiv, MixedByTheBest, Dougie F]",8524045,966485.0
87,Concentrate*,<single>,,[],[],[],5701120,
546,Trophies,Young Money: Rise of an Empire,2013-12-30,[],"[Hit-Boy, Rey Reel, 40, Hagler]","[Rey Reel, Sharon Abshire, Bernard Joseph Gé...",225050,78014.0
68,Cannonball (Snippet),Heartbreak Drake,2009-12-19,[Colin Munroe],[],"[Colin Munroe, Drake]",3175723,359632.0
470,Started From the Bottom Deluxe - Don’t Delete,<single>,,[],[],[],2318813,
50,Between Us,<single>,2009-09-01,[Slakah the Beatchild],[Häzel],"[Slakah the Beatchild, Drake]",81686,
142,Fall For Your Type,<single>,,[],[],[Drake],1194,
303,My New Shit,Black Diamond 8 (Audible Narcotic),,[],[Nottz],"[Nottz, Drake]",2954,924577.0
327,One Man Show,<single>,2010-05-10,[],[],[],3893523,
338,OVO Sound Radio Episode 13 Tracklist,OVO Sound Radio Tracklists - Season 1 & 2,2016-01-16,[],[],[],2408264,128797.0


In [85]:
# Album titles treated as commercial releases. Titles are matched exactly
# against the `album` column, so spelling, capitalisation and the curly
# apostrophes must match the data character for character.
commercial_releases = [
    "So Far Gone",
    "Thank Me Later",
    "Take Care",
    "Care Package",
    "Nothing Was the Same",
    "If You’re Reading This It’s Too Late",
    "Views",
    "What a Time To Be Alive",
    "More Life",
    "Scary Hours",
    "Scorpion",
    "The Best in the World Pack",
    "Dark Lane Demo Tapes",
    "Scary Hours 2",
    "Certified Lover Boy",
    "Honestly, Nevermind",
    "Her Loss",
]

In [86]:
# Keep only the rows whose album is one of the commercial releases.
# Note: this rebinds `df`, so the unfiltered table is gone after this cell.
df = df.loc[df['album'].isin(commercial_releases)]

In [87]:
# Number of rows per album remaining in `df`, most frequent first.
# NOTE(review): albums with very low counts may indicate that the title is
# spelled or capitalised differently in the data than in
# `commercial_releases` -- confirm against the raw album names.
df['album'].value_counts()

Scorpion                                24
More Life                               23
Take Care                               22
Certified Lover Boy                     20
Views                                   20
If You’re Reading This It’s Too Late    19
So Far Gone                             18
Nothing Was the Same                    16
Thank Me Later                          16
Honestly, Nevermind                     14
Care Package                            13
Dark Lane Demo Tapes                    13
Her Loss                                 4
Scary Hours 2                            3
The Best in the World Pack               2
What a Time To Be Alive                  1
Scary Hours                              1
Name: album, dtype: int64

In [88]:
# Display 5 randomly chosen rows; no random_state is passed, so the rows differ on every run.
df.sample(5)

Unnamed: 0,title,album,release_date,featured_artists,producer_artists,writer_artists,genius_track_id,genius_album_id
312,Nice For What,Scorpion,2018-04-06,[],"[Murda Beatz, BlaqNmilD, 40, Corey Litwin]","[Birdman, Raekwon, Glenishe Rowe, RZA, Gho...",3580132,420582.0
423,Pound Cake / Paris Morton Music 2,Nothing Was the Same,2013-09-24,[JAY-Z],"[The Order, Matthew Burnett, Detail, Jordan...","[Jim Eliot, Method Man, RZA, Ellie Goulding...",218330,40475.0
163,From Florida With Love,Dark Lane Demo Tapes,2020-05-01,[],"[40, MexikoDro]","[40, MexikoDro, Drake]",5447963,630537.0
469,Started from the Bottom,Nothing Was the Same,2013-02-01,[],"[40, Mike Zombie]","[Drake, 40, Mike Zombie]",113898,40475.0
252,Let’s Call It Off,So Far Gone,2009-02-13,[Peter Bjorn and John],[Björn Yttling],"[40, Peter Bjorn and John, Drake]",4663,2627.0


In [174]:
# Maps a featured artist's name to a running count; populated by the loop in the next cell.
feature_count = {}

In [175]:
# Count how many rows of `df` each artist is featured on.
#
# The dict is reset here so that re-running this cell does not add to
# counts left over from a previous run.
feature_count = {}
for artists in df['featured_artists']:
    for artist in artists:
        # Strip stray whitespace so ' Lil Wayne' and 'Lil Wayne' share a key.
        name = artist.strip()
        if not name:
            # Rows without features carry an empty-string placeholder; skip it.
            continue
        # The first sighting counts as 1 (starting at 0 undercounted every artist by one).
        feature_count[name] = feature_count.get(name, 0) + 1
