# Dependencies

In [1]:
import pandas as pd
import numpy as np
from musixmatch import Musixmatch
from API_Key import api_key3
import time
from pprint import pprint

In [2]:
import sqlalchemy
from sqlalchemy import create_engine, inspect, Column, Integer, Float, String, Sequence, Boolean
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base

In [3]:
musixmatch = Musixmatch(api_key3)

# Setting up SQLlite

In [4]:
#Using declarative base
Base = declarative_base()
engine = create_engine('sqlite:///lyrics3.db', echo = False)

In [6]:
#defining the Tracks table
class Tracks(Base):
    __tablename__ = "tracks"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    album_id = Column(Integer)
    artist_id = Column(Integer)
    artist_name = Column(String)
    explicit = Column(Integer)
    has_lyrics = Column(Integer)
    lyrics_id = Column(Integer)
    track_id = Column(Integer, unique = True)
    track_name = Column(String)

In [7]:
#defining the Lyrics table
class Lyrics(Base):
    __tablename__ = "lyrics"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    explicit = Column(Integer)
    lyrics_body = Column(String)
    lyrics_id = Column(Integer, unique = True)
    lyrics_language = Column(String)
    lyrics_language_description = Column(String)

In [8]:
#defining the Artists table
class Artists(Base):
    __tablename__ = "artists"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer, unique = True)
    artist_name = Column(String)
    artist_q = Column(String)

In [9]:
#defining the Albums table
class Albums(Base):
    __tablename__ = "albums"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer)
    artist_name = Column(String)
    album_id = Column(Integer, unique = True)
    album_name = Column(String)

In [6]:
#defining the genres table
class Album_Genres(Base):
    __tablename__ = "album_genres"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer)
    artist_name = Column(String)
    album_id = Column(Integer)
    genre = Column(String)
    genre_id = Column(Integer)

In [5]:
#Creating the table metadata
Base.metadata.create_all(engine)
session = Session(engine)

In [6]:
#Binding engine to the metadata
meta = sqlalchemy.MetaData()
meta.reflect(bind = engine)

# MusixMatch API - Finding artists via text search from manually created list

In [11]:
artists_path = 'artist_names.xlsx'
artists_df = pd.read_excel(artists_path, sheetname = "Sheet1", header = None)
artists = list(artists_df[0].values)

In [12]:
#Connecting to tracks table
table = sqlalchemy.Table('artists', meta, autoload = True)

In [None]:
for artist in artists:
    print(f"getting artist names {artist}")
    result = musixmatch.artist_search(q_artist = artist, page = 1, page_size = 100, f_artist_id = '', f_artist_mbid = '')
    print('extracting results')
    artist_list = result['message']['body']['artist_list']
    artists = [artist['artist'] for artist in artist_list]
    print("creating package")
    for item in artists:
        package = {
            'artist_id': item['artist_id'],
            'artist_name': item['artist_name'],
            'artist_q': artist
        }
        try:
            print("writing package to database")
            session.execute(table.insert(), package)
        except:
            next
    print("committing to database")
    session.commit()

# MusixMatch API - Finding Albums from Artists

In [26]:
artists = pd.read_sql('artists', engine)
artists.dropna(inplace = True)
artist_ids = list(artists['artist_id'].values)

In [12]:
table = sqlalchemy.Table('albums', meta, autoload = True)

In [None]:
for artist_id in artist_ids:
    print(f"getting albums for artist {artist_id}")
    result = musixmatch.artist_albums_get(artist_id = artist_id, g_album_name = '', page = 1, page_size = 100, s_release_date = '')
    print("extracting the album_list")
    album_list = result['message']['body']['album_list']
    print("extracting individual albums")
    albums = [album['album'] for album in album_list]
    print("iterating through the list of albums")
    for item in albums:
        print("creating the package")
        package = {
            'album_id': item['album_id'],
            'album_name': item['album_name'],
            'artist_id': item['artist_id'],
            'artist_name': item['artist_name']
        }
        try:
            print("writing to the database")
            session.execute(table.insert(), package)
        except:
            next
    print("comitting to the database")
    session.commit()

# MusixMatch API - Scraping Tracks From Charts

In [19]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [10]:
country_codes = ['AU', 'CA', 'IE', 'IM', 'NZ', 'GB', 'US']

In [None]:
country_count = 1
for country in country_codes:
    
    print(f"Attempting country {country_count} for {country}")
    
    page_count = 1

    while page_count < 5:
        print(f"calling api attempt {page_count} for {country}")
        result = musixmatch.chart_tracks_get(page_count, 100, f_has_lyrics = True)
        tracks = result['message']['body']['track_list']
        track_list = [track['track'] for track in tracks]
        print(f"track list is {len(track_list)}")
        for track in track_list:
            package = {
                'album_id': track['album_id'],
                'artist_id': track['artist_id'],
                'artist_name': track['artist_name'],
                'explicit': track['explicit'],
                'has_lyrics': track['has_lyrics'],
                'lyrics_id': track['lyrics_id'],
                'track_id': track['track_id'],
                'track_name': track['track_name']
            }
            print(package['track_name'])
            print("writing to database")
            try:
                session.execute(table.insert(), package)
            except:
                next
        page_count += 1
        time.sleep(1)
        session.commit()
    
    country_count += 1

# MusixMatch API - finding tracks from albums

## Starting point is the tracks data from the charts

In [20]:
tracks = pd.read_sql_table('tracks', engine, index_col = "id", coerce_float = False)

In [21]:
album_ids = tracks['album_id'].values

In [24]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [None]:
album_count = 1

for album_id in album_ids:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_tracks_get(album_id = album_id, album_mbid = "", page = 1, page_size = 100)
    print(f"getting track_list for {album_id}")
    album = result['message']['body']['track_list']
    print("creating track list")
    track_list = [track['track'] for track in album]
    track_count = 1
    print(f"Iterating through tracks in album")
    for track in track_list:
        print(f"track is {track['track_id']}")
        package = {
                'album_id': int(track['album_id']),
                'artist_id': int(track['artist_id']),
                'artist_name': track['artist_name'],
                'explicit': int(track['explicit']),
                'has_lyrics': int(track['has_lyrics']),
                'lyrics_id': int(track['lyrics_id']),
                'track_id': int(track['track_id']),
                'track_name': track['track_name']
            }
        print(package['track_name'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1

## Starting point is the album's data

In [15]:
albums = pd.read_sql_table('albums', engine, index_col = "id", coerce_float = False)
albums = albums.iloc[647:]
album_ids = list(albums['album_id'].values)

In [16]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [None]:
album_count = 1

for album_id in album_ids:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_tracks_get(album_id = album_id, album_mbid = "", page = 1, page_size = 100)
    print(f"getting track_list for {album_id}")
    album = result['message']['body']['track_list']
    print("creating track list")
    track_list = [track['track'] for track in album]
    track_count = 1
    print(f"Iterating through tracks in album")
    for track in track_list:
        print(f"track is {track['track_id']}")
        package = {
                'album_id': int(track['album_id']),
                'artist_id': int(track['artist_id']),
                'artist_name': track['artist_name'],
                'explicit': int(track['explicit']),
                'has_lyrics': int(track['has_lyrics']),
                'lyrics_id': int(track['lyrics_id']),
                'track_id': int(track['track_id']),
                'track_name': track['track_name']
            }
        print(package['track_name'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1

## MusixMatch API - Scraping Lyrics from the Albums Data

In [28]:
#Connecting to lyrics table
table = sqlalchemy.Table('lyrics', meta, autoload = True)

In [29]:
tracks = pd.read_sql('tracks', engine, index_col = "id")
track_ids = tracks['track_id'].values

In [None]:
count = 1
for track in track_ids:
    print(f"calling API attempt {count} for track_id# {track}")
    print(f"track data type is {type(track)}")
    lyrics = musixmatch.track_lyrics_get(track)
    print("pulling out lyrics package")
    lyrics_package = lyrics['message']['body']['lyrics']
    print("pulling out package to insert into SQL database")
    package = {
        "explicit": lyrics_package['explicit'],
        "lyrics_body": lyrics_package["lyrics_body"][:-69],
        "lyrics_id": lyrics_package['lyrics_id'],
        "lyrics_language": lyrics_package['lyrics_language'],
        "lyrics_language_description": lyrics_package['lyrics_language_description'],
        "updated_time": lyrics_package['updated_time']
    }
    
    print(package['lyrics_body'][:10])
    print("writing to database")
    try:
        session.execute(table.insert(), package)
    except:
        next
    print("committing files to database")
    session.commit()
    count += 1 

# Acquiring Genre from Album

In [7]:
#Connecting to lyrics table
table = sqlalchemy.Table('album_genres', meta, autoload = True)

In [8]:
albums = pd.read_sql_table('tracks', engine, index_col = "id", coerce_float = False)
album_ids = list(albums['album_id'].values)

In [9]:
len(album_ids)

27742

In [10]:
album_array = np.array(album_ids)

In [11]:
album_unique = np.unique(album_array)
len(album_unique)

3398

In [12]:
genre_dic = {}

In [13]:
x = 0
for id in album_unique[0:2000]:
    
    result = musixmatch.album_get(album_id=id)
    album = result['message']['body']['album']['album_name']
    artist = result['message']['body']['album']['artist_name']
    print(f'album id is {id}')
    print(f'the album is {album}')
    print(f'the artist is {artist}')
    genre=''
    try:
        genre = result['message']['body']['album']['primary_genres']['music_genre_list'][0]['music_genre']['music_genre_name']
        #genre_list = [genre['music_genre'] for genre in genres]
        print(f'the album genre is {genre}')
    except IndexError:
        pass
    try:
        genre = result['message']['body']['album']['secondary_genres']['music_genre_list'][0]['music_genre']['music_genre_name']
        print(f'the album genre is {genre}')
    except IndexError:
        pass
    results_dic = {"artist": artist, "album": album, "genre": genre}
    genre_dic[f'{id}'] = results_dic
    print(f'---------- completed {x} of 1600')
    x+=1
#genre_dic['War']

album id is 10266031
the album is Big Willie Style
the artist is Will Smith
---------- completed 0 of 1600
album id is 10266041
the album is War
the artist is U2
the album genre is Rock
---------- completed 1 of 1600
album id is 10266127
the album is Led Zeppelin
the artist is Led Zeppelin
---------- completed 2 of 1600
album id is 10266180
the album is Power Windows
the artist is Rush
---------- completed 3 of 1600
album id is 10266202
the album is Willennium
the artist is Will Smith
---------- completed 4 of 1600
album id is 10266231
the album is Parachutes
the artist is Coldplay
---------- completed 5 of 1600
album id is 10266299
the album is Hold Your Fire
the artist is Rush
the album genre is Rock
---------- completed 6 of 1600
album id is 10266444
the album is Counterparts
the artist is Rush
---------- completed 7 of 1600
album id is 10267122
the album is 1956-1958: Cobra Recordings
the artist is Otis Rush
---------- completed 8 of 1600
album id is 10267496
the album is The Cobra

album id is 10290929
the album is Third Eye Open: The String Quartet Tribute to Tool
the artist is Vitamin String Quartet
---------- completed 80 of 1600
album id is 10291870
the album is - 1958
the artist is Otis Rush
---------- completed 81 of 1600
album id is 10291909
the album is Blues, Songs and Ballads
the artist is Tom Rush
---------- completed 82 of 1600
album id is 10292180
the album is Undisputed Attitude
the artist is Slayer
the album genre is Rock
---------- completed 83 of 1600
album id is 10292199
the album is Code Red
the artist is DJ Jazzy Jeff & The Fresh Prince
---------- completed 84 of 1600
album id is 10294108
the album is Metallica
the artist is Metallica
---------- completed 85 of 1600
album id is 10294187
the album is Heart Over Mind
the artist is Jennifer Rush
---------- completed 86 of 1600
album id is 10294194
the album is Jennifer Rush
the artist is Jennifer Rush
the album genre is Rock
---------- completed 87 of 1600
album id is 10295104
the album is Big Wi

album id is 10333185
the album is Greatest Hits
the artist is DJ Jazzy Jeff & The Fresh Prince
---------- completed 156 of 1600
album id is 10333681
the album is Big Ones
the artist is Aerosmith
---------- completed 157 of 1600
album id is 10334791
the album is Hell Awaits
the artist is Slayer
---------- completed 158 of 1600
album id is 10335664
the album is Rush
the artist is Darude
---------- completed 159 of 1600
album id is 10335742
the album is Next to You
the artist is Darude
---------- completed 160 of 1600
album id is 10336088
the album is Psychoanalysis: What Is It?
the artist is Prince Paul
---------- completed 161 of 1600
album id is 10338312
the album is Load
the artist is Metallica
---------- completed 162 of 1600
album id is 10338419
the album is And In This Corner...
the artist is DJ Jazzy Jeff & The Fresh Prince
the album genre is Hip Hop/Rap
---------- completed 163 of 1600
album id is 10338602
the album is St. Anger
the artist is Metallica
---------- completed 164 of

album id is 10410136
the album is Peace Love Death Metal
the artist is Eagles of Death Metal
---------- completed 231 of 1600
album id is 10411452
the album is Western Music
the artist is Will Oldham
---------- completed 232 of 1600
album id is 10411457
the album is The Whitey on the Moon UK LP
the artist is Department of Eagles
---------- completed 233 of 1600
album id is 10411460
the album is Black/Rich Music
the artist is Will Oldham
---------- completed 234 of 1600
album id is 10411468
the album is Happy Child / Forest Time
the artist is Bonnie "Prince" Billy
the album genre is Alternative
---------- completed 235 of 1600
album id is 10411497
the album is Hush Up!!!
the artist is Prince Buster
---------- completed 236 of 1600
album id is 10412323
the album is Double Trouble
the artist is Otis Rush
---------- completed 237 of 1600
album id is 10412407
the album is Black Dissimulation
the artist is Bonnie "Prince" Billy
---------- completed 238 of 1600
album id is 10412410
the album 

album id is 10496307
the album is The Everlasting Love (New and Best)
the artist is U2
---------- completed 307 of 1600
album id is 10499287
the album is Sandstorm
the artist is Darude
---------- completed 308 of 1600
album id is 10499507
the album is No More Workhorse Blues
the artist is Bonnie "Prince" Billy
the album genre is Alternative
---------- completed 309 of 1600
album id is 10503008
the album is The Hits/The B-Sides
the artist is Prince
---------- completed 310 of 1600
album id is 10504480
the album is Lost and Found
the artist is Will Smith
---------- completed 311 of 1600
album id is 10509748
the album is Switch
the artist is Will Smith
---------- completed 312 of 1600
album id is 10509865
the album is Any Place I'm Going
the artist is Otis Rush
---------- completed 313 of 1600
album id is 10511855
the album is Make Believe
the artist is Weezer
---------- completed 314 of 1600
album id is 10512229
the album is Trolling for Owls
the artist is Tom Rush
---------- completed 3

album id is 10590514
the album is ...To the Music
the artist is A*Teens
---------- completed 381 of 1600
album id is 10590516
the album is Gimme! Gimme! Gimme! (A Man After Midnight)
the artist is A*Teens
---------- completed 382 of 1600
album id is 10595952
the album is Happy People
the artist is Prince Ital Joe feat. Marky Mark
---------- completed 383 of 1600
album id is 10596058
the album is Gettin' Jiggy Wit It
the artist is Will Smith
---------- completed 384 of 1600
album id is 10596257
the album is Hell Awaits
the artist is Slayer
---------- completed 385 of 1600
album id is 10597438
the album is Silver and Gold 1973-1979
the artist is Prince Far I
---------- completed 386 of 1600
album id is 10597733
the album is Photograph
the artist is Weezer
---------- completed 387 of 1600
album id is 10603657
the album is Can’t Wait to Be With You
the artist is DJ Jazzy Jeff & The Fresh Prince
---------- completed 388 of 1600
album id is 10603667
the album is I’m Looking for the One (To B

album id is 10673756
the album is Cream
the artist is Prince & The New Power Generation
---------- completed 453 of 1600
album id is 10675945
the album is Psalms for I
the artist is Prince Far I
---------- completed 454 of 1600
album id is 10676661
the album is Sandstorm
the artist is Darude
---------- completed 455 of 1600
album id is 10676876
the album is B'Day
the artist is Beyoncé
---------- completed 456 of 1600
album id is 10678002
the album is B'Day
the artist is Beyoncé
---------- completed 457 of 1600
album id is 10678606
the album is Gettin’ Jiggy Wit It
the artist is Will Smith
---------- completed 458 of 1600
album id is 10678636
the album is Freakin' It
the artist is Will Smith
---------- completed 459 of 1600
album id is 10678639
the album is Freakin' It
the artist is Will Smith
---------- completed 460 of 1600
album id is 10678876
the album is Lose Yourself
the artist is Eminem
---------- completed 461 of 1600
album id is 10678897
the album is Bad Meets Evil: Scary Music

album id is 10783954
the album is Mamma Mia
the artist is A*Teens
---------- completed 529 of 1600
album id is 10791347
the album is The Hit Box
the artist is Jennifer Rush
---------- completed 530 of 1600
album id is 10792412
the album is Show No Mercy
the artist is Slayer
---------- completed 531 of 1600
album id is 10798568
the album is Seasons in the Abyss
the artist is Slayer
---------- completed 532 of 1600
album id is 10798940
the album is Jennifer Rush: International Version
the artist is Jennifer Rush
---------- completed 533 of 1600
album id is 10801148
the album is Pump
the artist is Aerosmith
---------- completed 534 of 1600
album id is 10801348
the album is Hotel California
the artist is Eagles
---------- completed 535 of 1600
album id is 10801643
the album is Chasing Pavements
the artist is Adele
---------- completed 536 of 1600
album id is 10801645
the album is Chasing Pavements
the artist is Adele
---------- completed 537 of 1600
album id is 10801647
the album is Hometo

album id is 10894840
the album is Wear It's At
the artist is The Rubettes
---------- completed 603 of 1600
album id is 10897782
the album is Wrong End of the Rainbow
the artist is Tom Rush
---------- completed 604 of 1600
album id is 10899585
the album is Tops
the artist is Otis Rush
the album genre is Blues
---------- completed 605 of 1600
album id is 10899726
the album is One Family: A Christmas Album
the artist is Kelly Price
---------- completed 606 of 1600
album id is 10901478
the album is Feliz Navidad
the artist is José Feliciano
---------- completed 607 of 1600
album id is 10902602
the album is El Scorcho
the artist is Weezer
---------- completed 608 of 1600
album id is 10911283
the album is Feel the Beat / Sandstorm
the artist is Darude
---------- completed 609 of 1600
album id is 10911995
the album is Zooropa
the artist is U2
the album genre is Rock
---------- completed 610 of 1600
album id is 10912871
the album is Dangerously in Love
the artist is Beyoncé
---------- complete

album id is 11036623
the album is New Years Dub
the artist is Musique vs. U2
---------- completed 679 of 1600
album id is 11038933
the album is Maxoom
the artist is Frank Marino & Mahogany Rush
---------- completed 680 of 1600
album id is 11038935
the album is Child of the Novelty
the artist is Frank Marino & Mahogany Rush
---------- completed 681 of 1600
album id is 11038936
the album is Strange Universe
the artist is Frank Marino & Mahogany Rush
---------- completed 682 of 1600
album id is 11042315
the album is Minneapolis Genius (The Historic 1977 Recordings)
the artist is 94 East
---------- completed 683 of 1600
album id is 11050719
the album is The Big Money
the artist is Rush
---------- completed 684 of 1600
album id is 11051769
the album is Raditude
the artist is Weezer
---------- completed 685 of 1600
album id is 11054055
the album is Stick to My Side
the artist is Pantha du Prince
---------- completed 686 of 1600
album id is 11056795
the album is Now Is the Hour
the artist is 

album id is 11338488
the album is The Twilight Saga: Breaking Dawn, Pt. 1 (Original Motion Picture Soundtrack) [Deluxe Version]
the artist is Various Artists
the album genre is Soundtrack
---------- completed 743 of 1600
album id is 11338989
the album is The Marshall Mathers LP
the artist is Eminem
---------- completed 744 of 1600
album id is 11339157
the album is Days are Gone
the artist is Haim
---------- completed 745 of 1600
album id is 11339238
the album is The Beavis and Butt-Head Experience
the artist is Beavis and Butt-Head, Aerosmith, Anthrax, Cher, Jackyl, Nirvana, Primus, Red Hot Chili Peppers, Run-DMC, Sir Mix-A-Lot & White Zombie
the album genre is Rock
---------- completed 746 of 1600
album id is 11339696
the album is iTunes Festival: London 2011
the artist is Coldplay
the album genre is Alternative
---------- completed 747 of 1600
album id is 11339714
the album is Parachutes
the artist is Coldplay
the album genre is Rock
---------- completed 748 of 1600
album id is 11339

album id is 12188405
the album is My Favorite Things
the artist is DJ Rush
---------- completed 797 of 1600
album id is 12245152
the album is Unfinal Call: A Sum of Bonnie 'Prince' Billy Worx
the artist is Bonnie "Prince" Billy
---------- completed 798 of 1600
album id is 12265955
the album is Hurley
the artist is Weezer
---------- completed 799 of 1600
album id is 12278456
the album is So Many Roads, So Many Trains / I'm Satisfied
the artist is Otis Rush
---------- completed 800 of 1600
album id is 12289717
the album is Die Grosse ABBA-Party
the artist is ABBA-Esque
---------- completed 801 of 1600
album id is 12300211
the album is Hurley
the artist is Weezer
the album genre is Alternative
---------- completed 802 of 1600
album id is 12316371
the album is The Ballad of Ronnie Drew
the artist is U2, The Dubliners, Kíla & A Band of Bowsies
---------- completed 803 of 1600
album id is 12752800
the album is Pinkerton
the artist is Weezer
---------- completed 804 of 1600
album id is 128712

album id is 13758093
the album is Eagles Greatest Hits, Volume 2
the artist is Eagles
---------- completed 871 of 1600
album id is 13758094
the album is Eagles Greatest Hits, Volume 2
the artist is Eagles
---------- completed 872 of 1600
album id is 13758095
the album is Eagles Greatest Hits, Volume 2
the artist is Eagles
---------- completed 873 of 1600
album id is 13758102
the album is Led Zeppelin
the artist is Led Zeppelin
---------- completed 874 of 1600
album id is 13758103
the album is Led Zeppelin II
the artist is Led Zeppelin
---------- completed 875 of 1600
album id is 13758150
the album is I See a Darkness
the artist is Bonnie "Prince" Billy
---------- completed 876 of 1600
album id is 13758160
the album is Test for Echo
the artist is Rush
---------- completed 877 of 1600
album id is 13758227
the album is Reign in Blood
the artist is Slayer
---------- completed 878 of 1600
album id is 13758228
the album is Reign in Blood
the artist is Slayer
---------- completed 879 of 1600


album id is 13770359
the album is Ease Down the Road
the artist is Bonnie "Prince" Billy
the album genre is Alternative
the album genre is Country
---------- completed 953 of 1600
album id is 13770360
the album is The Letting Go
the artist is Bonnie "Prince" Billy
the album genre is Alternative
the album genre is Country
---------- completed 954 of 1600
album id is 13770361
the album is The Letting Go
the artist is Bonnie "Prince" Billy
---------- completed 955 of 1600
album id is 13770364
the album is Master and Everyone
the artist is Bonnie "Prince" Billy
---------- completed 956 of 1600
album id is 13770366
the album is Greatest Palace Music
the artist is Bonnie "Prince" Billy
---------- completed 957 of 1600
album id is 13770427
the album is Diabolus In Musica
the artist is Slayer
the album genre is Heavy Metal
---------- completed 958 of 1600
album id is 13770536
the album is How to Dismantle an Atomic Bomb
the artist is U2
---------- completed 959 of 1600
album id is 13770542
the

album id is 13787602
the album is God Hates Us All
the artist is Slayer
---------- completed 1036 of 1600
album id is 13788286
the album is Please Come Home for Christmas
the artist is Eagles
---------- completed 1037 of 1600
album id is 13788287
the album is Please Come Home for Christmas
the artist is Eagles
---------- completed 1038 of 1600
album id is 13788581
the album is Permanent Waves
the artist is Rush
---------- completed 1039 of 1600
album id is 13788880
the album is In Through the Out Door
the artist is Led Zeppelin
---------- completed 1040 of 1600
album id is 13788884
the album is Led Zeppelin II
the artist is Led Zeppelin
---------- completed 1041 of 1600
album id is 13788887
the album is Physical Graffiti
the artist is Led Zeppelin
---------- completed 1042 of 1600
album id is 13789570
the album is Parachutes
the artist is Coldplay
---------- completed 1043 of 1600
album id is 13790121
the album is A Rush of Blood to the Head
the artist is Coldplay
---------- completed 

album id is 13802779
the album is Grace Under Pressure
the artist is Rush
---------- completed 1115 of 1600
album id is 13803399
the album is Diabolus in Musica
the artist is Slayer
---------- completed 1116 of 1600
album id is 13803768
the album is Endless Flight
the artist is Leo Sayer
---------- completed 1117 of 1600
album id is 13804273
the album is Led Zeppelin
the artist is Led Zeppelin
---------- completed 1118 of 1600
album id is 13804274
the album is Led Zeppelin II
the artist is Led Zeppelin
---------- completed 1119 of 1600
album id is 13804275
the album is Led Zeppelin III
the artist is Led Zeppelin
---------- completed 1120 of 1600
album id is 13804459
the album is …and Justice for All
the artist is Metallica
---------- completed 1121 of 1600
album id is 13805435
the album is Rush
the artist is Rush
---------- completed 1122 of 1600
album id is 13805567
the album is Permanent Vacation
the artist is Aerosmith
---------- completed 1123 of 1600
album id is 13805672
the album

album id is 13824581
the album is How to Dismantle an Atomic Bomb
the artist is U2
---------- completed 1198 of 1600
album id is 13824582
the album is How to Dismantle an Atomic Bomb
the artist is U2
---------- completed 1199 of 1600
album id is 13824583
the album is How to Dismantle an Atomic Bomb
the artist is U2
---------- completed 1200 of 1600
album id is 13825012
the album is Hurley
the artist is Weezer
---------- completed 1201 of 1600
album id is 13825380
the album is The Essential John Denver
the artist is John Denver
---------- completed 1202 of 1600
album id is 13825583
the album is Kill 'em All
the artist is Metallica
---------- completed 1203 of 1600
album id is 13825584
the album is Ride the Lightning
the artist is Metallica
---------- completed 1204 of 1600
album id is 13825586
the album is …and Justice for All
the artist is Metallica
---------- completed 1205 of 1600
album id is 13826126
the album is South of Heaven
the artist is Slayer
the album genre is Heavy Metal
--

album id is 13845607
the album is 21
the artist is Adele
---------- completed 1276 of 1600
album id is 13845666
the album is 4
the artist is Beyoncé
the album genre is Pop
---------- completed 1277 of 1600
album id is 13845668
the album is 4
the artist is Beyoncé
---------- completed 1278 of 1600
album id is 13845830
the album is Reign in Blood
the artist is Slayer
---------- completed 1279 of 1600
album id is 13845982
the album is Hell: The Sequel
the artist is Bad Meets Evil
---------- completed 1280 of 1600
album id is 13845983
the album is Hell: The Sequel
the artist is Bad Meets Evil
---------- completed 1281 of 1600
album id is 13846671
the album is Hell: The Sequel
the artist is Bad Meets Evil
the album genre is Hip Hop/Rap
---------- completed 1282 of 1600
album id is 13846731
the album is Musicology
the artist is Prince
---------- completed 1283 of 1600
album id is 13846736
the album is Crystal Ball
the artist is Prince
---------- completed 1284 of 1600
album id is 13846738
th

album id is 13893747
the album is The Power of Rhythm
the artist is B.G. The Prince of Rap
---------- completed 1350 of 1600
album id is 13894407
the album is Wolfroy Goes to Town
the artist is Bonnie "Prince" Billy
---------- completed 1351 of 1600
album id is 13895232
the album is Mylo Xyloto
the artist is Coldplay
the album genre is Rock
---------- completed 1352 of 1600
album id is 13895258
the album is If I Ruled the World (feat. Iyaz)
the artist is Big Time Rush
---------- completed 1353 of 1600
album id is 13895361
the album is When I Need You / You Make Me Feel Like Dancing
the artist is Leo Sayer
---------- completed 1354 of 1600
album id is 13897457
the album is Mylo Xyloto
the artist is Coldplay
the album genre is Alternative
---------- completed 1355 of 1600
album id is 13897930
the album is Lulu
the artist is Lou Reed & Metallica
the album genre is Rock
---------- completed 1356 of 1600
album id is 13898604
the album is Achtung Baby
the artist is U2
---------- completed 13

album id is 14178092
the album is Leo Sayer
the artist is Leo Sayer
---------- completed 1424 of 1600
album id is 14179197
the album is 19
the artist is Adele
---------- completed 1425 of 1600
album id is 14179735
the album is Sector 1
the artist is Rush
---------- completed 1426 of 1600
album id is 14179801
the album is Pump
the artist is Aerosmith
---------- completed 1427 of 1600
album id is 14179937
the album is Pump
the artist is Aerosmith
---------- completed 1428 of 1600
album id is 14181077
the album is Big Time Movie Soundtrack
the artist is Big Time Rush
the album genre is Pop
---------- completed 1429 of 1600
album id is 14181097
the album is BTR
the artist is Big Time Rush
---------- completed 1430 of 1600
album id is 14182836
the album is Addicted
the artist is Prince Royce
the album genre is Pop
the album genre is Latin
---------- completed 1431 of 1600
album id is 14184573
the album is How to Dismantle an Atomic Bomb
the artist is U2
---------- completed 1432 of 1600
alb

album id is 14228501
the album is God Hates Us All
the artist is Slayer
---------- completed 1501 of 1600
album id is 14228713
the album is Kill 'em All
the artist is Metallica
---------- completed 1502 of 1600
album id is 14230587
the album is War
the artist is U2
---------- completed 1503 of 1600
album id is 14230639
the album is FABulous Greatest Hits
the artist is Prince Buster
---------- completed 1504 of 1600
album id is 14230930
the album is Heart Over Mind
the artist is Jennifer Rush
---------- completed 1505 of 1600
album id is 14231047
the album is Now Here's My Plan
the artist is Bonnie "Prince" Billy
the album genre is Alternative
the album genre is Singer/Songwriter
---------- completed 1506 of 1600
album id is 14231323
the album is The Marshall Mathers LP
the artist is Eminem
---------- completed 1507 of 1600
album id is 14231895
the album is Now Here's My Plan
the artist is Bonnie "Prince" Billy
---------- completed 1508 of 1600
album id is 14233238
the album is Physical

the album is Graffiti Bridge
the artist is Prince
---------- completed 1575 of 1600
album id is 14297861
the album is Ultimate
the artist is Prince
---------- completed 1576 of 1600
album id is 14299842
the album is Hold Your Fire
the artist is Rush
---------- completed 1577 of 1600
album id is 14300052
the album is South of Heaven
the artist is Slayer
---------- completed 1578 of 1600
album id is 14301031
the album is A Rush of Blood to the Head
the artist is Coldplay
---------- completed 1579 of 1600
album id is 14303134
the album is Get Your Wings
the artist is Aerosmith
---------- completed 1580 of 1600
album id is 14303498
the album is Led Zeppelin III
the artist is Led Zeppelin
---------- completed 1581 of 1600
album id is 14303499
the album is Led Zeppelin II
the artist is Led Zeppelin
---------- completed 1582 of 1600
album id is 14303503
the album is Houses of the Holy
the artist is Led Zeppelin
---------- completed 1583 of 1600
album id is 14303505
the album is Physical Graff

album id is 15357566
the album is Joe FM Hitarchief: 1973
the artist is Demis Roussos, The Osmonds, ABBA, Albert Hammond, Argent, Billy Paul, Bonnie St. Claire, Carly Simon, Charlie Rich, Chi Coltrane, Chris Montez, Cliff Richard, David Essex, Dawn, Dolly Parton, Earth, Elton John, Fire, Freddy Breck, George Baker Selection, Golden Earring, Harold Melvin, Left Side, Lou Reed, Michael Jackson, Mort Shuman, Mott the Hoople, Nick McKenzie, Peter Koelewijn, Raza, Santana, Stealers Wheel, Suzi Quatro, Sweet, Teddy Pendergrass, The Blue Notes, The Edgar Winter Group, The Isley Brothers, The O'Jays, The Three Degrees, Unit Gloria & Van Morrison
the album genre is Pop
---------- completed 1657 of 1600
album id is 15401458
the album is Days Are Gone
the artist is Haim
---------- completed 1658 of 1600
album id is 15413792
the album is Tribute To: Aerosmith
the artist is Aerosmith Tribute Band
the album genre is Pop
---------- completed 1659 of 1600
album id is 15421230
the album is Take Me to t

album id is 15507672
the album is Psalms for I
the artist is Prince Far I
the album genre is Reggae
the album genre is World
---------- completed 1711 of 1600
album id is 15509960
the album is Bonnie "Prince" Billy
the artist is Bonnie "Prince" Billy
---------- completed 1712 of 1600
album id is 15510096
the album is Music from Another Dimension!
the artist is Aerosmith
---------- completed 1713 of 1600
album id is 15511228
the album is Maladroit
the artist is Weezer
---------- completed 1714 of 1600
album id is 15514959
the album is Lose Yourself
the artist is Eminem
the album genre is Hip Hop/Rap
---------- completed 1715 of 1600
album id is 15514963
the album is Curtain Call - The Hits
the artist is Eminem
the album genre is Hip Hop/Rap
---------- completed 1716 of 1600
album id is 15514971
the album is Relapse
the artist is Eminem
the album genre is Hip Hop/Rap
---------- completed 1717 of 1600
album id is 15514973
the album is Encore
the artist is Eminem
the album genre is Hip Hop

album id is 15601680
the album is Phase II
the artist is Prince Royce
the album genre is Salsa y Tropical
---------- completed 1766 of 1600
album id is 15608349
the album is Heart Over Mind
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1767 of 1600
album id is 15610407
the album is Big Shiny Tunes 13
the artist is Buckcherry, Coldplay, Die Mannequin, Disturbed, LINKIN PARK, Panic! At the Disco, Protest the Hero, Queens of the Stone Age, Sam Roberts, Saving Abel, Seether, The Kooks, The Mission District, The Raconteurs, The Verve, Tokyo Police Club, U.S.S, Weezer & Wintersleep
the album genre is Rock
---------- completed 1768 of 1600
album id is 15614470
the album is My Name Is Prince
the artist is Prince & The New Power Generation
---------- completed 1769 of 1600
album id is 15617379
the album is Will Smith: Greatest Hits
the artist is Will Smith
the album genre is Pop
---------- completed 1770 of 1600
album id is 15617381
the album is Will Smith: Greatest Hi

album id is 15714250
the album is Long Road Out of Eden
the artist is Eagles
the album genre is Rock
the album genre is Country
---------- completed 1828 of 1600
album id is 15714258
the album is Please Come Home for Christmas / Funky New Year
the artist is Eagles
the album genre is Holiday
---------- completed 1829 of 1600
album id is 15714721
the album is Fast Lane
the artist is Bad Meets Evil
the album genre is Hip Hop/Rap
---------- completed 1830 of 1600
album id is 15717642
the album is Otis Rush: Live at Montreux 1986
the artist is Otis Rush
the album genre is Blues
---------- completed 1831 of 1600
album id is 15718958
the album is Reign In Blood
the artist is Slayer
the album genre is Rock
---------- completed 1832 of 1600
album id is 15718961
the album is God Hates Us All
the artist is Slayer
the album genre is Rock
---------- completed 1833 of 1600
album id is 15718965
the album is Christ Illusion
the artist is Slayer
the album genre is Heavy Metal
---------- completed 1834 

album id is 15946709
the album is The Psychedelic Years (1966-1968)
the artist is The Revolution
the album genre is Rock
---------- completed 1891 of 1600
album id is 15947111
the album is Label This!
the artist is Darude
the album genre is Pop
---------- completed 1892 of 1600
album id is 15948606
the album is The Revolution: The Singles Album
the artist is The Revolution
the album genre is Rock
---------- completed 1893 of 1600
album id is 15953296
the album is Voices In My Head
the artist is Leo Sayer
the album genre is Pop
---------- completed 1894 of 1600
album id is 15955874
the album is Lost and Found
the artist is Will Smith
the album genre is R&B/Soul
---------- completed 1895 of 1600
album id is 15957317
the album is Leo Sayer: Live In 1975
the artist is Leo Sayer
the album genre is Pop
---------- completed 1896 of 1600
album id is 15959490
the album is Endless Journey – The Essential Leo Sayer
the artist is Leo Sayer
the album genre is Pop
the album genre is Vocal
----------

album id is 16169189
the album is The Hit Box
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1952 of 1600
album id is 16169190
the album is Jennifer Rush: Superhits
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1953 of 1600
album id is 16169191
the album is Jennifer Rush: Hit Collection
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1954 of 1600
album id is 16169192
the album is The Very Best of (Her EMI / Virgin Years)
the artist is Jennifer Rush
the album genre is Pop
the album genre is Vocal
---------- completed 1955 of 1600
album id is 16169193
the album is Jennifer Rush: Best of 1983-2010
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1956 of 1600
album id is 16169194
the album is The Power of Jennifer Rush
the artist is Jennifer Rush
the album genre is Pop
---------- completed 1957 of 1600
album id is 16169195
the album is Essential Jennifer Rush
the artist is Jennifer Rush
the al

In [14]:
total_df = pd.DataFrame.from_dict(genre_dic, orient='index')

In [16]:
total_df.head()

Unnamed: 0,artist,album,genre
10266031,Will Smith,Big Willie Style,
10266041,U2,War,Rock
10266127,Led Zeppelin,Led Zeppelin,
10266180,Rush,Power Windows,
10266202,Will Smith,Willennium,


In [18]:
total_df.to_csv('scrape_test_data.csv')

In [36]:
from bs4 import BeautifulSoup
from splinter import Browser
from selenium import webdriver
import requests

driver = webdriver.Chrome()

In [43]:
genres = []
for i in range(10):
    artist = total_df['artist'][i]
    album = total_df['album'][i]
    web = 'https://www.allmusic.com/search/albums/'
    
    #Create a splinter of the webpage to start scraping
    executable_path = {"executable_path": "resources/chromedriver.exe"}
    browser = Browser('chrome', **executable_path, headless=False)
    
    url = web + artist + " " + album
    
    browser.visit(url)
    
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    genre = soup.find("div", class_='genres').get_text()
    print(f"{artist}'s album, {album}, genre is {genre}")
    driver = webdriver.Chrome()
    browser.Close()

Will Smith's album, Big Willie Style, genre is 
                Rap            


AttributeError: 'WebDriver' object has no attribute 'Close'

In [22]:
total_df['artist'][0]

'Will Smith'

In [None]:
## THIS IS FOR TESTING ONLY
import pprint
data = musixmatch.album_get(album_id='10295432')
pprint.pprint(data)

In [None]:
album_count = 1

for album_id in album_unique:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_get(album_id = album_id)
    print(f"getting information for {album_id}")
    album = result['message']['body']['album']
    print("creating genre list")
    genres = result['message']['body']['album']['primary_genres']['music_genre_list']
    genre_list = [genre['music_genre'] for genre in genres]
    print(f"Iterating through album genres")
    for genre in track_list:
        package = {
                'album_id': int(album['album_id']),
                'artist_id': int(album['artist_id']),
                'artist_name': album['artist_name'],
                'genre': genre['music_genre_name'],
                'genre_id': genre['music_genre_id']
                
            }
        print(package['album_id'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1