# Dependencies

In [1]:
import pandas as pd
import numpy as np
from musixmatch import Musixmatch
from API_Key import api_key
import time
from pprint import pprint

In [2]:
import sqlalchemy
from sqlalchemy import create_engine, inspect, Column, Integer, Float, String, Sequence, Boolean
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.ext.declarative import declarative_base

In [3]:
musixmatch = Musixmatch(api_key)

# Setting up SQLlite

In [4]:
#Using declarative base
Base = declarative_base()
engine = create_engine('sqlite:///lyrics3.db', echo = False)

In [6]:
#defining the Tracks table
class Tracks(Base):
    __tablename__ = "tracks"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    album_id = Column(Integer)
    artist_id = Column(Integer)
    artist_name = Column(String)
    explicit = Column(Integer)
    has_lyrics = Column(Integer)
    lyrics_id = Column(Integer)
    track_id = Column(Integer, unique = True)
    track_name = Column(String)

In [7]:
#defining the Lyrics table
class Lyrics(Base):
    __tablename__ = "lyrics"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    explicit = Column(Integer)
    lyrics_body = Column(String)
    lyrics_id = Column(Integer, unique = True)
    lyrics_language = Column(String)
    lyrics_language_description = Column(String)

In [8]:
#defining the Artists table
class Artists(Base):
    __tablename__ = "artists"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer, unique = True)
    artist_name = Column(String)
    artist_q = Column(String)

In [9]:
#defining the Albums table
class Albums(Base):
    __tablename__ = "albums"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer)
    artist_name = Column(String)
    album_id = Column(Integer, unique = True)
    album_name = Column(String)

In [6]:
#defining the genres table
class Album_Genres(Base):
    __tablename__ = "album_genres"
    
    id = Column(Integer, primary_key=True, nullable = False, unique = True, autoincrement = True)
    artist_id = Column(Integer)
    artist_name = Column(String)
    album_id = Column(Integer)
    genre = Column(String)
    genre_id = Column(Integer)

In [5]:
#Creating the table metadata
Base.metadata.create_all(engine)
session = Session(engine)

In [6]:
#Binding engine to the metadata
meta = sqlalchemy.MetaData()
meta.reflect(bind = engine)

# MusixMatch API - Finding artists via text search from manually created list

In [11]:
artists_path = 'artist_names.xlsx'
artists_df = pd.read_excel(artists_path, sheetname = "Sheet1", header = None)
artists = list(artists_df[0].values)

In [12]:
#Connecting to tracks table
table = sqlalchemy.Table('artists', meta, autoload = True)

In [None]:
for artist in artists:
    print(f"getting artist names {artist}")
    result = musixmatch.artist_search(q_artist = artist, page = 1, page_size = 100, f_artist_id = '', f_artist_mbid = '')
    print('extracting results')
    artist_list = result['message']['body']['artist_list']
    artists = [artist['artist'] for artist in artist_list]
    print("creating package")
    for item in artists:
        package = {
            'artist_id': item['artist_id'],
            'artist_name': item['artist_name'],
            'artist_q': artist
        }
        try:
            print("writing package to database")
            session.execute(table.insert(), package)
        except:
            next
    print("committing to database")
    session.commit()

# MusixMatch API - Finding Albums from Artists

In [26]:
artists = pd.read_sql('artists', engine)
artists.dropna(inplace = True)
artist_ids = list(artists['artist_id'].values)

In [12]:
table = sqlalchemy.Table('albums', meta, autoload = True)

In [None]:
for artist_id in artist_ids:
    print(f"getting albums for artist {artist_id}")
    result = musixmatch.artist_albums_get(artist_id = artist_id, g_album_name = '', page = 1, page_size = 100, s_release_date = '')
    print("extracting the album_list")
    album_list = result['message']['body']['album_list']
    print("extracting individual albums")
    albums = [album['album'] for album in album_list]
    print("iterating through the list of albums")
    for item in albums:
        print("creating the package")
        package = {
            'album_id': item['album_id'],
            'album_name': item['album_name'],
            'artist_id': item['artist_id'],
            'artist_name': item['artist_name']
        }
        try:
            print("writing to the database")
            session.execute(table.insert(), package)
        except:
            next
    print("comitting to the database")
    session.commit()

# MusixMatch API - Scraping Tracks From Charts

In [19]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [10]:
country_codes = ['AU', 'CA', 'IE', 'IM', 'NZ', 'GB', 'US']

In [None]:
country_count = 1
for country in country_codes:
    
    print(f"Attempting country {country_count} for {country}")
    
    page_count = 1

    while page_count < 5:
        print(f"calling api attempt {page_count} for {country}")
        result = musixmatch.chart_tracks_get(page_count, 100, f_has_lyrics = True)
        tracks = result['message']['body']['track_list']
        track_list = [track['track'] for track in tracks]
        print(f"track list is {len(track_list)}")
        for track in track_list:
            package = {
                'album_id': track['album_id'],
                'artist_id': track['artist_id'],
                'artist_name': track['artist_name'],
                'explicit': track['explicit'],
                'has_lyrics': track['has_lyrics'],
                'lyrics_id': track['lyrics_id'],
                'track_id': track['track_id'],
                'track_name': track['track_name']
            }
            print(package['track_name'])
            print("writing to database")
            try:
                session.execute(table.insert(), package)
            except:
                next
        page_count += 1
        time.sleep(1)
        session.commit()
    
    country_count += 1

# MusixMatch API - finding tracks from albums

## Starting point is the tracks data from the charts

In [20]:
tracks = pd.read_sql_table('tracks', engine, index_col = "id", coerce_float = False)

In [21]:
album_ids = tracks['album_id'].values

In [24]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [None]:
album_count = 1

for album_id in album_ids:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_tracks_get(album_id = album_id, album_mbid = "", page = 1, page_size = 100)
    print(f"getting track_list for {album_id}")
    album = result['message']['body']['track_list']
    print("creating track list")
    track_list = [track['track'] for track in album]
    track_count = 1
    print(f"Iterating through tracks in album")
    for track in track_list:
        print(f"track is {track['track_id']}")
        package = {
                'album_id': int(track['album_id']),
                'artist_id': int(track['artist_id']),
                'artist_name': track['artist_name'],
                'explicit': int(track['explicit']),
                'has_lyrics': int(track['has_lyrics']),
                'lyrics_id': int(track['lyrics_id']),
                'track_id': int(track['track_id']),
                'track_name': track['track_name']
            }
        print(package['track_name'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1

## Starting point is the album's data

In [15]:
albums = pd.read_sql_table('albums', engine, index_col = "id", coerce_float = False)
albums = albums.iloc[647:]
album_ids = list(albums['album_id'].values)

In [16]:
#Connecting to tracks table
table = sqlalchemy.Table('tracks', meta, autoload = True)

In [None]:
album_count = 1

for album_id in album_ids:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_tracks_get(album_id = album_id, album_mbid = "", page = 1, page_size = 100)
    print(f"getting track_list for {album_id}")
    album = result['message']['body']['track_list']
    print("creating track list")
    track_list = [track['track'] for track in album]
    track_count = 1
    print(f"Iterating through tracks in album")
    for track in track_list:
        print(f"track is {track['track_id']}")
        package = {
                'album_id': int(track['album_id']),
                'artist_id': int(track['artist_id']),
                'artist_name': track['artist_name'],
                'explicit': int(track['explicit']),
                'has_lyrics': int(track['has_lyrics']),
                'lyrics_id': int(track['lyrics_id']),
                'track_id': int(track['track_id']),
                'track_name': track['track_name']
            }
        print(package['track_name'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1

## MusixMatch API - Scraping Lyrics from the Albums Data

In [28]:
#Connecting to lyrics table
table = sqlalchemy.Table('lyrics', meta, autoload = True)

In [29]:
tracks = pd.read_sql('tracks', engine, index_col = "id")
track_ids = tracks['track_id'].values

In [None]:
count = 1
for track in track_ids:
    print(f"calling API attempt {count} for track_id# {track}")
    print(f"track data type is {type(track)}")
    lyrics = musixmatch.track_lyrics_get(track)
    print("pulling out lyrics package")
    lyrics_package = lyrics['message']['body']['lyrics']
    print("pulling out package to insert into SQL database")
    package = {
        "explicit": lyrics_package['explicit'],
        "lyrics_body": lyrics_package["lyrics_body"][:-69],
        "lyrics_id": lyrics_package['lyrics_id'],
        "lyrics_language": lyrics_package['lyrics_language'],
        "lyrics_language_description": lyrics_package['lyrics_language_description'],
        "updated_time": lyrics_package['updated_time']
    }
    
    print(package['lyrics_body'][:10])
    print("writing to database")
    try:
        session.execute(table.insert(), package)
    except:
        next
    print("committing files to database")
    session.commit()
    count += 1 

# Acquiring Genre from Album

In [7]:
#Connecting to lyrics table
table = sqlalchemy.Table('album_genres', meta, autoload = True)

In [8]:
albums = pd.read_sql_table('tracks', engine, index_col = "id", coerce_float = False)
album_ids = list(albums['album_id'].values)

In [9]:
len(album_ids)

6035

In [10]:
album_array = np.array(album_ids)

In [11]:
album_unique = np.unique(album_array)
len(album_unique)

598

In [12]:
album_unique[1]

10268810

In [41]:
import pprint
data = musixmatch.album_get(album_id=album_unique[5])
pprint.pprint(data)

{'message': {'body': {'album': {'album_copyright': '',
                                'album_coverart_100x100': 'http://s.mxmcdn.net/images-storage/albums/nocover.png',
                                'album_coverart_350x350': '',
                                'album_coverart_500x500': '',
                                'album_coverart_800x800': '',
                                'album_edit_url': 'https://www.musixmatch.com/album/Led-Zeppelin/Led-Zeppelin-III?utm_source=application&utm_campaign=api&utm_medium=George+Washington+University%3A1409617864811',
                                'album_id': 10276578,
                                'album_label': '',
                                'album_mbid': '7aadcfa2-df82-480e-8d2d-7ec4d0b41172',
                                'album_name': 'Led Zeppelin III',
                                'album_pline': '',
                                'album_rating': 100,
                                'album_release_date': '2012-11-28',
   

In [35]:
find = data['message']['body']['album']['primary_genres']['music_genre_list']
pprint.pprint(find)

[]


In [30]:
find2 = data['message']['body']['album']['album_name']
pprint.pprint(find2)

'Hell Awaits'


In [40]:
genre_dic = {}
for id in album_unique:
    
    result = musixmatch.album_get(album_id=id)
    album = result['message']['body']['album']['album_name']
    print(f'the album is {album}')
    try:
        genre = result['message']['body']['album']['primary_genres']['music_genre_list'][0]['music_genre']['music_genre_name']
        #genre_list = [genre['music_genre'] for genre in genres]
        print(f'the album genre is {genre}')
    except IndexError:
        pass
    try:
        genre = result['message']['body']['album']['secondary_genres']['music_genre_list'][0]['music_genre']['music_genre_name']
        print(f'the album genre is {genre}')
    except IndexError:
        genre="NA"
    genre_dic[f'{album}'] = genre

the album is Led Zeppelin
the album is Hell Awaits
the album genre is Rock
the album is Show No Mercy
the album genre is Rock
the album is ...And Justice for All
the album genre is Rock
the album is Houses of the Holy
the album is Led Zeppelin III
the album is Metallica
the album is Divine Intervention
the album genre is Rock
the album genre is Alternative
the album is Pinkerton
the album genre is Rock
the album is Led Zeppelin II
the album is Physical Graffiti
the album is In Through the Out Door
the album is Reign in Blood
the album is Seasons In the Abyss
the album genre is Rock
the album is Diabolus In Musica
the album genre is Rock
the album is God Hates Us All
the album genre is Hard Rock
the album is Diabolus in Musica
the album is Weezer
the album is South of Heaven
the album is Weezer
the album is Reign In Blood
the album genre is Heavy Metal
the album is Third Eye Open: The String Quartet Tribute to Tool
the album is Undisputed Attitude
the album genre is Rock
the album is Me

the album is When I Need You / You Make Me Feel Like Dancing
the album is Led Zeppelin II
the album is Led Zeppelin III
the album is Houses of the Holy
the album is Physical Graffiti
the album is Vitamin String Quartet Performs Muse
the album is Pinkerton
the album is Physical Graffiti
the album is Led Zeppelin II
the album is Led Zeppelin II
the album is Seasons in the Abyss
the album is Undisputed Attitude
the album is Undisputed Attitude
the album genre is Heavy Metal
the album is South of Heaven
the album is Leo Sayer
the album is Physical Graffiti
the album is Divine Intervention
the album is Leo Sayer
the album is South of Heaven
the album is Show No Mercy
the album is Led Zeppelin
the album is Led Zeppelin II
the album is Led Zeppelin III
the album is Led Zeppelin
the album is Just a Boy
the album is Reign in Blood
the album is Seasons in the Abyss
the album is More Than I Can Say / Only Fooling
the album is Live In Glasgow
the album is Reign in Blood
the album is God Hates Us A

the album is Traveller
the album genre is Country
the album is Led Zeppelin
the album is The String Quartet Tribute to the Smiths
the album genre is Alternative
the album genre is Adult Alternative
the album is The String Quartet Tribute To Radiohead: Enigmatic
the album genre is Alternative
the album genre is Rock
the album is Repentless
the album genre is Heavy Metal
the album is Immortalized (Deluxe Version)
the album genre is Rock
the album is Repentless
the album genre is Heavy Metal
the album is Thank God For Girls
the album genre is Alternative
the album is Do You Wanna Get High?
the album genre is Alternative
the album is Night Visions
the album is Christ Illusion
the album is A Salute To Led Zeppelin
the album is News Of The World
the album is Waterloo (Deluxe Edition)
the album is Show No Mercy
the album is Hurley
the album is Raditude
the album is Raditude
the album is Weezer
the album is Make Believe
the album is Make Believe
the album is Pinkerton - Deluxe Edition
the albu

the album is Scorpion
the album genre is Hip Hop/Rap
the album is Burn The Ships
the album genre is Christian & Gospel
the album is Smile Bitch
the album genre is Hip Hop/Rap
the album is Jumpsuit / Nico And The Niners
the album genre is Alternative
the album is Summertime Magic
the album is Mamma Mia! Here We Go Again (Original Motion Picture Soundtrack)
the album is Eastside
the album genre is Pop
the album is Desperate Man
the album genre is Country
the album is All of It
the album genre is Country
the album is Look Up Child
the album genre is Christian & Gospel
the album is Natural
the album genre is Alternative
the album is When The Curtain Falls
the album is Level Up
the album is you should see me in a crown
the album is Happy Now
the album genre is Dance
the album is FEFE (feat. Nicki Minaj & Murda Beatz)
the album is Attention
the album is Queen Naija
the album genre is R&B/Soul
the album is SWISH
the album genre is Hip Hop/Rap
the album is I Don't Want: The Gold Fire Sessions


[]

In [None]:
album_count = 1

for album_id in album_unique:
    print(f"getting result {album_count} for {album_id}")
    result = musixmatch.album_get(album_id = album_id)
    print(f"getting information for {album_id}")
    album = result['message']['body']['album']
    print("creating genre list")
    genres = result['message']['body']['album']['primary_genres']['music_genre_list']
    genre_list = [genre['music_genre'] for genre in genres]
    print(f"Iterating through album genres")
    for genre in track_list:
        package = {
                'album_id': int(album['album_id']),
                'artist_id': int(album['artist_id']),
                'artist_name': album['artist_name'],
                'genre': genre['music_genre_name'],
                'genre_id': genre['music_genre_id']
                
            }
        print(package['album_id'])
        print("writing to database")
        try:
            session.execute(table.insert(), package)
        except:
            print("writing to database failed")
            next
        print("committing files to database")
        session.commit()
    album_count += 1