In [226]:
import pandas as pd
import numpy as np

# Death Metal / Black Metal Album Cover Art Genre Classification

Import a list of band / album pairs  
The list for each respective genre was taken from the top all time list from RYM  
Split releases and compilations were skipped  
Bias away from popular bands like Cannibal Corpse, Dimmu Borgir, etc  

## Read in album data

In [227]:
bands = []
albums = []
with open('death_metal.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        band, album = line.split('\t')
        bands.append(band)
        albums.append(album)

In [228]:
death = pd.DataFrame({'band': bands, 'album': albums})
death

Unnamed: 0,band,album
0,Death,Symbolic
1,Death,Human
2,Death,The Sound of Perseverance
3,Cryptopsy,None So Vile
4,Gorguts,Obscura
...,...,...
512,Eluveitie,Spirit
513,Incantation,Dirges of Elysium
514,Pungent Stench,For God Your Soul... For Me Your Flesh
515,Unleashed,Hammer Battalion


In [229]:
bands = []
albums = []
with open('black_metal.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        band, album = line.split('\t')
        bands.append(band)
        albums.append(album)

In [230]:
black = pd.DataFrame({'band': bands, 'album': albums})
black

Unnamed: 0,band,album
0,Burzum,Filosofem
1,Dissection,Storm of the Light's Bane
2,Emperor,In the Nightside Eclipse
3,Emperor,Anthems to the Welkin at Dusk
4,Ulver,Bergtatt: Et eeventyr I 5 capitler
...,...,...
501,møl,Jord
502,Mütiilation,"Remains of a Ruined, Dead, Cursed Soul"
503,Ravencult,Morbid Blood
504,Forteresse,Crépuscule d'octobre


Within each genre, there are no duplicated album titles

In [231]:
death[death['album'].duplicated()]

Unnamed: 0,band,album


In [232]:
black[black['album'].duplicated()]

Unnamed: 0,band,album


Find albums that appear in both lists

In [233]:
death['full'] = death['band'] + ' - ' + death['album']
black['full'] = black['band'] + ' - ' + black['album']

In [234]:
crossover = np.intersect1d(death['full'], black['full'])
crossover

array(['1914 - The Blind Leading the Blind',
       'Archgoat - Whore of Bethlehem', 'Behemoth - The Satanist',
       'Black Curse - Endless Wound', 'Convulsing - Grievous',
       'Esoctrilihum - Eternity of Shaog', 'Esoctrilihum - Inhüma',
       'Esoctrilihum - The Telluric Ashes of the Ö Vrth Immemorial Gods',
       'Infernal Coil - Within a World Forgotten', 'Mefitis - Emberdawn',
       'Necrophobic - Darkside',
       'Necros Christos - Triune Impurity Rites',
       'Shade Empire - Omega Arcane',
       'Sulphur Aeon - The Scythe of Cosmic Chaos',
       'Tchornobog - Tchornobog', 'Teitanblood - Death',
       'Teitanblood - The Baneful Choir',
       'The Moaning - Blood From Stone',
       'Thy Darkened Shade - Liber Lvcifer I: Khem Sedjet',
       'Unanimated - Ancient God of Evil', 'Zhrine - Unortheta'],
      dtype=object)

Save the albums with crossover to see what the model says later

In [235]:
crossover_albums = death[death['full'].isin(crossover)]
crossover_albums = crossover_albums.drop(['full'], axis=1)
crossover_albums

Unnamed: 0,band,album
83,Behemoth,The Satanist
204,Necrophobic,Darkside
210,Tchornobog,Tchornobog
228,Unanimated,Ancient God of Evil
234,Sulphur Aeon,The Scythe of Cosmic Chaos
251,The Moaning,Blood From Stone
253,Esoctrilihum,Inhüma
256,Convulsing,Grievous
260,Esoctrilihum,Eternity of Shaog
312,Teitanblood,Death


In [236]:
death = death[~death['full'].isin(crossover)]
death = death.drop(['full'], axis=1)
black = black[~black['full'].isin(crossover)]
black = black.drop(['full'], axis=1)

In [237]:
death

Unnamed: 0,band,album
0,Death,Symbolic
1,Death,Human
2,Death,The Sound of Perseverance
3,Cryptopsy,None So Vile
4,Gorguts,Obscura
...,...,...
511,Infernal War,Redesekration: The Gospel of Hatred and Apothe...
512,Eluveitie,Spirit
513,Incantation,Dirges of Elysium
514,Pungent Stench,For God Your Soul... For Me Your Flesh


In [238]:
black

Unnamed: 0,band,album
0,Burzum,Filosofem
1,Dissection,Storm of the Light's Bane
2,Emperor,In the Nightside Eclipse
3,Emperor,Anthems to the Welkin at Dusk
4,Ulver,Bergtatt: Et eeventyr I 5 capitler
...,...,...
501,møl,Jord
502,Mütiilation,"Remains of a Ruined, Dead, Cursed Soul"
503,Ravencult,Morbid Blood
504,Forteresse,Crépuscule d'octobre


We have about the same number of death metal and black metal examples (500)

## EDA

In [239]:
death.groupby('band').size().sort_values(ascending=False).nlargest(10)

band
Napalm Death        9
Bolt Thrower        8
Insomnium           8
Septicflesh         8
Dark Tranquility    8
Vader               7
Death               7
Immolation          6
Morbid Angel        6
Nile                5
dtype: int64

In [240]:
black.groupby('band').size().sort_values(ascending=False).nlargest(10)

band
Enslaved            9
Blut aus Nord       7
Inquisition         6
Drudkh              6
Paysage d'Hiver     6
Rotting Christ      6
Summoning           6
Deathspell Omega    5
Anaal Nathrakh      5
Lunar Aurora        5
dtype: int64

Group albums by band for easier downloading

In [241]:
death_grouped = death.groupby('band')['album'].apply(list).reset_index(name='albums')
death_grouped

Unnamed: 0,band,albums
0,!T.O.O.H.!,"[Pod vládou bice, Order and Punishment]"
1,7 H.Target,[Fast-Slow Demolition]
2,A Canorous Quintet,[Silence of the World Beyond]
3,Abyssal,[Antikatastaseis]
4,Ad Nauseam,[Nihil quam vacuitas ordinatum est]
...,...,...
257,Vomitory,"[Revelation Nausea, Blood Rapture]"
258,Vuvr,[Pilgrimage]
259,Whispered,[Metsutan: Songs of the Void]
260,Wintersun,[Wintersun]


In [151]:
black_grouped = black.groupby('band')['album'].apply(list).reset_index(name='albums')
black_grouped

Unnamed: 0,band,albums
0,A Forest of Stars,"[Grave Mounds and Grave Mistakes, A Shadowplay..."
1,Abigail,[Intercourse & Lust]
2,Abigor,"[Nachthymnen (From the Twilight Kingdom), Verw..."
3,Absu,"[Tara, Abzu, The Sun of tiphareth]"
4,Abyssic Hate,[Suicidal Emotions]
...,...,...
250,Àrsaidh,[Roots]
251,Батюшка [Batushka],"[Литургия (Litourgiya), Панихида (Panihida)]"
252,Крода [Kroda],"[До небокраю життя..., Schwarzpfad, Поплач мен..."
253,Лютомысл [Lutomysl],[De Profundis]


## Metal Archives scraping

Download album art from Metal Archives using the metallum library: https://github.com/lcharlick/python-metallum/blob/master/metallum.py

In [155]:
import metallum

In [192]:
def get_cover_urls(df):
    urls = []
    not_found = []
    for _, row in df.iterrows():
        band = row['band']
        album = row['album']
        search = metallum.album_search(album, band=band, strict=False)
        if not search:
            not_found.append((band, album))
            continue
        result = search[0].get()
        url = result.cover
        urls.append((band, album, url))
    return urls, not_found

In [193]:
death_urls, death_nf = get_cover_urls(death)

In [195]:
death_nf

[('Dismember', 'Like an Ever Flowing Stream'),
 ('Atheist', 'Piece of Jtime'),
 ('Dark Tranquility', 'The Gallery'),
 ('Dark Tranquility', 'Damage Done'),
 ('Dismember', 'Indecent & Obscene'),
 ('!T.O.O.H.!', 'Pod vládou bice'),
 ('Dark Tranquility', 'Fiction'),
 ('Dark Tranquility', 'Character'),
 ('Extol', 'Udneceived'),
 ('!T.O.O.H.!', 'Order and Punishment'),
 ('Deceased...', 'As the Weird Travel On'),
 ('Anata', "The Condustor's Departure"),
 ('Necophagist', 'Onset of Putrefaction'),
 ('Atrocity', 'Longing for Death'),
 ('Dark Tranquility', 'Projector'),
 ('Deceased...', 'Ghostly White'),
 ('Deceased...', 'Fearless Undead Machines'),
 ('Phrenelith', 'Desolate Landscape'),
 ('Deceased...', 'Surreal Overdose'),
 ('Necophagist', 'Epitaph'),
 ('Dark Tranquility', "The Mind's I"),
 ('Dark Tranquility', 'Haven'),
 ('Séance', 'Fornever Laid To Rest'),
 ('Dark Tranquility', 'Atoma'),
 ('Dark Throne', 'Soulside Journey'),
 ('Beyond Creation', 'Fatal Power of Death'),
 ('Adversarial', 'D.E.

In [194]:
import requests
import shutil

def download_cover(url, folder, img_name):
    headers = {
        'User-Agent': 'My User Agent 1.0',
        'From': 'email@domain.com'
    }
    
    response = requests.get(url, headers=headers, stream=True)
    filename = f'data/{folder}/{img_name}.jpg'
    with open(filename, 'wb') as out:
        shutil.copyfileobj(response.raw, out)
    del response

In [242]:
death.assign(url='')
for row in death_urls:
    band, album, url = row
    death.loc[(death['band'] == band) & (death['album'] == album), 'url'] = url

In [244]:
death.to_csv('death_urls.csv')

The URLs for the above albums were fixed manually in the CSV. For the most part, these were caused by typing/spelling errors, or punctuation that affected the search

In [253]:
death = pd.read_csv('death_urls.csv', index_col=0)
death

Unnamed: 0,band,album,url
0,Death,Symbolic,https://www.metal-archives.com/images/6/1/6/61...
1,Death,Human,https://www.metal-archives.com/images/6/0/6/60...
2,Death,The Sound of Perseverance,https://www.metal-archives.com/images/6/1/8/61...
3,Cryptopsy,None So Vile,https://www.metal-archives.com/images/1/9/9/19...
4,Gorguts,Obscura,https://www.metal-archives.com/images/3/0/0/9/...
...,...,...,...
511,Infernal War,Redesekration: The Gospel of Hatred and Apothe...,https://www.metal-archives.com/images/1/4/9/9/...
512,Eluveitie,Spirit,https://www.metal-archives.com/images/1/0/5/4/...
513,Incantation,Dirges of Elysium,https://www.metal-archives.com/images/4/0/9/5/...
514,Pungent Stench,For God Your Soul... For Me Your Flesh,https://www.metal-archives.com/images/1/0/8/8/...


In [255]:
for i, row in death.iterrows():
    band = row['band']
    url = row['url']
    img = f'{band}_{i}'
    download_cover(url, 'death', img)

In [271]:
urls = []
not_found = []

for i in range(len(black_grouped)):
    row = black_grouped.iloc[i]
    band = row['band']
    albums = row['albums']
    search = metallum.band_search(band)
    if not search:
        not_found.append((band, ''))
        continue
    band = search[0].get()
    for album in albums:
        alb_search = band.albums.search(title=album)
        if not alb_search:
            not_found.append((band, album))
            continue
        urls.append((band, album, alb_search[0].cover))

In [272]:
urls

[(<Band: A Forest of Stars>,
  'Grave Mounds and Grave Mistakes',
  'https://www.metal-archives.com/images/7/2/5/9/725974.jpg'),
 (<Band: A Forest of Stars>,
  'A Shadowplay for Yesterdays',
  'https://www.metal-archives.com/images/3/4/1/7/341704.jpg'),
 (<Band: Abigor>,
  'Nachthymnen (From the Twilight Kingdom)',
  'https://www.metal-archives.com/images/3/2/3/5/3235.jpg'),
 (<Band: Abigor>,
  'Verwüstung / Invoke the Dark Age',
  'https://www.metal-archives.com/images/3/2/3/3/3233.jpg'),
 (<Band: Abigor>,
  'Fractal Possession',
  'https://www.metal-archives.com/images/1/4/7/2/147277.jpg'),
 (<Band: Abyssic Hate>,
  'Suicidal Emotions',
  'https://www.metal-archives.com/images/8/1/2/9/812964.jpg'),
 (<Band: Aeternus>,
  'Beyond the Wandering Moon',
  'https://www.metal-archives.com/images/2/4/0/3/2403.jpg'),
 (<Band: Agalloch>,
  'Pale Folklore',
  'https://www.metal-archives.com/images/1/1/5/0/1150.jpg'),
 (<Band: Agalloch>,
  'Marrow of the Spirit',
  'https://www.metal-archives.co

In [273]:
not_found

[(<Band: Abigail>, 'Intercourse & Lust'),
 (<Band: Absu>, 'Tara'),
 (<Band: Absu>, 'Abzu'),
 (<Band: Absu>, 'The Sun of tiphareth'),
 (<Band: Aeternus>, '…And So the Night Became'),
 (<Band: Altar of Plagues>, 'Teethed Glory and Injury'),
 (<Band: Anaal Nathrakh>, 'Hell Is Empoty, and All the Devils Are Here'),
 (<Band: Aosoth>, 'IV: An Arrow in Heart'),
 (<Band: Aosoth>, 'III: Violence & Variations'),
 (<Band: Armagedda>, 'Ond Spiritism - Djæfvulens Skalder'),
 (<Band: Ascension>, 'Consolamentum'),
 (<Band: Bathory>, 'The Return…'),
 (<Band: Blasphemy>, 'Fallen Angel of Doom…'),
 (<Band: Borknagar>, 'Empricism'),
 ('Bölzer', ''),
 (<Band: Chapel>, "Satan's Rock 'n' Roll"),
 (<Band: Cobalt>, 'Gin'),
 (<Band: Cobalt>, 'Eater of Birds'),
 (<Band: Covenant>, 'Nexus Polaris'),
 (<Band: Cradle of Filth>, 'Dusk…and Her Embrace'),
 (<Band: Craft>, 'Terror Propaganda'),
 (<Band: Cultes des Ghoules>, 'Cultes des Ghoules'),
 (<Band: Dark Forest>, 'Aurora Borealis'),
 (<Band: Dark Fortress>, 'Séa

In [281]:
black.assign(url='')
for row in urls:
    band, album, url = row
    band = band.name # accidentally used the Metallum result for the band here
    black.loc[(black['band'] == band) & (black['album'] == album), 'url'] = url

In [282]:
black

Unnamed: 0,band,album,url
0,Burzum,Filosofem,https://www.metal-archives.com/images/3/8/2/38...
1,Dissection,Storm of the Light's Bane,
2,Emperor,In the Nightside Eclipse,
3,Emperor,Anthems to the Welkin at Dusk,
4,Ulver,Bergtatt: Et eeventyr I 5 capitler,
...,...,...,...
501,møl,Jord,
502,Mütiilation,"Remains of a Ruined, Dead, Cursed Soul",https://www.metal-archives.com/images/2/3/5/5/...
503,Ravencult,Morbid Blood,https://www.metal-archives.com/images/2/9/6/5/...
504,Forteresse,Crépuscule d'octobre,https://www.metal-archives.com/images/3/2/0/8/...


In [284]:
black.to_csv('black_urls.csv')

The missing URLs were found manually and fixed in the CSV. There were far more missing here, possibly because of the wider variety of punctuation and foreign characters, but more likely because there was some issue with the script

In [291]:
black = pd.read_csv('black_urls.csv', index_col=0)
black

Unnamed: 0,band,album,url
0,Burzum,Filosofem,https://www.metal-archives.com/images/3/8/2/38...
1,Dissection,Storm of the Light's Bane,https://www.metal-archives.com/images/7/6/8/76...
2,Emperor,In the Nightside Eclipse,https://www.metal-archives.com/images/9/3/93.jpg
3,Emperor,Anthems to the Welkin at Dusk,https://www.metal-archives.com/images/9/2/92.jpg
4,Ulver,Bergtatt: Et eeventyr I 5 capitler,https://www.metal-archives.com/images/3/6/0/9/...
...,...,...,...
501,møl,Jord,https://www.metal-archives.com/images/6/9/4/1/...
502,Mütiilation,"Remains of a Ruined, Dead, Cursed Soul",https://www.metal-archives.com/images/2/3/5/5/...
503,Ravencult,Morbid Blood,https://www.metal-archives.com/images/2/9/6/5/...
504,Forteresse,Crépuscule d'octobre,https://www.metal-archives.com/images/3/2/0/8/...


In [292]:
black[black['url'].isnull()]

Unnamed: 0,band,album,url
127,Kvelertak,Kvelertak,
257,Farsot,…En Their Medh Riki Fara…,


There are some null values that I could not find with a manual search, so we will drop them

In [293]:
black = black.dropna()

In [295]:
for i, row in black.iterrows():
    band = row['band']
    url = row['url']
    img = f'{band}_{i}'
    download_cover(url, 'black', img)