In [None]:
from copy import deepcopy
from collections import Counter
import logging
import os
import re
import json

import musicbrainzngs

In [None]:
# Matches Shazam's share message ("I used @Shazam to discover <title> by
# <artist>. https…") and captures the song title and the artist name in the
# named groups ``title`` and ``artist``. Both groups are greedy, so a string
# with several " by " separators is split at the last one.
pattern = re.compile(
    pattern=r".*I used [@#]?Shazam to discover (?P<title>.*) by (?P<artist>.*)\. https.*",
    flags=0,
)

In [None]:
def filter_recordings_by_artist(work, artist_id):
    """Yield the recordings linked to ``work`` that credit ``artist_id``.

    Every entry of ``work["recording-relation-list"]`` is fetched in full with
    ``musicbrainzngs.get_recording_by_id`` (one request per linked recording,
    including artist credits, releases and tags), and the fetched recording is
    yielded when any of its artist credits has the requested id.

    Args:
        work: A work dict as returned by musicbrainzngs. A work without a
            ``"recording-relation-list"`` key is treated as having no
            recordings.
        artist_id: MusicBrainz id of the artist to keep.

    Yields:
        The complete ``get_recording_by_id`` response (a dict whose
        ``"recording"`` key holds the recording) for each match, in the order
        the relations are listed.
    """
    # A work with no linked recordings may lack the key altogether; yielding
    # nothing is more useful to callers than a KeyError.
    for relation in work.get("recording-relation-list", []):
        recording = musicbrainzngs.get_recording_by_id(
            relation["recording"]["id"],
            includes=["artist-credits", "releases", "tags"],
        )

        # The artist-credit list can contain non-dict entries (musicbrainzngs
        # appears to interleave join phrases such as " & " as plain strings);
        # those carry no artist id and are skipped explicitly.
        if any(
            isinstance(credit, dict) and credit["artist"]["id"] == artist_id
            for credit in recording["recording"]["artist-credit"]
        ):
            yield recording

In [None]:
def get_earliest_recording_and_year(recordings):
    """Find the recording with the earliest dated release.

    Args:
        recordings: Iterable of dicts shaped like the ``get_recording_by_id``
            response, i.e. ``{"recording": {"release-list": [...]}}``. Each
            release may carry a ``"date"`` string whose first four characters
            are the year.

    Returns:
        A ``(recording, year)`` tuple. When several releases share the minimum
        year, the one seen last wins. If no release has a usable date the
        result is ``(None, float("inf"))``.
    """
    min_year = float("inf")
    earliest_recording = None

    for recording in recordings:
        # A recording without releases may lack the key; treat it as empty.
        for release in recording["recording"].get("release-list", []):
            try:
                year = int(release["date"][:4])
            except (KeyError, ValueError):
                # Undated (or unparsable) releases must not take part in the
                # comparison; previously they silently reused the year of the
                # release processed just before them.
                continue

            if year <= min_year:
                min_year = year
                earliest_recording = recording

    return earliest_recording, min_year

In [None]:
def get_most_likely_genre(recordings):
    """Return the tag with the highest accumulated count across ``recordings``.

    Args:
        recordings: Iterable of dicts shaped like the ``get_recording_by_id``
            response. Each ``recording["recording"]`` may have a ``"tag-list"``
            of ``{"name": ..., "count": ...}`` dicts; recordings without one
            contribute nothing.

    Returns:
        The name of the tag whose counts sum to the largest total, or ``None``
        when no recording carries any tag (this used to raise ``IndexError``).
    """
    tags = Counter()
    for recording in recordings:
        for tag in recording["recording"].get("tag-list", []):
            # Counts are converted with int() before being added up.
            tags[tag["name"]] += int(tag["count"])

    top = tags.most_common(1)
    return top[0][0] if top else None

In [None]:
def musicbrainz(string):
    """Look up the song mentioned in a Shazam share message on MusicBrainz.

    The title and artist are extracted from ``string`` with the module-level
    ``pattern``; the best-scoring artist and work returned by the MusicBrainz
    search are then used to collect the artist's recordings of that work.

    Args:
        string: Text of the form
            ``"I used @Shazam to discover <title> by <artist>. https://..."``.

    Returns:
        A dict with the keys ``"artist"`` (name of the chosen artist),
        ``"title"`` (title of the chosen work), ``"year"`` (as reported by
        ``get_earliest_recording_and_year``) and ``"genre"`` (as reported by
        ``get_most_likely_genre``).

    Raises:
        ValueError: If ``string`` does not match ``pattern``, or if the artist
            or work search comes back empty.
    """
    musicbrainzngs.set_useragent("geosong", "2", "pycamp-es2022")

    match = pattern.search(string)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "Not a recognizable Shazam share message: {!r}".format(string)
        )
    song_title = match.group("title")
    artist_name = match.group("artist")

    # NOTE: the search is restricted to type="group", so artists of any other
    # type are presumably never matched.
    related_artists = musicbrainzngs.search_artists(artist=artist_name, type="group")
    if not related_artists["artist-list"]:
        raise ValueError(
            "MusicBrainz returned no artist for {!r}".format(artist_name)
        )
    chosen_artist = max(
        related_artists["artist-list"], key=lambda a: int(a["ext:score"])
    )

    related_works = musicbrainzngs.search_works(song_title)
    if not related_works["work-list"]:
        raise ValueError(
            "MusicBrainz returned no work for {!r}".format(song_title)
        )
    work = max(related_works["work-list"], key=lambda w: int(w["ext:score"]))

    # The generator is materialized because the recordings are consumed twice.
    possible_recordings = list(filter_recordings_by_artist(work, chosen_artist["id"]))

    _, year = get_earliest_recording_and_year(possible_recordings)
    genre = get_most_likely_genre(possible_recordings)

    return {
        "artist": chosen_artist["name"],
        "title": work["title"],
        "year": year,
        "genre": genre,
    }

In [None]:
# Sample Shazam share message used to exercise the lookup.
text = "I used @Shazam to discover Yes Sir, I Can Boogie by Baccara. https://t.co/JaCq9MNPhM"

In [None]:
# Run the lookup for the sample message; as the last expression of the cell,
# the returned dict (artist, title, year, genre) is displayed as its output.
# This goes through musicbrainzngs search/get calls for every candidate.
musicbrainz(text)