In [65]:
import musicbrainzngs
import pandas as pd
import time
# Register the application name, version and contact address that
# musicbrainzngs sends as the user agent with every MusicBrainz request.
musicbrainzngs.set_useragent("MyMusicApp", "0.1", "myemail@example.com")


In [72]:
def fetch_songs_by_year(year, max_songs):
    """Collect up to ``max_songs`` official single releases dated ``year``.

    Pages through ``musicbrainzngs.search_releases`` 100 results at a time and
    keeps only the releases whose ``date`` field starts with ``year``.

    Args:
        year: Release year to search for (int or numeric string).
        max_songs: Maximum number of records to return.

    Returns:
        A list of at most ``max_songs`` dicts with the keys ``"Title"``,
        ``"Artist"`` and ``"Year"``. ``"Year"`` holds the release's full
        ``date`` string as returned by the search (e.g. ``"2010-04-28"``).
        The list is shorter than ``max_songs`` when the search results run
        out or a request fails.
    """
    collected = []
    batch_size = 100  # Page size requested from the search endpoint
    offset = 0
    wanted_year = str(year)

    while len(collected) < max_songs:
        print(f"Fetching songs {offset + 1} to {offset + batch_size} for year {year}...")
        try:
            result = musicbrainzngs.search_releases(
                date=year,
                status="official",
                primarytype="single",
                limit=batch_size,
                offset=offset
            )
        except musicbrainzngs.WebServiceError as e:
            print(f"Error: {e}")
            break  # Return what has been collected so far

        releases = result.get("release-list", [])
        if not releases:
            # The search is exhausted. Without this check the loop would keep
            # requesting ever-larger offsets forever.
            break

        for release in releases:
            # The date may be "YYYY", "YYYY-MM" or "YYYY-MM-DD"; keep the year part
            release_date = release.get("date", "")
            release_year = release_date.split("-")[0] if release_date else None

            if release_year == wanted_year:
                title = release.get("title", "Unknown Title")
                artist_credit = release.get("artist-credit", [])
                artist = artist_credit[0]["name"] if artist_credit else "Unknown Artist"
                collected.append({"Title": title, "Artist": artist, "Year": release_date})

        offset += batch_size

        # Avoid hitting API rate limits, wait 1 second between calls
        time.sleep(1)

    # A page can overshoot the target, so trim to at most max_songs
    return collected[:max_songs]

# Gather up to 616 singles per year for 2010 and 2011 into one combined list
all_songs = []

for year in range(2010, 2012):
    print(f"Fetching songs for year {year}...")
    songs = fetch_songs_by_year(year, 616)  # At most 616 records for this year
    all_songs += songs  # Append this year's records to the combined list

# Build a single DataFrame from every year's records and write it to one CSV.
# NOTE(review): `year` here is the loop variable left over after the loop, so
# the file is named after the last year only although it holds all years.
df = pd.DataFrame(all_songs)
df.to_csv(f"musicbrainz_songs_{year}.csv", index=False)

# Report how many rows were written, then display the frame as the cell output
print(f"Successfully saved {len(df)} songs to 'musicbrainz_songs_{year}.csv'")
df

Unnamed: 0,Title,Artist,Year
0,Kick1 Kick3,Timo Maas,2010-10
1,Broken Arrow,Pixie Lott,2010
2,Het Potatis,Testicle Hazard,2010
3,Void of Nebulae,Infestum,2010-02
4,Pittsburgh Penguins Theme Song - Boys Of Winter,Icarus Witch,2010-04-28
...,...,...,...
1227,I Could Fall in Love,Karen Rodriguez,2011-03-10
1228,All by Myself,Pia Toscano,2011-03-10
1229,Mountain Bitch,Black Wizard,2011-02-18
1230,Kiss,Glee Cast,2011-03-08


In [74]:
import musicbrainzngs
import time
import pandas as pd

def _is_audiobook(release):
    """Return True when the release group's secondary types include "audiobook"."""
    secondary_types = release.get("release-group", {}).get("secondary-type-list", [])
    return "audiobook" in [stype.lower() for stype in secondary_types]


def _lookup_genre(release_group_id, genre_cache):
    """Return the comma-joined tag names of a release group, or "Unknown Genre".

    Successful lookups are stored in ``genre_cache`` so releases that share a
    release group cost a single request. Failed lookups are not cached.
    """
    if not release_group_id:
        return "Unknown Genre"
    if release_group_id in genre_cache:
        return genre_cache[release_group_id]

    try:
        release_group_info = musicbrainzngs.get_release_group_by_id(release_group_id, includes=["tags"])
    except musicbrainzngs.WebServiceError as e:
        print(f"Error fetching genre for release group {release_group_id}: {e}")
        return "Unknown Genre"

    tags = release_group_info.get("release-group", {}).get("tag-list", [])
    genre = ", ".join(tag["name"] for tag in tags) if tags else "Unknown Genre"
    genre_cache[release_group_id] = genre
    return genre


def fetch_songs_by_year(year, max_songs):
    """Collect up to ``max_songs`` official releases dated ``year``, with genre tags.

    Pages through ``musicbrainzngs.search_releases`` 100 results at a time,
    drops releases whose release group is marked as an audiobook, keeps the
    ones whose ``date`` starts with ``year``, and looks up the release group's
    tags to fill the ``"Genre"`` field.

    Args:
        year: Release year to search for (int or numeric string). Only years
            from 1900 to 2100 can match.
        max_songs: Maximum number of records to return.

    Returns:
        A list of at most ``max_songs`` dicts with the keys ``"Title"``,
        ``"Artist"``, ``"Year"`` (int) and ``"Genre"`` (comma-joined tag names
        or ``"Unknown Genre"``). The list is shorter than ``max_songs`` when
        the search results run out or a search request fails.
    """
    target_year = int(year)
    # Only years in this range were ever accepted, so skip the requests
    # entirely when the requested year cannot match.
    if not 1900 <= target_year <= 2100:
        return []

    collected = []
    genre_cache = {}  # release-group id -> genre string, for this call only
    batch_size = 100  # Page size requested from the search endpoint
    offset = 0

    while len(collected) < max_songs:
        print(f"Fetching songs {offset + 1} to {offset + batch_size} for year {year}...")
        try:
            # NOTE(review): a list is passed for primarytype; confirm how
            # musicbrainzngs serialises list values into the search query.
            result = musicbrainzngs.search_releases(
                date=year,
                status="official",
                primarytype=["album", "single", "ep"],
                limit=batch_size,
                offset=offset
            )
        except musicbrainzngs.WebServiceError as e:
            print(f"Error: {e}")
            break  # Return what has been collected so far

        releases = result.get("release-list", [])
        if not releases:
            # The search is exhausted. Without this check the loop would keep
            # requesting ever-larger offsets forever.
            break

        for release in releases:
            if len(collected) >= max_songs:
                # Enough records: avoid a genre request per surplus release
                break
            if _is_audiobook(release):
                continue

            # The date may be "YYYY", "YYYY-MM" or "YYYY-MM-DD"; keep the year part
            release_date = release.get("date", "")
            release_year = release_date.split("-")[0] if release_date else None
            if not (release_year and release_year.isdigit()):
                continue
            release_year = int(release_year)
            if release_year != target_year:
                continue

            title = release.get("title", "Unknown Title")
            artist_credit = release.get("artist-credit", [])
            artist = artist_credit[0]["name"] if artist_credit else "Unknown Artist"
            genre = _lookup_genre(release.get("release-group", {}).get("id"), genre_cache)

            collected.append({"Title": title, "Artist": artist, "Year": release_year, "Genre": genre})

        offset += batch_size

        # Avoid hitting API rate limits, wait 1 second between calls
        time.sleep(1)

    return collected[:max_songs]

# Register the user agent with MusicBrainz before any request (required)
musicbrainzngs.set_useragent("my-app", "0.1", "my-email@example.com")

# Accumulate every year's records in one list.
# NOTE(review): range(2010, 2011) covers 2010 only, while the output file name
# below says 2010 to 2023 — confirm which range is intended.
all_songs = []
for year in range(2010, 2011):
    print(f"Fetching songs for year {year}...")
    songs = fetch_songs_by_year(year, 616)  # At most 616 records for this year
    all_songs += songs  # Append this year's records to the combined list

# One DataFrame for everything that was collected
df = pd.DataFrame(all_songs)

# Write the combined records to a single CSV file
output_file = "musicbrainz_songs_with_genre_2010_to_2023.csv"
df.to_csv(output_file, index=False)

# Report how many rows were written and print the frame
print(f"Successfully saved {len(df)} songs to '{output_file}'")
print(df)

KeyboardInterrupt: 