The goal of this notebook is to tie the songs I know into the era that they are from. 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import numpy as np

# Load the music data
df = pd.read_csv('all.csv')

print(f"Total songs: {len(df)}")
print(f"Total unique artists: {df['Artist'].nunique()}")
print(f"Columns: {list(df.columns)}")
print("\nFirst few rows:")
df.head()

Total songs: 2223
Total unique artists: 863
Columns: ['#', 'Song', 'Artist', 'Popularity', 'BPM', 'Genres', 'Album', 'Album Date', 'Time', 'Dance', 'Energy', 'Acoustic', 'Instrumental', 'Happy', 'Speech', 'Live', 'Loud (Db)', 'Key', 'Time Signature', 'Added At', 'Spotify Track Id', 'Camelot', 'ISRC']

First few rows:


Unnamed: 0,#,Song,Artist,Popularity,BPM,Genres,Album,Album Date,Time,Dance,...,Happy,Speech,Live,Loud (Db),Key,Time Signature,Added At,Spotify Track Id,Camelot,ISRC
0,1,Gabriel Ratchet,Deca,41,82,jazz rap,The Ocean,2013-09-23,03:11,68,...,76,30,10,-5,G Major,4,2025-08-03,4uQjhQYQyVZBKDtve537qZ,9B,TCABQ1315888
1,2,Lifestylez Ov Da Poor & Dangerous,Big L,45,85,"east coast hip hop, old school hip hop, hardco...",Lifestylez Ov Da Poor & Dangerous,1995-03-28,03:21,68,...,40,30,0,-9,D#/E♭ Minor,4,2025-08-03,5GseQckHgP7uWyaiCd5ZW3,2A,USSM10022975
2,3,Danger Zone,Big L,42,171,"east coast hip hop, old school hip hop, hardco...",Lifestylez Ov Da Poor & Dangerous,1995-03-28,03:36,52,...,34,30,20,-11,G Major,4,2025-08-03,0kU9oWqYnPeqEca3x2auV9,9B,USSM10022974
3,4,Hive (feat. Vince Staples & Casey Veggies),"Earl Sweatshirt,Vince Staples,Casey Veggies",55,145,"experimental hip hop, alternative hip hop,",Doris,2013-08-16,04:37,62,...,78,50,10,-7,C♯/D♭ Major,4,2025-08-03,6JKoTnberZoBgUuxa6l4Kh,3B,USQX91301103
4,5,King Kunta,Kendrick Lamar,73,107,"hip hop, west coast hip hop",To Pimp A Butterfly,2015-03-16,03:54,88,...,49,10,10,-5,F#/G♭ Minor,4,2025-08-03,0N3W5peJUQtI4eyR6GJT5O,11A,USUM71502494


In [3]:
# Create a cleaned up artists that grabs the first artists before any comma
df["Cleaned_Artist"] = df["Artist"].str.split(",").str[0]

In [4]:
# if Bach (in any case) is in the album name, change the artist to Bach, do this permenently
df.loc[df["Album"].str.contains("Bach", case=False), "Cleaned_Artist"] = "Johann Sebastian Bach "

In [6]:
df["Cleaned_Artist"].unique()

array(['Deca', 'Big L', 'Earl Sweatshirt', 'Kendrick Lamar',
       'Little Simz', 'Ms. Lauryn Hill', 'Bach', 'Johannes Brahms',
       'Caroline Shaw', 'Frédéric Chopin', 'Frederick Delius',
       'Ludwig van Beethoven', 'Claude Debussy',
       'Wolfgang Amadeus Mozart', 'Edward Elgar', 'John Adams',
       'Orchestral Academy Of Los Angeles', 'Edvard Grieg',
       'Camille Saint-Saëns', 'Víkingur Ólafsson', 'Johann Strauss I',
       'Robert Schumann', 'William Grant Still', 'Gustav Mahler',
       'Arnold Schoenberg', 'Igor Stravinsky', 'George Frideric Handel',
       'Jules Massenet', 'Henryk Górecki', 'Max Bruch', 'Jean Sibelius',
       'Philip Glass', 'Laurence Equilbey', 'Carducci String Quartet',
       'Hörður Áskelsson', 'Christopher Dennis Coleman', 'Henry Purcell',
       'Eric Whitacre', 'Anton Arensky', 'Arvo Pärt', 'Ola Gjeilo',
       'John Cage', 'Karl Jenkins', 'Gabriel Fauré', 'Evgeny Kissin',
       'Johann Sebastian Bach', 'Arash Safaian', 'Franz Schubert',
  

In [None]:
# split the df into two, one for classical, and one not.
# Include additional genres as classical: medieval, gregorian chant, chamber music
pattern = r'(Classical|Medieval|Gregorian Chant|Chamber Music)'
classical_df = df[df['Genres'].astype(str).str.contains(pattern, case=False, regex=True, na=False)]
non_classical_df = df[~df['Genres'].astype(str).str.contains(pattern, case=False, regex=True, na=False)]

In [17]:
## show all genres in non classical, genres are in a comma seperated list
non_classical_genres = non_classical_df['Genres'].str.cat(sep=', ')
for genre in non_classical_genres.split(', '):
    print(genre)

jazz rap
east coast hip hop
old school hip hop
hardcore hip hop
boom bap
hip hop
east coast hip hop
old school hip hop
hardcore hip hop
boom bap
hip hop
experimental hip hop
alternative hip hop

hip hop
west coast hip hop
neo soul
jazz rap
minimalism
chamber music
minimalism
avant-garde
opera
orchestra
minimalism
avant-garde
opera
choral
choral
choral
choral
choral
requiem
gregorian chant
choral
gregorian chant
choral
free jazz
choral
free jazz
choral
choral
free jazz
choral
free jazz
choral
free jazz
choral
free jazz
choral
choral
minimalism
chamber music
tango

choral
gregorian chant
chamber music
minimalism
chamber music
choral
gregorian chant
medieval
requiem
choral
requiem

choral
choral
choral
free jazz
choral
jazz
cool jazz
bebop
hard bop
tango
choral
gregorian chant
medieval
requiem
choral
gregorian chant
medieval
requiem
requiem
requiem
choral
yacht rock
folk
singer-songwriter
folk rock
folk
singer-songwriter
singer-songwriter
vocal jazz
french jazz
jazz
jazz rap
bossa nova
ja

In [10]:
# Artist Analysis
print("ARTIST ANALYSIS")
print("=" * 50)

# Use primary artist name before any comma (e.g., "Artist1, Artist2" -> "Artist1")
df['PrimaryArtist'] = df['Artist'].astype(str).str.split(',').str[0].str.strip()

# Top 20 most frequent primary artists
print("Top 20 Artists by Song Count:")
artist_counts = df['PrimaryArtist'].value_counts().head(20)
print(artist_counts)

# Display all unique primary artists (first 100)
unique_primary = df['PrimaryArtist'].nunique()
print(f"\nAll Artists in Your Collection (showing first 100 of {unique_primary}):")
all_artists = sorted(df['PrimaryArtist'].unique())
for i, artist in enumerate(all_artists[:100], 1):
    print(f"{i:3d}. {artist}")

if len(all_artists) > 100:
    print(f"... and {len(all_artists) - 100} more artists")

ARTIST ANALYSIS
Top 20 Artists by Song Count:
PrimaryArtist
Johann Sebastian Bach       88
Elliott Smith               77
of Montreal                 60
The Shins                   52
Philip Glass                45
Frédéric Chopin             37
Göran Söllscher             34
Catrin Finch                33
The Strokes                 33
The Beatles                 32
Radiohead                   31
András Schiff               31
Wolfgang Amadeus Mozart     29
Green Day                   28
Julian Rachlin              27
Augsburger Domsingknaben    24
The Beach Boys              23
Franz Ferdinand             22
Emerson String Quartet      21
Maurice Ravel               20
Name: count, dtype: int64

All Artists in Your Collection (showing first 100 of 618):
  1. A Tribe Called Quest
  2. A Winged Victory for the Sullen
  3. ABBA
  4. AZ
  5. Academy of Ancient Music
  6. Academy of St. Martin in the Fields Wind Ensemble
  7. Accademia Bizantina
  8. Adam Holzman
  9. Adrianne Lenker
 10.