In [40]:
import lyricsgenius
import glob, os 
import pandas as pd
import time 
from lyricsgenius import Genius 

In [41]:
DATA_PATH = "/Users/shannon/COMM 557/COMM 557 - Final Project/data/"
files = glob.glob(os.path.join(DATA_PATH, "*.csv"))

print("Found files:")
for f in files:
    print("-", os.path.basename(f))

Found files:
- TikTok_songs_2019.csv
- TikTok_songs_2021.csv
- TikTok_songs_2020.csv
- TikTok_songs_2022.csv
- spotify_top_charts_19.csv
- spotify_top_charts_22.csv
- spotify_top_charts_20.csv
- spotify_top_charts_21.csv


In [42]:
# -----------------------
# 1. SETUP
# -----------------------

# Your local data folder
DATA_PATH = "/Users/shannon/COMM 557/COMM 557 - Final Project/data"

# Define output file paths
PARTIAL_PATH = os.path.join(DATA_PATH, "combined_with_lyrics_partial.csv")
FINAL_PATH = os.path.join(DATA_PATH, "final_songs_with_lyrics.csv")

# Genius API key (you’ll need to insert yours here)
GENIUS_API_KEY = "V4olPhuPZPvQh__QUgLeUbPif5jGwvwNJSSVPF3iQeQEgRRTEBb85YTIAcvv0gZn"
genius = Genius(GENIUS_API_KEY, timeout=15, skip_non_songs=True, remove_section_headers=True)

# -----------------------
# 2. LOAD CSVs
# -----------------------

def load_spotify_data(data_path):
    spotify_files = glob.glob(os.path.join(data_path, "spotify_*.csv"))
    dfs = []
    for file in spotify_files:
        df = pd.read_csv(file)
        df["source"] = "spotify"
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

def load_tiktok_data(data_path):
    tiktok_files = glob.glob(os.path.join(data_path, "TikTok_*.csv"))
    dfs = []
    for file in tiktok_files:
        df = pd.read_csv(file)
        df["source"] = "tiktok"
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

spotify_df = load_spotify_data(DATA_PATH)
tiktok_df = load_tiktok_data(DATA_PATH)

# -----------------------
# 3. STANDARDIZE COLUMNS
# -----------------------

# Make sure both have matching column names for merging later
spotify_df.rename(columns={
    "artist_names": "artist_name"
}, inplace=True)

# Align common columns
common_cols = [
    "track_name", "artist_name", "danceability", "energy", "loudness", "mode",
    "key", "speechiness", "acousticness", "instrumentalness", "liveness",
    "tempo", "time_signature", "duration_ms", "source"
]

spotify_df = spotify_df[common_cols]
tiktok_df = tiktok_df[common_cols]

# Combine both
combined_df = pd.concat([spotify_df, tiktok_df], ignore_index=True)
combined_df.drop_duplicates(subset=["track_name", "artist_name"], inplace=True)

# -----------------------
# 4. FETCH LYRICS
# -----------------------

def get_lyrics(title, artist):
    """Fetch lyrics from Genius API."""
    try:
        song = genius.search_song(title=title, artist=artist)
        if song and song.lyrics:
            return song.lyrics
        else:
            return None
    except Exception as e:
        print(f"Error fetching {title} by {artist}: {e}")
        return None

# Add an empty lyrics column
combined_df["lyrics"] = None

# Track progress and handle saving partials
for i, row in combined_df.iterrows():
    if pd.isna(row["lyrics"]):
        lyrics = get_lyrics(row["track_name"], row["artist_name"])
        combined_df.at[i, "lyrics"] = lyrics

        # Save partial progress every 10 songs
        if i % 10 == 0:
            print(f"Processed {i}/{len(combined_df)} songs… saving partial results.")
            combined_df.to_csv(PARTIAL_PATH, index=False)

        # Avoid rate limits
        time.sleep(2)

# -----------------------
# 5. SAVE FINAL DATA
# -----------------------

combined_df.to_csv(FINAL_PATH, index=False)
print("✅ All done! Saved final file to:", FINAL_PATH)


Searching for "Sunflower - Spider-Man: Into the Spider-Verse" by Post Malone, Swae Lee...
No results found for: 'Sunflower - Spider-Man: Into the Spider-Verse Post Malone, Swae Lee'
Processed 0/3744 songs… saving partial results.
Searching for "Wow." by Post Malone...
Done.
Searching for "thank u, next" by Ariana Grande...
Done.
Searching for "Without Me" by Halsey...
Done.
Searching for "Calma - Remix" by Pedro Capó, Farruko...
Done.
Searching for "Sweet but Psycho" by Ava Max...
Done.
Searching for "Taki Taki (with Selena Gomez, Ozuna & Cardi B)" by DJ Snake, Selena Gomez, Ozuna, Cardi B...
No results found for: 'Taki Taki (with Selena Gomez, Ozuna & Cardi B) DJ Snake, Selena Gomez, Ozuna, Cardi B'
Searching for "Dancing With A Stranger (with Normani)" by Sam Smith, Normani...
Done.
Searching for "High Hopes" by Panic! At The Disco...
Done.
Searching for "MIA (feat. Drake)" by Bad Bunny, Drake...
Done.
Searching for "Happier" by Marshmello, Bastille...
Done.
Processed 10/3744 songs… 