In [3]:
pip install spotipy pandas python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install python-dotenv


Note: you may need to restart the kernel to use updated packages.


In [6]:
import csv
import os
import time
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得（offset最大10まで）"""
    tracks = []
    offset = 0
    limit = 50

    while offset <= 19:
        search_results = spotify.search(q=f'artist:{artist_name}', type='track', limit=limit, offset=offset)
        items = search_results['tracks']['items']
        
        if not items:  # データが空になったら終了
            break
        
        for track in items:
            # 全アーティスト情報を取得
            all_artists = track['artists']
            
            # アーティストが複数いない場合はスキップ
            if len(all_artists) <= 1:
                continue

            # メインアーティストのIDとフィーチャリングアーティストのIDを取得
            all_artist_ids = [artist['id'] for artist in all_artists]

            # すべてのアーティストが有効なリストに含まれているかチェック
            if all(aid in valid_artist_ids for aid in all_artist_ids):
                track_info = {
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,  # 全アーティストIDを保存
                    'track_name': track['name']
                }
                tracks.append(track_info)
        
        offset += 1  # 次のページに進む
        time.sleep(0.01)  # API制限を考慮してスリープ

    return tracks

def main():
    # 現在時刻を取得してフォーマット
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    
    # 出力用ディレクトリを作成
    os.makedirs(output_dir, exist_ok=True)

    # CSVファイルからラッパー名とIDを読み込み
    with open('./rappers_with_spotify_ids.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    # 有効なアーティストIDのリストを作成
    valid_artist_ids = {rapper['id'] for rapper in rappers}

    all_tracks = []

    # 各ラッパーの楽曲を取得
    for rapper in rappers:
        print(f"Fetching tracks for {rapper['name']}...")
        tracks = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
        all_tracks.extend(tracks)

    # 曲名で重複を削除
    unique_tracks = {track['track_name']: track for track in all_tracks}.values()

    # 出力ファイルパスを作成
    output_file = os.path.join(output_dir, 'featured_tracks.csv')

    # 結果をCSVに保存
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'release_year', 'artists', 'track_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for track in unique_tracks:
            # アーティストIDのリストを文字列として保存
            writer.writerow({
                'id': track['id'],
                'release_year': track['release_year'],
                'artists': ', '.join(track['artists']),  # リストを文字列に変換
                'track_name': track['track_name']
            })

    print(f"フィーチャリング楽曲のCSVファイルが '{output_file}' に保存されました。")

if __name__ == "__main__":
    main()

ImportError: cannot import name 'load_dotenv' from 'dotenv' (unknown location)

# offsetを使用しない場合

In [None]:
import csv
import os
import time
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得（クエリ分割方式）"""
    tracks = []
    queries = [
        f'artist:{artist_name}',  # アーティスト名そのまま
        f'artist:{artist_name} feature',  # フィーチャー楽曲を対象
        f'artist:{artist_name} remix',  # リミックスを対象
        f'artist:{artist_name} collaboration'  # コラボレーションを対象
    ]
    limit = 50

    for query in queries:
        search_results = spotify.search(q=query, type='track', limit=limit)
        items = search_results['tracks']['items']

        for track in items:
            # 全アーティスト情報を取得
            all_artists = track['artists']
            
            # アーティストが複数いない場合はスキップ
            if len(all_artists) <= 1:
                continue

            # メインアーティストのIDとフィーチャリングアーティストのIDを取得
            all_artist_ids = [artist['id'] for artist in all_artists]

            # すべてのアーティストが有効なリストに含まれているかチェック
            if all(aid in valid_artist_ids for aid in all_artist_ids):
                track_info = {
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,  # 全アーティストIDを保存
                    'track_name': track['name']
                }
                tracks.append(track_info)
        
        time.sleep(0.1)  # API制限を考慮してスリープ

    return tracks

def main():
    # 現在時刻を取得してフォーマット
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    
    # 出力用ディレクトリを作成
    os.makedirs(output_dir, exist_ok=True)

    # CSVファイルからラッパー名とIDを読み込み
    with open('./rappers_with_spotify_ids.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    # 有効なアーティストIDのリストを作成
    valid_artist_ids = {rapper['id'] for rapper in rappers}

    all_tracks = []

    # 各ラッパーの楽曲を取得
    for rapper in rappers:
        print(f"Fetching tracks for {rapper['name']}...")
        tracks = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
        all_tracks.extend(tracks)

    # 曲名で重複を削除
    unique_tracks = {track['track_name']: track for track in all_tracks}.values()

    # 出力ファイルパスを作成
    output_file = os.path.join(output_dir, 'featured_tracks.csv')

    # 結果をCSVに保存
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'release_year', 'artists', 'track_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for track in unique_tracks:
            # アーティストIDのリストを文字列として保存
            writer.writerow({
                'id': track['id'],
                'release_year': track['release_year'],
                'artists': ', '.join(track['artists']),  # リストを文字列に変換
                'track_name': track['track_name']
            })

    print(f"フィーチャリング楽曲のCSVファイルが '{output_file}' に保存されました。")

if __name__ == "__main__":
    main()


# 少数ラッパーから3階層行く場合

In [5]:
import csv
import os
import time
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

def get_artist_names_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから名前を取得"""
    artist_names = {}
    try:
        # アーティスト情報を一括取得
        artists = spotify.artists(artist_ids)['artists']
        for artist in artists:
            artist_names[artist['id']] = artist['name']
    except Exception as e:
        print(f"Error fetching artist names: {e}")
    return artist_names

def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得（クエリ分割方式）"""
    tracks = []
    new_artists = set()
    query = f'artist:{artist_name}'  # アーティスト名そのまま
    limit = 50

    try:
        search_results = spotify.search(q=query, type='track', limit=limit)
        items = search_results['tracks']['items']

        for track in items:
            all_artists = track['artists']
            
            if len(all_artists) <= 1:
                continue

            all_artist_ids = [artist['id'] for artist in all_artists]
            new_artists.update(all_artist_ids)

            if all(aid in valid_artist_ids for aid in all_artist_ids):
                tracks.append({
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,
                    'track_name': track['name']
                })
    except Exception as e:
        print(f"Error fetching tracks for {artist_name}: {e}")
    
    return tracks, new_artists

def save_all_rappers_to_csv(rappers, filename):
    """すべてのラッパー（アーティスト）をCSVに保存"""
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        unique_rappers = {rapper['id']: rapper for rapper in rappers}.values()
        writer.writerows(unique_rappers)
    print(f"全てのラッパーの情報が '{filename}' に保存されました。")

def main():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    with open('./rappers_with_spotify_ids_limited.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        initial_rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    rappers_generations = [initial_rappers]
    valid_artist_ids = {rapper['id'] for rapper in initial_rappers}
    all_tracks = []

    for iteration in range(3):  # 3回繰り返し
        print(f"Iteration {iteration + 1}...")
        new_rappers = []

        for rapper in rappers_generations[-1]:
            print(f"Fetching tracks for {rapper['name']}...")
            tracks, new_artists = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
            all_tracks.extend(tracks)
            new_rappers.extend([{'id': aid} for aid in new_artists if aid not in valid_artist_ids])

        if not new_rappers:
            break

        # 新しいアーティストの名前を一括取得
        new_artist_ids = [rapper['id'] for rapper in new_rappers]
        artist_names = get_artist_names_by_ids(new_artist_ids)
        for rapper in new_rappers:
            rapper['name'] = artist_names.get(rapper['id'], f"Artist {rapper['id']}")

        unique_new_rappers = {rapper['id']: rapper for rapper in new_rappers}.values()
        rappers_generations.append(list(unique_new_rappers))
        valid_artist_ids.update(new_artist_ids)

    unique_tracks = {track['track_name']: track for track in all_tracks}.values()

    output_file = os.path.join(output_dir, 'featured_tracks.csv')
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'release_year', 'artists', 'track_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(unique_tracks)

    all_rappers = [rapper for generation in rappers_generations for rapper in generation]
    save_all_rappers_to_csv(all_rappers, os.path.join(output_dir, 'all_rappers.csv'))

if __name__ == "__main__":
    main()


Iteration 1...
Fetching tracks for Kanye West...


KeyboardInterrupt: 

In [None]:
import csv
import os
import time
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

def get_artist_names_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから名前を取得"""
    artist_names = {}
    try:
        # アーティスト情報を一括取得
        artists = spotify.artists(artist_ids)['artists']
        for artist in artists:
            artist_names[artist['id']] = artist['name']
    except Exception as e:
        print(f"Error fetching artist names: {e}")
    return artist_names

def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得（クエリ分割方式）"""
    tracks = []
    new_artists = set()
    query = f'artist:{artist_name}'  # アーティスト名そのまま
    limit = 50

    try:
        search_results = spotify.search(q=query, type='track', limit=limit)
        items = search_results['tracks']['items']

        for track in items:
            all_artists = track['artists']
            
            if len(all_artists) <= 1:
                continue

            all_artist_ids = [artist['id'] for artist in all_artists]
            new_artists.update(all_artist_ids)

            if all(aid in valid_artist_ids for aid in all_artist_ids):
                tracks.append({
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,
                    'track_name': track['name']
                })
    except Exception as e:
        print(f"Error fetching tracks for {artist_name}: {e}")
    
    return tracks, list(new_artists)[:20]  # 次世代アーティストを最大20人に制限

def save_all_rappers_to_csv(rappers, filename):
    """すべてのラッパー（アーティスト）をCSVに保存"""
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        unique_rappers = {rapper['id']: rapper for rapper in rappers}.values()
        writer.writerows(unique_rappers)
    print(f"全てのラッパーの情報が '{filename}' に保存されました。")

def main():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    with open('./rappers_with_spotify_ids_limited.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        initial_rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    rappers_generations = [initial_rappers]
    valid_artist_ids = {rapper['id'] for rapper in initial_rappers}
    all_tracks = []

    for iteration in range(3):  # 3回繰り返し
        print(f"Iteration {iteration + 1}...")
        new_rappers = []

        for rapper in rappers_generations[-1]:
            print(f"Fetching tracks for {rapper['name']}...")
            tracks, new_artists = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
            all_tracks.extend(tracks)
            new_rappers.extend([{'id': aid} for aid in new_artists if aid not in valid_artist_ids])

        if not new_rappers:
            break

        # 新しいアーティストの名前を一括取得
        new_artist_ids = [rapper['id'] for rapper in new_rappers]
        artist_names = get_artist_names_by_ids(new_artist_ids)
        for rapper in new_rappers:
            rapper['name'] = artist_names.get(rapper['id'], f"Artist {rapper['id']}")

        unique_new_rappers = {rapper['id']: rapper for rapper in new_rappers}.values()
        rappers_generations.append(list(unique_new_rappers))
        valid_artist_ids.update(new_artist_ids)

    unique_tracks = {track['track_name']: track for track in all_tracks}.values()

    output_file = os.path.join(output_dir, 'featured_tracks.csv')
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'release_year', 'artists', 'track_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(unique_tracks)

    all_rappers = [rapper for generation in rappers_generations for rapper in generation]
    save_all_rappers_to_csv(all_rappers, os.path.join(output_dir, 'all_rappers.csv'))

if __name__ == "__main__":
    main()


In [None]:
import csv
import os
import time
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)

def get_artist_names_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから名前を取得"""
    artist_names = {}
    try:
        # アーティストIDを50件ずつ分割してリクエスト
        for i in range(0, len(artist_ids), 50):
            chunk = artist_ids[i:i+50]  # 最大50件のチャンク
            artists = spotify.artists(chunk)['artists']
            for artist in artists:
                artist_names[artist['id']] = artist['name']
    except Exception as e:
        print(f"Error fetching artist names: {e}")
    return artist_names

def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得（クエリ分割方式）"""
    tracks = []
    new_artists = set()
    query = f'artist:{artist_name}'  # アーティスト名そのまま
    limit = 50

    try:
        search_results = spotify.search(q=query, type='track', limit=limit)
        items = search_results['tracks']['items']

        for track in items:
            all_artists = track['artists']
            
            if len(all_artists) <= 1:
                continue

            all_artist_ids = [artist['id'] for artist in all_artists]
            new_artists.update(all_artist_ids)

            if all(aid in valid_artist_ids for aid in all_artist_ids):
                tracks.append({
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,
                    'track_name': track['name']
                })
    except Exception as e:
        print(f"Error fetching tracks for {artist_name}: {e}")
    
    return tracks, list(new_artists)[:20]  # 次世代アーティストを最大20人に制限

def save_all_rappers_to_csv(rappers, filename):
    """すべてのラッパー（アーティスト）をCSVに保存"""
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        unique_rappers = {rapper['id']: rapper for rapper in rappers}.values()
        writer.writerows(unique_rappers)
    print(f"全てのラッパーの情報が '{filename}' に保存されました。")

def main():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    with open('./rappers_with_spotify_ids_limited.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        initial_rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    rappers_generations = [initial_rappers]
    valid_artist_ids = {rapper['id'] for rapper in initial_rappers}
    all_tracks = []

    for iteration in range(3):  # 3回繰り返し
        print(f"Iteration {iteration + 1}...")
        new_rappers = []
        for rapper in rappers_generations[-1]:
            print(f"Fetching tracks for {rapper['name']}...")
            tracks, new_artists = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
            all_tracks.extend(tracks)

            # 新しいアーティストを追加
            for artist_id in new_artists:
                if artist_id not in valid_artist_ids:
                    new_rappers.append({'id': artist_id})  # 名前は後で取得

        if not new_rappers:
            break

        # 新しいアーティストの名前を一括取得
        new_artist_ids = [rapper['id'] for rapper in new_rappers]
        if new_artist_ids:  # 新しいIDが存在する場合のみ取得
            artist_names = get_artist_names_by_ids(new_artist_ids)
            for rapper in new_rappers:
                rapper['name'] = artist_names.get(rapper['id'], f"Artist {rapper['id']}")

        unique_new_rappers = {rapper['id']: rapper for rapper in new_rappers}.values()
        rappers_generations.append(list(unique_new_rappers))
        valid_artist_ids.update(new_artist_ids)

    unique_tracks = {track['track_name']: track for track in all_tracks}.values()

    output_file = os.path.join(output_dir, 'featured_tracks.csv')
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'release_year', 'artists', 'track_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(unique_tracks)

    all_rappers = [rapper for generation in rappers_generations for rapper in generation]
    save_all_rappers_to_csv(all_rappers, os.path.join(output_dir, 'all_rappers.csv'))

if __name__ == "__main__":
    main()


# データを刷新

In [2]:
import csv
import os
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)


def get_artist_details_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから詳細情報を取得"""
    print(f"Fetching details for {len(artist_ids)} artists...")
    artist_details = []
    try:
        for i in range(0, len(artist_ids), 50):
            chunk = artist_ids[i:i+50]
            print(f"Fetching chunk: {chunk}")
            artists = spotify.artists(chunk)['artists']
            artist_details.extend(artists)
    except Exception as e:
        print(f"Error fetching artist details: {e}")
    print(f"Fetched {len(artist_details)} artist details.")
    return artist_details


def get_tracks_by_artist_id(artist_id):
    """Spotify APIを使用してアーティストIDから楽曲を取得"""
    print(f"Fetching tracks for artist ID: {artist_id}")
    tracks = []
    try:
        results = spotify.artist_top_tracks(artist_id)
        for track in results['tracks']:
            artist_ids = [artist['id'] for artist in track['artists']]
            tracks.append({
                'track_id': track['id'],
                'track_name': track['name'],
                'album_name': track['album']['name'],
                'release_date': track['album']['release_date'],
                'popularity': track['popularity'],
                'artist_ids': artist_ids
            })
        print(f"Fetched {len(tracks)} tracks for artist ID: {artist_id}")
    except Exception as e:
        print(f"Error fetching tracks for artist {artist_id}: {e}")
    return tracks


def get_artist_names_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから名前を取得"""
    print(f"Fetching names for {len(artist_ids)} artists...")
    artist_names = {}
    try:
        for i in range(0, len(artist_ids), 50):
            chunk = artist_ids[i:i+50]
            print(f"Fetching chunk: {chunk}")
            artists = spotify.artists(chunk)['artists']
            for artist in artists:
                artist_names[artist['id']] = artist['name']
    except Exception as e:
        print(f"Error fetching artist names: {e}")
    print(f"Fetched {len(artist_names)} artist names.")
    return artist_names


def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得"""
    print(f"Fetching tracks with features for artist: {artist_name}")
    tracks = []
    new_artists = set()
    query = f'artist:{artist_name}'
    limit = 50

    try:
        search_results = spotify.search(q=query, type='track', limit=limit)
        items = search_results['tracks']['items']

        for track in items:
            all_artists = track['artists']
            if len(all_artists) <= 1:
                continue

            all_artist_ids = [artist['id'] for artist in all_artists]
            new_artists.update(all_artist_ids)

            if all(aid in valid_artist_ids for aid in all_artist_ids):
                tracks.append({
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,
                    'track_name': track['name']
                })
        print(f"Fetched {len(tracks)} tracks with features for artist: {artist_name}")
    except Exception as e:
        print(f"Error fetching tracks for {artist_name}: {e}")

    return tracks, list(new_artists)[:20]


def save_tracks_to_csv(tracks, filename):
    """楽曲データをCSVに保存"""
    print(f"Saving {len(tracks)} tracks to CSV: {filename}")
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['track_id', 'track_name', 'album_name', 'release_date', 'popularity', 'artist_ids']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(tracks)
    print(f"Saved tracks to '{filename}'.")


def save_all_rappers_to_csv(rappers, filename):
    """すべてのラッパー（アーティスト）をCSVに保存"""
    print(f"Saving {len(rappers)} rappers to CSV: {filename}")
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['id', 'name', 'genres', 'popularity', 'followers']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rappers)
    print(f"Saved all rappers to '{filename}'.")


def main():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    print("Loading initial rapper data...")
    with open('./rappers_with_spotify_ids_limited.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        initial_rappers = [{'id': row['id'], 'name': row['name']} for row in reader]
    print(f"Loaded {len(initial_rappers)} initial rappers.")

    rappers_generations = [initial_rappers]
    valid_artist_ids = {rapper['id'] for rapper in initial_rappers}
    all_tracks = []
    artist_tracks = []

    for iteration in range(2):
        print(f"Iteration {iteration + 1}...")
        new_rappers = []
        for rapper in rappers_generations[-1]:
            print(f"Fetching tracks for {rapper['name']}...")
            tracks, new_artists = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
            all_tracks.extend(tracks)

            for artist_id in new_artists:
                if artist_id not in valid_artist_ids:
                    new_rappers.append({'id': artist_id})

        if not new_rappers:
            print("No new rappers found. Stopping iterations.")
            break

        new_artist_ids = [rapper['id'] for rapper in new_rappers]
        if new_artist_ids:
            print(f"Fetching details for {len(new_artist_ids)} new artists...")
            artist_details = get_artist_details_by_ids(new_artist_ids)
            for artist in artist_details:
                new_rappers.append({
                    'id': artist['id'],
                    'name': artist['name'],
                    'genres': ', '.join(artist['genres']),
                    'popularity': artist['popularity'],
                    'followers': artist['followers']['total']
                })

        rappers_generations.append(new_rappers)
        valid_artist_ids.update(new_artist_ids)

        print(f"Fetching tracks for {len(new_artist_ids)} new artists...")
        for artist in new_rappers:
            tracks = get_tracks_by_artist_id(artist['id'])
            artist_tracks.extend(tracks)

    save_tracks_to_csv(artist_tracks, os.path.join(output_dir, 'tracks.csv'))
    save_all_rappers_to_csv([rapper for generation in rappers_generations for rapper in generation], 
                            os.path.join(output_dir, 'all_rappers.csv'))


if __name__ == "__main__":
    main()


Loading initial rapper data...
Loaded 1 initial rappers.
Iteration 1...
Fetching tracks for Kanye West...
Fetching tracks with features for artist: Kanye West
Fetched 0 tracks with features for artist: Kanye West
Fetching details for 20 new artists...
Fetching details for 20 artists...
Fetching chunk: ['7dGJo4pcD2V6oG8kP0tJRR', '2HPaUgqeutzr3jx5a9WyDV', '6GEykX11lQqp92UVOQQCC7', '3wyVrVrFCkukjdVIdirGVY', '21E3waRsmPlU7jZsS13rcj', '3TVXtAsR1Inumwj472S9r4', '63wjoROpeh5f11Qm93UiJ1', '01QTIT5P1pFP3QnnFSdsJf', '3aQeKQSyrW4qWr35idm0cy', '6vbY3hOaCAhC7VjucswgdS', '15UsOTVnJzReFVN1VCnxy4', '0hCNtLu0JehylgoiP8L4Gh', '1sBkRIssrMs1AbVkOJbc7a', '5pKCCKE2ajJHZ9KAiaK11H', '4xPQFgDA5M2xa0ZGo5iIsv', '7u6LfVyYpEzMpHLL7jTyvU', '1cNDP5yjU5vjeR8qMf4grg', '1lE6SEy8f84Zhjvp7r8yTD', '4STHEaNw4mPZ2tzheohgXB', '7LnaAXbDVIL75IVPnndf7w']


KeyboardInterrupt: 

In [3]:
import csv
import os
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from datetime import datetime
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

# .envファイルから環境変数をロード
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')
print(CLIENT_ID, CLIENT_SECRET)

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Spotify APIの認証情報が見つかりません。SPOTIFY_CLIENT_IDとSPOTIFY_CLIENT_SECRETを.envに設定してください。")

# Spotify APIクライアントの認証
auth_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
spotify = spotipy.Spotify(auth_manager=auth_manager)


def get_artist_details_by_ids(artist_ids):
    """Spotify APIを使用して複数のアーティストIDから詳細情報を取得（並列処理対応）"""
    artist_details = []
    try:
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = []
            for i in range(0, len(artist_ids), 50):
                chunk = artist_ids[i:i + 50]
                futures.append(executor.submit(spotify.artists, chunk))
            for future in futures:
                artists = future.result()['artists']
                artist_details.extend(artists)
    except Exception as e:
        print(f"Error fetching artist details: {e}")
    return artist_details


def get_tracks_by_artist_id(artist_id):
    """Spotify APIを使用してアーティストIDから楽曲を取得"""
    tracks = []
    try:
        results = spotify.artist_top_tracks(artist_id)
        for track in results['tracks']:
            artist_ids = [artist['id'] for artist in track['artists']]
            tracks.append({
                'track_id': track['id'],
                'track_name': track['name'],
                'album_name': track['album']['name'],
                'release_date': track['album']['release_date'],
                'popularity': track['popularity'],
                'artist_ids': artist_ids
            })
    except Exception as e:
        print(f"Error fetching tracks for artist {artist_id}: {e}")
    return tracks


def get_tracks_with_features(artist_name, artist_id, valid_artist_ids):
    """アーティスト名からフィーチャリングされている楽曲を取得"""
    tracks = []
    new_artists = set()
    query = f'artist:{artist_name}'
    limit = 50

    try:
        search_results = spotify.search(q=query, type='track', limit=limit, market='US')
        items = search_results['tracks']['items']

        for track in items:
            all_artists = track['artists']
            if len(all_artists) <= 1:
                continue

            all_artist_ids = [artist['id'] for artist in all_artists]
            new_artists.update(all_artist_ids)

            if all(aid in valid_artist_ids for aid in all_artist_ids):
                tracks.append({
                    'id': track['id'],
                    'release_year': track['album']['release_date'].split('-')[0],
                    'artists': all_artist_ids,
                    'track_name': track['name']
                })
    except Exception as e:
        print(f"Error fetching tracks for {artist_name}: {e}")

    return tracks, list(new_artists)[:2]


def save_tracks_to_csv(tracks, filename):
    """楽曲データをCSVに保存"""
    df = pd.DataFrame(tracks)
    df.to_csv(filename, index=False)
    print(f"楽曲データが '{filename}' に保存されました。")


def save_all_rappers_to_csv(rappers, filename):
    """すべてのラッパーをCSVに保存"""
    df = pd.DataFrame(rappers)
    df.to_csv(filename, index=False)
    print(f"全てのラッパーの情報が '{filename}' に保存されました。")


def update_is_rapper_column(csv_path):
    """genreに'rap'が含まれている場合、is_rapperを1に設定"""
    df = pd.read_csv(csv_path)

    if 'is_rapper' not in df.columns:
        df['is_rapper'] = 0

    df['is_rapper'] = df['genres'].str.contains('rap', case=False, na=False).astype(int)
    df.to_csv(csv_path, index=False)
    print(f"'is_rapper'カラムが更新されました: {csv_path}")


def fetch_all_tracks_parallel(artist_ids):
    """複数のアーティストIDのトラックを並列で取得"""
    all_tracks = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(get_tracks_by_artist_id, artist_id): artist_id for artist_id in artist_ids}
        for future in futures:
            try:
                tracks = future.result()
                all_tracks.extend(tracks)
            except Exception as e:
                print(f"Error fetching tracks for {futures[future]}: {e}")
    return all_tracks


def main():
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"./{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    with open('./rappers_with_spotify_ids_limited.csv', 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        initial_rappers = [{'id': row['id'], 'name': row['name']} for row in reader]

    rappers_generations = [initial_rappers]
    valid_artist_ids = {rapper['id'] for rapper in initial_rappers}
    all_tracks = []
    artist_tracks = []

    for iteration in range(2):
        print(f"Iteration {iteration + 1}...")
        new_rappers = []
        for rapper in rappers_generations[-1]:
            print(f"Fetching tracks for {rapper['name']}...")
            tracks, new_artists = get_tracks_with_features(rapper['name'], rapper['id'], valid_artist_ids)
            all_tracks.extend(tracks)

            for artist_id in new_artists:
                if artist_id not in valid_artist_ids:
                    new_rappers.append({'id': artist_id})

        if not new_rappers:
            break

        new_artist_ids = [rapper['id'] for rapper in new_rappers]
        if new_artist_ids:
            artist_details = get_artist_details_by_ids(new_artist_ids)
            for artist in artist_details:
                new_rappers.append({
                    'id': artist['id'],
                    'name': artist['name'],
                    'genres': ', '.join(artist['genres']),
                    'popularity': artist['popularity'],
                    'followers': artist['followers']['total']
                })

        unique_new_rappers = {rapper['id']: rapper for rapper in new_rappers}.values()
        rappers_generations.append(list(unique_new_rappers))
        valid_artist_ids.update(new_artist_ids)

        artist_tracks.extend(fetch_all_tracks_parallel(new_artist_ids))

    save_tracks_to_csv(artist_tracks, os.path.join(output_dir, 'tracks.csv'))

    unique_tracks = {track['track_name']: track for track in all_tracks}.values()
    output_file = os.path.join(output_dir, 'featured_tracks.csv')
    pd.DataFrame(unique_tracks).to_csv(output_file, index=False)

    all_rappers = [rapper for generation in rappers_generations for rapper in generation]
    save_all_rappers_to_csv(all_rappers, os.path.join(output_dir, 'all_rappers.csv'))

    rappers_csv_path = os.path.join(output_dir, 'all_rappers.csv')
    update_is_rapper_column(rappers_csv_path)


if __name__ == "__main__":
    main()


Iteration 1...
Fetching tracks for Kanye West...


In [13]:
import pandas as pd

def update_is_rapper_column(csv_path):
    """CSVに'is_rapper'カラムを追加し、genresに'rap'が含まれる場合にフラグを1に設定"""
    # CSVをDataFrameとして読み込む
    df = pd.read_csv(csv_path)

    # 'is_rapper' カラムが存在しない場合は追加
    if 'is_rapper' not in df.columns:
        df['is_rapper'] = 0

    # genres列に'rap'が含まれる場合、フラグを1に設定
    df['is_rapper'] = df['genres'].str.contains('rap', case=False, na=False).astype(int)

    # 更新されたDataFrameをCSVに書き戻す
    df.to_csv(csv_path, index=False)
    print(f"'is_rapper'カラムが更新されました: {csv_path}")

# 実行例
csv_path = "20241209_163800/all_rappers.csv"  # 対象のCSVファイルパス
update_is_rapper_column(csv_path)


'is_rapper'カラムが更新されました: 20241209_163800/all_rappers.csv


In [1]:
import os
import requests
from dotenv import load_dotenv

# 環境変数を読み込む
load_dotenv()

CLIENT_ID = os.getenv("SPOTIFY_CLIENT_ID")
CLIENT_SECRET = os.getenv("SPOTIFY_CLIENT_SECRET")
print("clientid",CLIENT_ID)
print("clientsecret",CLIENT_SECRET)

def get_spotify_token():
    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET
    }
    response = requests.post(url, headers=headers, data=data)
    response.raise_for_status()
    return response.json()["access_token"]

def fetch_spotify_data():
    token = get_spotify_token()
    url = "https://api.spotify.com/v1/artists/5K4W6rqBFWDnAN6FQUkS6x/top-tracks"
    headers = {
        "Authorization": f"Bearer {token}"
    }
    params = {
        "market": "US"
    }
    response = requests.get(url, headers=headers, params=params)

    # レスポンスヘッダーを表示
    print("Rate Limit Headers:")
    print(f"X-RateLimit-Limit: {response.headers.get('X-RateLimit-Limit')}")
    print(f"X-RateLimit-Remaining: {response.headers.get('X-RateLimit-Remaining')}")
    print(f"X-RateLimit-Reset: {response.headers.get('X-RateLimit-Reset')}")

    if response.status_code == 429:
        print(f"Rate limit reached. Retry-After: {response.headers.get('Retry-After')} seconds")
    else:
        print("Request succeeded:", response.json())

if __name__ == "__main__":
    fetch_spotify_data()


clientid 9f227df8c7de4963a8f571caddd8dcfa
clientsecret aca9f59402df471abcd7c4d5e6673a40
Rate Limit Headers:
X-RateLimit-Limit: None
X-RateLimit-Remaining: None
X-RateLimit-Reset: None
Request succeeded: {'tracks': [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/5K4W6rqBFWDnAN6FQUkS6x'}, 'href': 'https://api.spotify.com/v1/artists/5K4W6rqBFWDnAN6FQUkS6x', 'id': '5K4W6rqBFWDnAN6FQUkS6x', 'name': 'Kanye West', 'type': 'artist', 'uri': 'spotify:artist:5K4W6rqBFWDnAN6FQUkS6x'}], 'external_urls': {'spotify': 'https://open.spotify.com/album/3WFTGIO6E3Xh4paEOBY9OU'}, 'href': 'https://api.spotify.com/v1/albums/3WFTGIO6E3Xh4paEOBY9OU', 'id': '3WFTGIO6E3Xh4paEOBY9OU', 'images': [{'url': 'https://i.scdn.co/image/ab67616d0000b273346d77e155d854735410ed18', 'height': 640, 'width': 640}, {'url': 'https://i.scdn.co/image/ab67616d00001e02346d77e155d854735410ed18', 'height': 300, 'width': 300}, {'url': 'https://i.scdn.co/image/ab67616d00004851

In [None]:
import os
import pandas as pd
from datetime import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv

# .envファイルから環境変数を読み込む
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
print(f"SPOTIFY_CLIENT_ID: {client_id}")
print(f"SPOTIFY_CLIENT_SECRET: {client_secret}")

# Spotify APIクライアントの設定
if not client_id or not client_secret:
    raise ValueError("SPOTIFY_CLIENT_IDまたはSPOTIFY_CLIENT_SECRETが設定されていません。")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id, client_secret))

# 入力ファイルと出力ファイルの設定
input_file = "rappers_with_spotify_ids.csv"
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"./{current_time}"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, "tracks_rapper.csv")

# アーティストごとの楽曲情報を取得してCSVに保存
def fetch_artist_tracks(artist_id):
    tracks = []
    # アーティストのアルバムを取得
    albums = sp.artist_albums(artist_id, album_type='album', limit=50)
    album_ids = [album['id'] for album in albums['items']]
    
    # 各アルバムのトラックを取得
    for album_id in album_ids:
        album_tracks = sp.album_tracks(album_id)
        for track in album_tracks['items']:
            tracks.append({
                "track_id": track["id"],
                "track_name": track["name"],
                "album_id": album_id,
                "album_name": track["album"]["name"] if "album" in track else "Unknown",
                "artist_id": artist_id,
                "artist_name": ", ".join([artist["name"] for artist in track["artists"]]),
                "duration_ms": track["duration_ms"],
                "preview_url": track["preview_url"],
            })
    
    return tracks

# アーティストIDと名前を読み込む
rappers_df = pd.read_csv(input_file)
all_tracks = []

for _, row in rappers_df.iterrows():
    artist_id = row["id"]
    artist_name = row["name"]
    print(f"Fetching tracks for {artist_name} (ID: {artist_id})...")
    tracks = fetch_artist_tracks(artist_id)
    all_tracks.extend(tracks)

# 結果をデータフレームに変換してCSVに保存
tracks_df = pd.DataFrame(all_tracks)
tracks_df.to_csv(output_file, index=False, encoding="utf-8")
print(f"Tracks saved to {output_file}")


In [4]:
import os
import pandas as pd
from datetime import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv

# .envファイルから環境変数を読み込む
load_dotenv()

# Spotify APIの認証情報を環境変数から取得
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
print(f"SPOTIFY_CLIENT_ID: {client_id}")
print(f"SPOTIFY_CLIENT_SECRET: {client_secret}")

# Spotify APIクライアントの設定
if not client_id or not client_secret:
    raise ValueError("SPOTIFY_CLIENT_IDまたはSPOTIFY_CLIENT_SECRETが設定されていません。")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id, client_secret))

# 入力ファイルと出力ファイルの設定
input_file = "rappers_with_spotify_ids.csv"
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"./{current_time}"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, "tracks_rapper.csv")

# アーティストごとの楽曲情報を取得してCSVに保存
def fetch_artist_tracks(artist_id):
    tracks = []
    # アーティストのアルバムを取得
    albums = sp.artist_albums(artist_id, album_type='album', limit=50)
    album_items = albums['items']
    
    for album in album_items:
        album_id = album['id']
        album_name = album['name']
        release_date = album.get('release_date', 'Unknown')  # リリース日を取得
        release_year = release_date.split('-')[0] if release_date != 'Unknown' else 'Unknown'  # 年のみ抽出

        # 各アルバムのトラックを取得
        album_tracks = sp.album_tracks(album_id)
        for track in album_tracks['items']:
            tracks.append({
                "track_id": track["id"],
                "track_name": track["name"],
                "album_id": album_id,
                "album_name": album_name,
                "artist_id": artist_id,
                "artist_name": ", ".join([artist["name"] for artist in track["artists"]]),
                "duration_ms": track["duration_ms"],
                "preview_url": track["preview_url"],
                "release_year": release_year,  # リリース年を追加
            })
    
    return tracks

# アーティストIDと名前を読み込む
rappers_df = pd.read_csv(input_file)
all_tracks = []

for _, row in rappers_df.iterrows():
    artist_id = row["id"]
    artist_name = row["name"]
    print(f"Fetching tracks for {artist_name} (ID: {artist_id})...")
    tracks = fetch_artist_tracks(artist_id)
    all_tracks.extend(tracks)

# 結果をデータフレームに変換してCSVに保存
tracks_df = pd.DataFrame(all_tracks)
tracks_df.to_csv(output_file, index=False, encoding="utf-8")
print(f"Tracks saved to {output_file}")


SPOTIFY_CLIENT_ID: 9f227df8c7de4963a8f571caddd8dcfa
SPOTIFY_CLIENT_SECRET: aca9f59402df471abcd7c4d5e6673a40
Fetching tracks for Kanye West (ID: 5K4W6rqBFWDnAN6FQUkS6x)...
Fetching tracks for Jay-Z (ID: 3nFkdlSjzX9mRTtwJOzDYB)...
Fetching tracks for Eminem (ID: 7dGJo4pcD2V6oG8kP0tJRR)...
Fetching tracks for Drake (ID: 3TVXtAsR1Inumwj472S9r4)...
Fetching tracks for Kendrick Lamar (ID: 2YZyLoL8N0Wb9xBt1NhZWg)...
Fetching tracks for Travis Scott (ID: 0Y5tJX1MQlPlqiwlOH1tJY)...
Fetching tracks for Nicki Minaj (ID: 0hCNtLu0JehylgoiP8L4Gh)...
Fetching tracks for Cardi B (ID: 4kYSro6naA4h99UJvo89HB)...
Fetching tracks for Lil Wayne (ID: 55Aa2cqylxrFIXC767Z865)...
Fetching tracks for Snoop Dogg (ID: 7hJcb9fa4alzcOq3EaNPoG)...
Fetching tracks for 50 Cent (ID: 3q7HBObVc0L8jNeTe5Gofh)...
Fetching tracks for Future (ID: 1RyvyyTE3xzB2ZywiAwp0i)...
Fetching tracks for Post Malone (ID: 246dkjvS1zLTtiykXe5h60)...
Fetching tracks for A$AP Rocky (ID: 13ubrt8QOOCPljQ2FL1Kca)...
Fetching tracks for J. Cole

In [3]:
import os
import csv
from datetime import datetime
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Load environment variables
load_dotenv()

# Spotify API authentication
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")

if not client_id or not client_secret:
    raise ValueError("SPOTIFY_CLIENT_ID or SPOTIFY_CLIENT_SECRET is not set in the environment variables.")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id, client_secret))

# Input CSV file
INPUT_CSV = "rappers_with_spotify_ids.csv"

# Output directory based on current timestamp
def get_output_directory():
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(os.getcwd(), timestamp)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    return output_dir

# Function to get artist data from Spotify API
def get_artist_info(artist_id):
    try:
        artist = sp.artist(artist_id)
        return {
            "id": artist.get("id"),
            "name": artist.get("name"),
            "followers": artist.get("followers", {}).get("total"),
            "genres": ", ".join(artist.get("genres", [])),
            "popularity": artist.get("popularity"),
            "spotify_url": artist.get("external_urls", {}).get("spotify"),
            "images": [image.get("url") for image in artist.get("images", [])],
            "type": artist.get("type"),
            "uri": artist.get("uri")
        }
    except Exception as e:
        print(f"Failed to fetch data for artist ID {artist_id}: {e}")
        return None

# Function to save artist info to CSV
def save_to_csv(artists_info, output_csv):
    with open(output_csv, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        # Write header
        writer.writerow(["id", "name", "followers", "genres", "popularity", "spotify_url", "images", "type", "uri"])
        # Write artist data
        for artist in artists_info:
            writer.writerow([
                artist.get("id"),
                artist.get("name"),
                artist.get("followers"),
                artist.get("genres"),
                artist.get("popularity"),
                artist.get("spotify_url"),
                ", ".join(artist.get("images", [])),
                artist.get("type"),
                artist.get("uri")
            ])

# Main function to process the artists
def main():
    output_dir = get_output_directory()
    output_csv = os.path.join(output_dir, "rappers_info.csv")

    artists_info = []

    # Read input CSV
    with open(INPUT_CSV, mode="r", encoding="utf-8") as file:
        reader = csv.DictReader(file)
        for row in reader:
            artist_id = row.get("id")
            if artist_id:
                artist_info = get_artist_info(artist_id)
                if artist_info:
                    artists_info.append(artist_info)

    # Save to output CSV
    save_to_csv(artists_info, output_csv)
    print(f"Artist information saved to {output_csv}")

if __name__ == "__main__":
    main()

Artist information saved to /Users/nishiyamasuisei/uni/Capstone-project-B-kons/data/20241212_133548/rappers_info.csv


In [13]:
import os
import pandas as pd
import requests
from datetime import datetime

# 入力ファイルと出力ファイルの設定
input_dir = "20241212_143142"
input_file = os.path.join(input_dir, "tracks_rapper.csv")
output_file = os.path.join(input_dir, "tracks_rapper_with_lyrics.csv")

# Lyrics APIのURLフォーマット
LYRICS_API_URL = "https://api.lyrics.ovh/v1/{artist_name}/{track_name}"

# lyricsを取得する関数
def fetch_lyrics(artist_name, track_name):
    try:
        url = LYRICS_API_URL.format(artist_name=artist_name, track_name=track_name)
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            return data.get("lyrics", "")  # lyricsフィールドがなければ空文字を返す
        else:
            print(f"Failed to fetch lyrics for {artist_name} - {track_name}: {response.status_code}")
            print(artist_name)
            print(track_name)
            return ""
    except Exception as e:
        print(f"Error fetching lyrics for {artist_name} - {track_name}: {e}")
        return ""

# CSVを読み込む
df = pd.read_csv(input_file)

# 新しい列を追加
df["lyrics"] = ""

# 各レコードのartist_nameを分割してAPIを試す
for index, row in df.iterrows():
    track_name = row["track_name"]
    artist_names = row["artist_name"].split(", ")  # アーティストを分割

    # それぞれのアーティスト名でAPIを叩く
    lyrics_found = False
    for artist_name in artist_names:
        lyrics = fetch_lyrics(artist_name, track_name)
        if lyrics:  # 空でないlyricsを取得したら保存して終了
            print(f"Lyrics found for track: {track_name} by artist: {artist_name},{lyrics}")
            df.at[index, "lyrics"] = lyrics
            lyrics_found = True
            break

    if not lyrics_found:
        print(f"Lyrics not found for track: {track_name} by artists: {artist_names}")

# 結果を新しいCSVに保存
df.to_csv(output_file, index=False, encoding="utf-8")
print(f"Tracks with lyrics saved to {output_file}")


Failed to fetch lyrics for ¥$ - SLIDE: 404
¥$
SLIDE
Failed to fetch lyrics for Kanye West - SLIDE: 404
Kanye West
SLIDE
Failed to fetch lyrics for Ty Dolla $ign - SLIDE: 404
Ty Dolla $ign
SLIDE
Lyrics not found for track: SLIDE by artists: ['¥$', 'Kanye West', 'Ty Dolla $ign']
Failed to fetch lyrics for ¥$ - TIME MOVING SLOW: 404
¥$
TIME MOVING SLOW
Failed to fetch lyrics for Kanye West - TIME MOVING SLOW: 404
Kanye West
TIME MOVING SLOW
Failed to fetch lyrics for Ty Dolla $ign - TIME MOVING SLOW: 404
Ty Dolla $ign
TIME MOVING SLOW
Lyrics not found for track: TIME MOVING SLOW by artists: ['¥$', 'Kanye West', 'Ty Dolla $ign']
Failed to fetch lyrics for ¥$ - FIELD TRIP: 404
¥$
FIELD TRIP
Failed to fetch lyrics for Kanye West - FIELD TRIP: 404
Kanye West
FIELD TRIP
Failed to fetch lyrics for Ty Dolla $ign - FIELD TRIP: 404
Ty Dolla $ign
FIELD TRIP
Lyrics not found for track: FIELD TRIP by artists: ['¥$', 'Kanye West', 'Ty Dolla $ign']
Failed to fetch lyrics for ¥$ - FRIED: 404
¥$
FRIED
Fa

In [6]:
import os
import pandas as pd
import requests
from datetime import datetime

# 入力ファイルと出力ファイルの設定
input_dir = "20241212_143142"
input_file = os.path.join(input_dir, "tracks_rapper.csv")
output_file = os.path.join(input_dir, "tracks_rapper_with_lyrics.csv")

# Lyrics APIのURLフォーマット
LYRICS_API_URL = "https://api.lyrics.ovh/v1/{artist_name}/{track_name}"

# requests.Session()を使いまわす
session = requests.Session()

# 取得済みのlyricsをキャッシュする辞書 {(artist, track): lyrics}
lyrics_cache = {}

def fetch_lyrics(artist_name, track_name):
    # キャッシュチェック
    cache_key = (artist_name.lower().strip(), track_name.lower().strip())
    if cache_key in lyrics_cache:
        return lyrics_cache[cache_key]
    
    url = LYRICS_API_URL.format(artist_name=artist_name, track_name=track_name)
    try:
        response = session.get(url)
        if response.status_code == 200:
            data = response.json()
            lyrics = data.get("lyrics", "")
            lyrics_cache[cache_key] = lyrics
            return lyrics
        else:
            # 必要ならprint出力を控えることで若干の速度改善
            print(f"Failed to fetch lyrics for {artist_name} - {track_name}: {response.status_code}")
            lyrics_cache[cache_key] = ""
            return ""
    except Exception as e:
        print(f"Error fetching lyrics for {artist_name} - {track_name}: {e}")
        lyrics_cache[cache_key] = ""
        return ""

# CSVを読み込む
df = pd.read_csv(input_file)

# 新しい列を追加（すでに存在する場合は上書き）
df["lyrics"] = ""

# 各レコードのartist_nameを分割してAPIを試す
for index, row in df.iterrows():
    track_name = row["track_name"]
    artist_names = row["artist_name"].split(", ")  # アーティストを分割

    # 各アーティスト名でAPIを叩く
    # 成功するまで繰り返し、成功したら即break
    for artist_name in artist_names:
        lyrics = fetch_lyrics(artist_name, track_name)
        if lyrics:
            print(f'sucess fetching lyrics for {artist_name} - {track_name}') 
            df.at[index, "lyrics"] = lyrics
            break

# 結果を新しいCSVに保存
df.to_csv(output_file, index=False, encoding="utf-8")
print(f"Tracks with lyrics saved to {output_file}")


sucess fetching lyrics for Kanye West - Blame Game
sucess fetching lyrics for Kanye West - Lost In The World
sucess fetching lyrics for Kanye West - Who Will Survive In America
sucess fetching lyrics for Kanye West - Say You Will
sucess fetching lyrics for Kanye West - Welcome To Heartbreak
sucess fetching lyrics for Kanye West - Heartless
sucess fetching lyrics for Kanye West - Amazing
sucess fetching lyrics for Kanye West - Love Lockdown
sucess fetching lyrics for Kanye West - Paranoid
sucess fetching lyrics for Kanye West - RoboCop
sucess fetching lyrics for Kanye West - Street Lights
sucess fetching lyrics for Kanye West - Bad News
sucess fetching lyrics for Kanye West - See You In My Nightmares
sucess fetching lyrics for Kanye West - Coldest Winter
sucess fetching lyrics for Kanye West - Pinocchio Story
sucess fetching lyrics for Kanye West - Good Morning
sucess fetching lyrics for Kanye West - Champion
sucess fetching lyrics for Kanye West - Stronger
sucess fetching lyrics for Ka

KeyboardInterrupt: 