## 앨범 정보에서 스크래핑 대상
: 발매일, 장르, 발매사, 기획사

In [2]:
import os
import pandas as pd

# Path to the CSV file that was saved earlier
csv_dir = os.path.join(os.getcwd(), 'csv')
csv_file_path = os.path.join(csv_dir, 'melon_yearly_top100.csv')

try:
    df = pd.read_csv(csv_file_path, encoding='utf-8')

    # Sanity check: preview the first rows ...
    print("\n--- DataFrame 상위 5개 행 ---")
    print(df.head())

    # ... then list the columns and their dtypes
    print("\n--- DataFrame 정보 (컬럼, 데이터 타입) ---")
    df.info()
except FileNotFoundError:
    # Reported separately so the user is pointed at the path
    print(f"❌ 오류: '{csv_file_path}' 파일을 찾을 수 없습니다. 경로를 확인해 주세요.")
except Exception as err:
    # Any other failure while reading or previewing is reported, not raised
    print(f"❌ CSV 파일을 읽어오는 중 오류가 발생했습니다: {err}")


--- DataFrame 상위 5개 행 ---
   year  rank   song_id  album_id source
0  2024     1  37140709  11391902  melon
1  2024     2  37138469  11402655  melon
2  2024     3  37524037  11487023  melon
3  2024     4  37145732  11404142  melon
4  2024     5  31927275  10307346  melon

--- DataFrame 정보 (컬럼, 데이터 타입) ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   year      500 non-null    int64 
 1   rank      500 non-null    int64 
 2   song_id   500 non-null    int64 
 3   album_id  500 non-null    int64 
 4   source    500 non-null    object
dtypes: int64(4), object(1)
memory usage: 19.7+ KB


In [3]:
# De-duplicate album ids: the same album can appear in several chart rows.
album_id_values = df['album_id'].tolist()
unique_album_id = set(album_id_values)
len(unique_album_id)

307

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import re

In [5]:
# Scrape release date, genre, distributor and agency for every unique album
# from its Melon album detail page, collecting one dict per album in
# `album_meta_data`.
options = Options()
options.add_argument(
    "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
)
driver = webdriver.Chrome(options=options, service=Service(ChromeDriverManager().install()))

album_meta_data = []

try:
    # One explicit-wait helper (10 s timeout) reused for every page.
    wait = WebDriverWait(driver, 10)

    for album_id in unique_album_id:
        print(album_id)  # progress indicator
        driver.get(f'https://www.melon.com/album/detail.htm?albumId={album_id}')

        # Wait until the metadata container is in the DOM instead of querying
        # immediately; if it never appears the wait raises and the `finally`
        # below still closes the browser.
        meta_data = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'list'))
        )
        dd_list = meta_data.find_elements(By.TAG_NAME, 'dd')

        # The four values are read positionally, so a page exposing fewer
        # <dd> entries cannot be parsed; skip it rather than abort the crawl.
        if len(dd_list) < 4:
            print(f"⚠️ {album_id}: 앨범 메타 정보가 부족하여 건너뜁니다.")
            continue

        # <dd> order: release date, genre, distributor, agency.
        release_date, genre, distributor, entertainment = (
            dd.text for dd in dd_list[:4]
        )

        album_meta_data.append({
            "album_id": album_id,
            "release_date": release_date,
            "genre": genre,
            "distributor": distributor,
            # Key spelling kept as-is: it becomes the column name of the
            # CSV written from these records.
            "enterteinment": entertainment,
        })
finally:
    # Always shut the browser down, even when a page fails mid-crawl.
    driver.quit()

10377241
10324004
10735654
10827816
10469416
10975276
11262009
11610170
10459197
10821699
10903658
1286252
10614899
10680450
10377346
11309190
10645654
10346650
11102375
11444397
11376816
10043575
2232505
10604729
11620540
10408131
11067591
11382987
10784974
11239639
11454681
11215072
11362544
2148596
11421941
11180278
10580227
11280645
10336518
10348811
10731792
10426648
10412319
399659
10412335
10979636
10600760
28985
10903868
10436942
11454802
10359126
10391899
11372896
11229537
11127145
11000171
10463600
11258233
10260858
10871162
10359162
10918269
11057545
10529161
11227533
10381712
11315612
10359196
10359204
10926502
10613163
11415997
10570193
11219412
10385896
10842611
10265079
11125247
11201031
10527241
10801677
10670618
11211297
10566182
10314289
11475530
10609232
10990161
10416723
10590812
10283614
11340391
10451566
10320500
10648182
10117789
10298014
10539679
10447520
10367659
10326710
10601149
11121346
11356868
10453701
10623687
10465994
10037969
10402513
10775252
11385577


In [6]:
# Number of album records collected by the scraping loop.
len(album_meta_data)

307

In [7]:
# Inspect the first scraped record to check its keys and values.
album_meta_data[0]

{'album_id': 10377241,
 'release_date': '2020.01.19',
 'genre': '국내드라마, 발라드',
 'distributor': '지니뮤직',
 'enterteinment': 'Stone Music Entertainment'}

In [10]:
# Save the scraped album metadata to CSV
import csv
import os

# Output file location
csv_dir = os.path.join(os.getcwd(), 'csv')
csv_file_path = os.path.join(csv_dir, 'melon_album_info.csv')

if not album_meta_data:
    # Nothing was scraped: there is no first record to take the header from.
    print("⚠️ 저장할 앨범 데이터가 없습니다.")
else:
    # The keys of the first record are used as the field names (header).
    fieldnames = list(album_meta_data[0].keys())

    try:
        # Make sure the target directory exists before opening the file.
        os.makedirs(csv_dir, exist_ok=True)

        # newline='' is what the csv module expects for files it writes.
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            # Header row first, then one row per album record.
            writer.writeheader()
            writer.writerows(album_meta_data)

        print("✅ 성공적으로 저장")

    except (OSError, csv.Error, ValueError) as e:
        # OSError: filesystem problems; csv.Error / ValueError: a record the
        # writer cannot serialise (e.g. a key missing from `fieldnames`).
        print(f"❌ 파일 저장 중 오류 발생: {e}")

✅ 성공적으로 저장


In [11]:
# Read the CSV at csv_file_path back in and preview the first rows.
df = pd.read_csv(csv_file_path, encoding='utf-8')
df.head()

Unnamed: 0,album_id,release_date,genre,distributor,enterteinment
0,10377241,2020.01.19,"국내드라마, 발라드",지니뮤직,Stone Music Entertainment
1,10324004,2019.09.01,발라드,주식회사 플랜비뮤직,에스에스엠(SH SPACE MUSIC)
2,10735654,2021.10.11,"국내드라마, 발라드",(주)엔에이치엔벅스,모스트콘텐츠
3,10827816,2021.12.29,"발라드, 포크/블루스",카카오엔터테인먼트,EDAM엔터테인먼트
4,10469416,2020.07.30,랩/힙합,카카오엔터테인먼트,피네이션


In [12]:
# (rows, columns) of the current DataFrame.
df.shape

(307, 5)