In [3]:
import re
import time
import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup

#webサイトを取得し、テキスト形式で出力
def load(url):
    res = requests.get(url)
    #HTTPリクエストが失敗したステータスコードを返した場合、HTTPErrorを送出
    res.raise_for_status()
    #レスポンスボディをテキスト形式で入手
    return res.text

#htmlタグの取得
def get_tag(html, find_tag):
    soup = BeautifulSoup(str(html), 'html.parser')
    tag = soup.find(find_tag)
    return tag

#htmlタグの取得
def get_tags(html, find_tag):
    soup = BeautifulSoup(str(html), 'html.parser')
    tag = soup.find_all(find_tag)
    return tag

#htmlのid取得
def get_id(html, find_id):
    soup = BeautifulSoup(str(html), 'html.parser')
    html_id = soup.select(find_id)
    return html_id

#プログラムで扱えるデータ構造に変換
def parse(html):
    soup = BeautifulSoup(html, 'html.parser')
    #htmlタグの削除
    simple_row = soup.getText()
    simple_row = simple_row.replace('　', '')    
    return simple_row

def parse_lyric(html):
    soup = BeautifulSoup(html, 'html.parser')
    #htmlタグの削除
    simple_row = soup.get_text(separator=" ").strip()
    simple_row = simple_row.replace('　', ' ')

    return simple_row

#それぞれ歌の情報の取得
def get_info(url):
    base_url = 'https://www.uta-net.com/'
    html = load(url)
    #曲ごとのurlを格納
    song_url = []
    #歌を格納
    song_info = []
    songs_info=[]

    #曲のurlを取得
    #tdのurlを格納
    for td in get_tags(html, 'td'):
        #a要素の取得
        for a in get_tags(td, 'a'):
            #href属性にsongを含むか否か
            if 'song' in a.get ('href'):
                #urlを配列に追加
                song_url.append(base_url + a.get('href'))

    #曲の情報の取得
    for i, page in enumerate(song_url):
        print('{}曲目:{}'.format(i + 1, page))
        html = load(page)
        song_info = []

        #Song
        for tag in get_tag(html, 'h2'):
            #id検索を行うため、一度strにキャスト
            tag = str(tag)
            simple_row = parse(tag)
            song_info.append(simple_row)                

        #Artist
        for tag in get_tags(html, 'h3'):
            tag = str(tag)
            if r'itemprop="byArtist name"' in tag:
                simple_row = parse(tag)
                song_info.append(simple_row)

        #Lyricist
        for tag in get_tags(html, 'a'):
            tag = str(tag)
            if r'itemprop="lyricist"' in tag:
                simple_row = parse(tag)
                song_info.append(simple_row)

        #Composer
        for tag in get_tags(html, 'a'):
            tag = str(tag)
            if r'itemprop="composer"' in tag:
                simple_row = parse(tag)
                song_info.append(simple_row)

        #Lyric
        for id_ in get_id(html, '#kashi_area'):
            id_ = str(id_)
            if r'id="kashi_area"' in id_:
                simple_row = parse_lyric(id_)
                song_info.append(simple_row)
                songs_info.append(song_info)

                #1秒待機(サーバの負荷を軽減)
                time.sleep(1)
                break

    return songs_info

#csv
def create_df(URL):
    file_name = URL[0]
    url = URL[1]
    # データフレームを作成
    df = pd.DataFrame(get_info(url))
    df = df.rename(columns={0:'Song', 1:'Artist', 2:'Lyricist', 3:'Composer', 4:'Lyric'})
    # CSV ファイル出力
    csv = df.to_csv("csv_file/{}.csv".format(file_name))    
    return csv

#一括インストール
def whole(URL):
    for i in range(len(URL)):
        URLS = URL[i]
        create_df(URLS)
    return

In [7]:
#定期的に更新されるランキング
URL_fre = [['{}_daily'.format(datetime.date.today()), 'https://www.uta-net.com/user/ranking/daily.html'],
           ['{}_weekly'.format(datetime.date.today()), 'https://www.uta-net.com/user/ranking/weekly.html'],
           ['{}_monthly'.format(datetime.date.today()), 'https://www.uta-net.com/user/ranking/monthly.html']
          ]

#リストの一部
URL_setting = [['YOASOBI', 'https://www.uta-net.com/artist/28370/'],
               ['Official髭男dism','https://www.uta-net.com/artist/18093/'],
               ['King Gnu', 'https://www.uta-net.com/artist/23343/'],
               ['LiSA','https://www.uta-net.com/artist/561/'],
               ['瑛人', 'https://www.uta-net.com/artist/28712/'],
               ['BTS', 'https://www.uta-net.com/artist/16377/'],
               ['NiziU', 'https://www.uta-net.com/artist/28720/'],
               ['あいみょん', 'https://www.uta-net.com/artist/17598/'],
               ['米津玄師', 'https://www.uta-net.com/artist/12795/'],
               ['Mrs.GREEN APPLE', 'https://www.uta-net.com/artist/18526/'],
               ['菅田将暉', 'https://www.uta-net.com/artist/22091/'],
               ['Rin音','https://www.uta-net.com/artist/28222/'],
               ['yama', 'https://www.uta-net.com/artist/28506/'],
               ['TWICE', 'https://www.uta-net.com/artist/21906/'],
               ['Novelbright', 'https://www.uta-net.com/artist/25427/'],
               ['Tani Yuuki', 'https://www.uta-net.com/artist/28745/'],
               ['平井大', 'https://www.uta-net.com/artist/12164/'],
               ['ヨルシカ', 'https://www.uta-net.com/artist/22653/'],
               ['オレンジスパイニクラブ', 'https://www.uta-net.com/artist/28763/'],
               ['空音', 'https://www.uta-net.com/artist/27630/'],
               ['back number', 'https://www.uta-net.com/artist/8613/'],
               ['Uru', 'https://www.uta-net.com/artist/20238/'],
               ['ちゃんみな', 'https://www.uta-net.com/artist/21640/'],
               ['優里', 'https://www.uta-net.com/artist/28773/'],
               ['BLACKPINK', 'https://www.uta-net.com/artist/22998/'],
               ['wacci', 'https://www.uta-net.com/artist/13831/'],
               ['ONE OK ROCK','https://www.uta-net.com/artist/7063/'],
               ['MAMAMOO', 'https://www.uta-net.com/artist/25418/'],
               ['もさを。', 'https://www.uta-net.com/artist/28805/'],
               ['Reol', 'https://www.uta-net.com/artist/18631/'],
               ['マカロニえんぴつ', 'https://www.uta-net.com/artist/21729/'],
               ['神はサイコロを振らない', 'https://www.uta-net.com/artist/22591/'],
               ['ITZY', 'https://www.uta-net.com/artist/31338/'],
               ['三浦春馬', 'https://www.uta-net.com/artist/26916/'],
               ['川崎鷹也', 'https://www.uta-net.com/artist/29268/'],
               ['ずっと真夜中でいいのに。', 'https://www.uta-net.com/artist/26722/'],
               ['Aimer', 'https://www.uta-net.com/artist/11629/'],
               ['Foorin', 'https://www.uta-net.com/artist/25074/'],
               ['家入レオ', 'https://www.uta-net.com/artist/12361/'],
               ['iri', 'https://www.uta-net.com/artist/21002/'],
               ['DAOKO×米津玄師', 'https://www.uta-net.com/artist/22862/']
              ]

In [8]:
whole(URL_setting)

1曲目:https://www.uta-net.com//song/311353/
2曲目:https://www.uta-net.com//song/284749/
3曲目:https://www.uta-net.com//song/296108/
4曲目:https://www.uta-net.com//song/310521/
5曲目:https://www.uta-net.com//song/304848/
6曲目:https://www.uta-net.com//song/304176/
7曲目:https://www.uta-net.com//song/296109/
8曲目:https://www.uta-net.com//song/290004/
9曲目:https://www.uta-net.com//song/310522/
10曲目:https://www.uta-net.com//song/304177/
11曲目:https://www.uta-net.com//song/307610/
12曲目:https://www.uta-net.com//song/288086/
13曲目:https://www.uta-net.com//song/310520/
14曲目:https://www.uta-net.com//song/310523/
15曲目:https://www.uta-net.com//song/295523/
16曲目:https://www.uta-net.com//song/285262/
17曲目:https://www.uta-net.com//song/309835/
18曲目:https://www.uta-net.com//song/301855/
19曲目:https://www.uta-net.com//song/305494/
20曲目:https://www.uta-net.com//song/296725/
21曲目:https://www.uta-net.com//song/284748/
22曲目:https://www.uta-net.com//song/305932/
1曲目:https://www.uta-net.com//song/183915/
2曲目:https://www.uta-n