In [None]:
import traceback
import pandas as pd
import numpy as np
import urllib.parse
import urllib.request
from bs4 import BeautifulSoup
import re
from time import sleep 
import warnings
warnings.simplefilter('ignore')
from concurrent import futures

def id2kaisai(race_id):
    """Build the meeting key used to find a race inside a horse's results table.

    The key is ``race_id[0:4] + race_id[7] + <track name> + race_id[9]``.
    The track name is selected by the two-digit code at ``race_id[4:6]``;
    every code outside 1-9 falls back to "小倉".
    """
    track_code = int(race_id[4:6])
    track_names = {
        1: "札幌",
        2: "函館",
        3: "福島",
        4: "新潟",
        5: "東京",
        6: "中山",
        7: "中京",
        8: "京都",
        9: "阪神",
    }
    track = track_names.get(track_code, "小倉")
    return race_id[0:4] + race_id[7] + track + race_id[9]
    
def get_table(target_url):
    """Download ``target_url`` and return every HTML table on it as a list of DataFrames."""
    with urllib.request.urlopen(target_url) as page:
        raw_html = page.read()
    return pd.read_html(raw_html)
    
def make_data(race_id):
    """Scrape one finished race from netkeiba and serialise it as one text line.

    The returned line is ``race_info,horse_1,...,horse_n,odds_info\n`` where

    * ``race_info`` is ``race_id;siba_da;length;weather;baba;prize``;
    * each horse is ``着順;馬番;馬名;馬齢;騎手;単勝オッズ;馬体重;増減;調教師;馬主;
      生産者;<table_1[0,0]>;<table_1[2,0]>;<prize money summed over earlier races>``
      (the two ``table_1`` cells are the sire and dam according to the field
      list kept in the original comment);
    * ``odds_info`` is the first three columns of every row of the two payout
      tables, written as ``a|b|c`` and joined with ``;`` (commas removed).

    Horses are emitted in horse-number (馬番) order.

    Args:
        race_id: netkeiba race id string.

    Returns:
        The serialised, newline-terminated line.

    Raises:
        Any exception raised by the HTTP requests or by parsing a page whose
        layout differs from what is expected; ``get_text`` relies on this to
        detect race ids that do not exist.
    """
    target_url = 'https://race.netkeiba.com/race/result.html?race_id='+race_id+'&rf=race_list'
    dfs = pd.read_html(target_url)
    df = dfs[0]

    # Flatten the two payout tables (dfs[1], dfs[2]) into "a|b|c" triples.
    odds_all = []
    for payout_table in (dfs[1], dfs[2]):
        for row in range(len(payout_table)):
            odds_all.append("|".join(payout_table.iloc[row, col] for col in range(3)))
    # "," is the top-level separator of the output line, so strip it from the payouts.
    odds_info = ";".join(odds_all).replace(",","")

    with urllib.request.urlopen(target_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # --- race information: "race_id;siba_da;length;weather;baba;prize" ---
    data_1 = soup.select_one("#page > div.RaceColumn01 > div > div.RaceMainColumn > div.RaceList_NameBox > div.RaceList_Item02 > div.RaceData01").text
    result_1 = re.findall(r'/\s([^\s]+)',data_1)
    siba_da = result_1[0][0]      # first character of the course token
    length = result_1[0][1:-1]    # course token without its first and last character
    weather = result_1[1][-1]
    baba = result_1[2][-1]

    data_2 = soup.select_one("#page > div.RaceColumn01 > div > div.RaceMainColumn > div.RaceList_NameBox > div.RaceList_Item02 > div.RaceData02 > span:nth-child(10)").text
    result_2 = re.findall(r'本賞金:([^,]+),',data_2)
    prize = result_2[0]

    list_race = [race_id,siba_da,length,weather,baba,prize]
    race_info = ";".join(list_race)

    # --- per-horse information ---
    # The result table is in finishing order, so the row number is the finishing position.
    df["着順"] = list(range(1,len(df)+1))
    df["馬齢"] = [x[1] for x in df["性齢"]]
    df["馬体重"] = [x[0:3] for x in df["馬体重(増減)"]]
    df["増減"] = [x[4:-1] for x in df["馬体重(増減)"]]
    df = df[["着順","馬番","馬名","馬齢","騎手","単勝オッズ","馬体重","増減"]]
    df = df.sort_values('馬番')

    # Horse page links are read row by row from the result table, i.e. in
    # finishing order (NOT in the horse-number order df now has).
    horse_urls = [
        soup.select_one("#All_Result_Table > tbody > tr:nth-child("+str(i+1)+") > td:nth-child(4) > span > a").get("href")
        for i in range(len(df))
    ]

    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(get_table,horse_urls))

    kaisai_key = id2kaisai(race_id)
    horses_list = []
    for i in range(len(df)):
        row = df.iloc[i,:]
        horse_list = list(map(str,row.values))

        # FIX: `results` is in finishing order while `df` is sorted by horse
        # number, so the horse's own page must be looked up through its
        # finishing position instead of the loop counter.
        tables = results[int(row["着順"]) - 1]

        table_0 = tables[1]
        table_0.index = table_0.iloc[:,0]
        horse_list.append(table_0.loc["調教師",1])
        horse_list.append(table_0.loc["馬主",1])
        horse_list.append(table_0.loc["生産者",1])

        table_1 = tables[2]
        horse_list.append(table_1.loc[0,0])
        horse_list.append(table_1.loc[2,0])

        # The past-results table is normally tables[3]; when an awards table
        # ("受賞歴") sits in that slot the results table is the next one.
        table_2 = tables[3]
        if table_2.columns[0] == "受賞歴":
            table_2 = tables[4]

        # Re-index the results by meeting key and keep only the rows below
        # this race, so the prize sum excludes the race being described.
        table_2.index = [table_2.iloc[k,0][0:4]+table_2.iloc[k,1] for k in range(len(table_2))]
        a = table_2.index.get_loc(kaisai_key)
        table_2 = table_2.iloc[a+1:,:]
        horse_list.append(str(np.sum(table_2["賞金"])))

        horses_list.append(";".join(horse_list))
    horses_info = ",".join(horses_list)

    info = ",".join([race_info,horses_info,odds_info])+"\n"
    return info

def get_text(race_id):
    """Return the serialised line for ``race_id``, or ``0`` when scraping it fails.

    Any ``Exception`` raised by ``make_data`` is swallowed; callers have to
    check for the ``0`` sentinel before using the result as text.
    """
    try:
        return make_data(race_id)
    except Exception:
        return 0
    

                            

    
# Enumerate every candidate race id: year + "0"+track + "0"+meeting + "0"+day + race number.
# Ids that do not correspond to a real race are filtered out later because
# get_text returns 0 for them.
# NOTE(review): track codes stop at 9 although id2kaisai treats other codes as
# "小倉" — confirm whether that track should be scraped as well.
where = [str(i) for i in range(1,10)]
kaisai = [str(i) for i in range(1,6)]
day = [str(i) for i in range(1,10)]
rounds = ["01","02","03","04","05","06","07","08","09","10","11","12"]
year_list = ["2020"]
race_id_list = [
    y+"0"+w+"0"+k+"0"+d+r
    for y in year_list
    for w in where
    for k in kaisai
    for d in day
    for r in rounds
]

with futures.ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(get_text,race_id_list))

# FIX: the original looped over the undefined name `result` and passed the
# failure sentinel 0 to f.write(), which raises TypeError. Only real lines
# are written now; mode 'w' already truncates, so one open() is enough.
with open("keiba_2020", mode='w') as f:
    for text in results:
        if isinstance(text, str):
            f.write(text)

In [None]:
import lightgbm as lgb
import numpy as np
import pandas as pd
import random
import pickle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 
from bs4 import BeautifulSoup
import re
from scipy.stats import norm
from bayes_opt import BayesianOptimization
import graphviz
import warnings
warnings.simplefilter('ignore')
def make_Xyqodds(files=("keiba_2019","keiba_2020")):
    """Parse scraped race files into train/test features, labels, groups and odds.

    Every non-blank line is ``race_info,horse_1,...,horse_n,odds_info`` with
    ``;``-separated sub-fields. Lines whose first four characters are a year
    before 2020 go to the train split (index 0), all others to the test split
    (index 1).

    For each horse the feature row is the race fields followed by the horse
    fields without the leading finishing position; entries 2, 5, 8, 10, 11, 12
    and 18 are converted to float (0 when they cannot be parsed). The label is
    ``18 - finishing position``.

    Args:
        files: paths of the files to read, in order.

    Returns:
        An object-dtype ``np.ndarray`` ``r`` where ``r[s][0]`` holds the feature
        rows, ``r[s][1]`` the labels, ``r[s][2]`` the number of horses per race
        and ``r[s][3]`` the raw odds string per race, for split ``s``.
    """
    numeric_columns = [2,5,8,10,11,12,18]
    result = [[[],[],[],[]],[[],[],[],[]]]
    for file in files:
        with open(file,mode="r") as f:
            for line in f:
                if not line.strip():
                    continue
                # FIX: the original `break` on the first 2020 line dropped the
                # whole test split; the split is now chosen per line.
                num = 0 if int(line[0:4]) < 2020 else 1

                line_list = line.split(",")
                race_info,horses_info,odds_info = line_list[0],line_list[1:-1],line_list[-1]
                race_list = race_info.split(";")
                for horse_info in horses_info:
                    horse_list = horse_info.split(";")
                    label = 18 - int(horse_list[0])
                    data = race_list + horse_list[1:]
                    for i in numeric_columns:
                        try:
                            data[i] = float(data[i])
                        except ValueError:
                            data[i] = 0
                    result[num][0].append(data)
                    result[num][1].append(label)

                result[num][2].append(len(horses_info))
                result[num][3].append(odds_info)

    # The nested lists are ragged; NumPy >= 1.24 refuses to build such an
    # array unless dtype=object is given explicitly.
    return np.array(result, dtype=object)

# Parse the scraped files once at module level; train_sim reads this global
# instead of re-parsing the files on every call.
Xyqodds = make_Xyqodds()

def train_sim(max_depth,num_leaves,think_odds,boosting_type):
    """Train a LightGBM ranker and score it by simulated betting on the test races.

    Reads the module-level ``Xyqodds`` (output of ``make_Xyqodds``). After
    training, the three horses ranked highest by the model are "bet" on in
    each test race, for each of the seven payout rows kept from the odds
    string (rows named "枠連" are skipped). The payout is recorded when the
    bet matches the winning combination, 0 otherwise.

    Args:
        max_depth: tree depth; truncated with ``int``.
        num_leaves: number of leaves; truncated with ``int``.
        think_odds: when > 0.5 feature column 10 is dropped before training.
            NOTE(review): column 10 looks like the win odds, so a high value
            *removes* the odds despite the parameter name — confirm.
        boosting_type: > 0.5 selects "dart", otherwise "gbdt".

    Returns:
        The largest, over the seven bet types, of
        ``norm.sf(100, mean payout, standard error of the mean payout)``.
    """
    boosting_type = "dart" if boosting_type > 0.5 else "gbdt"
    data = Xyqodds

    X_train,X_test = data[0][0],data[1][0]
    y_train,y_test = data[0][1],data[1][1]
    q_train,q_test = data[0][2],data[1][2]
    odds_test = data[1][3]
    category = [0,1,3,4,6,7,9,13,14,15,16,17]
    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)
    for col in category:
        X_train[col] = X_train[col].astype('category')
        X_test[col] = X_test[col].astype('category')

    if think_odds > 0.5:
        # NOTE(review): `category` is still passed to LightGBM unchanged after
        # this drop; verify it is matched by column label and not by position.
        X_train = X_train.drop(columns=10)
        X_test = X_test.drop(columns=10)

    train = lgb.Dataset(X_train, y_train,group=q_train)
    test = lgb.Dataset(X_test, y_test, group=q_test)

    params = {
        'objective': 'lambdarank',
        'metric': 'ndcg',
        'ndcg_eval_at': 3,
        'max_depth': int(max_depth),
        'max_position':3,
        'num_leaves': int(num_leaves),
        'min_data_in_leaf':1,
        'verbose': -1,
        'categorical_feature':category,
        'boosting_type': boosting_type
    }
    # NOTE(review): early_stopping_rounds / verbose_eval are keyword arguments
    # of older LightGBM releases; newer ones expect callbacks instead.
    model = lgb.train(
        params,
        train,
        num_boost_round=150,
        valid_sets=[train,test],
        valid_names=['train','test'],
        early_stopping_rounds=20,
        verbose_eval=False
    )

    from_ = 0
    gain = [[],[],[],[],[],[],[]]   # one payout list per bet type
    for n_horses,odds in zip(q_test,odds_test):
        # Rank this race's horses; index + 1 turns the row position inside
        # the race into a 1-based number (assumes rows are in horse-number
        # order — TODO confirm against the scraper).
        pred = model.predict(X_test[from_:from_+n_horses],num_iteration=model.best_iteration)
        pred = pd.Series(pred).sort_values(ascending=False)
        pred.index = pred.index + 1
        pred_rank = list(map(str,pred.index.values))[0:3]
        from_ += n_horses

        # (bet name, winning combination(s), payout(s)) for every payout row.
        odds_list = re.findall(r'([^\|]*)\|([^\|]*)\|([^;]*)円;?',odds)

        # parsed[0] holds the winning combinations, parsed[1] the payouts,
        # both shaped like baken_list below so they can be compared slot by slot.
        parsed = []
        for field,sep in zip([1,2],[" ","円"]):
            comb_list =  [x[field] for x in odds_list if x[0] != "枠連"]
            comb_list[0] = [comb_list[0:1]]
            comb_list[1] = [[a] for a in comb_list[1].split(sep)]
            if len(comb_list[1]) == 5:
                # Five pieces means the entries were separated by three
                # spaces: keep every other piece and split the rest that way.
                comb_list[1] = comb_list[1][::2]
                sep = "   "
            comb_list[2] = [set(comb_list[2].split(sep))]
            comb_list[3] = comb_list[3].split(sep)
            comb_list[3] = [set(comb_list[3][0:(3-field)]),set(comb_list[3][3-field:6-2*field]),set(comb_list[3][6-2*field:9-3*field])]
            comb_list[4] = [comb_list[4].split(sep)]
            comb_list[5] = [set(comb_list[5].split(sep))]
            comb_list[6] = [comb_list[6].split(sep)]
            parsed.append(comb_list)

        # The bets placed from the model's top three, one slot per bet type:
        # lists are order-sensitive bets, sets are order-free ones.
        baken_list = [
            [[a] for a in pred_rank[0:1]],
            [[a] for a in pred_rank[0:3]],
            [set(pred_rank[0:2])],
            [set(pred_rank[0:2]),set(pred_rank[1:3]),set(pred_rank[::2])],
            [pred_rank[0:2]],
            [set(pred_rank)],
            [pred_rank],
        ]

        for bet in range(len(parsed[0])):
            for k in range(len(parsed[0][bet])):
                if parsed[0][bet][k] in baken_list[bet]:
                    gain[bet].append(float(list(parsed[1][bet][k])[0]))
                else:
                    gain[bet].append(0)

    # FIX: the per-bet lists have different lengths, so np.array(gain) raises
    # on NumPy >= 1.24; the lists are used directly instead.
    objective = []
    for bet_gain in gain:
        p = norm.sf(x=100,loc=np.average(bet_gain),scale=np.std(bet_gain,ddof=1)/np.sqrt(len(bet_gain)))
        objective.append(p)
    return max(objective)

# Bounds of the search space explored by the optimiser.
search_space = {
    'max_depth': (1,6),
    'num_leaves': (2,64),
    'think_odds' : (0,1),
    'boosting_type': (0,1),
}

# train_sim (defined above) is the function being maximised.
# verbose: 0 prints nothing, 1 prints only when the best value improves,
# the default 2 prints every step.
rf_cv_bo = BayesianOptimization(train_sim, search_space, verbose=0, random_state=0)

rf_cv_bo.maximize(init_points=5, n_iter=50, acq='ei')

# Persist the best parameters, one value per line (no trailing newline),
# in the order the training cell reads them back.
param_order = ('max_depth', 'num_leaves', 'think_odds', 'boosting_type')
with open("params",mode="w") as txt:
    for position, param_name in enumerate(param_order):
        line_end = "" if position == len(param_order) - 1 else "\n"
        txt.write(str(rf_cv_bo.max['params'][param_name]) + line_end)



In [None]:
import lightgbm as lgb
import numpy as np
import pandas as pd
import random
from scipy.stats import norm
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing 
from bs4 import BeautifulSoup
import re
from bayes_opt import BayesianOptimization
import graphviz
import warnings
warnings.simplefilter('ignore')
def make_Xyqodds(files=("keiba_2020",)):
    """Parse scraped race files into train/test features, labels, groups and odds.

    Every non-blank line is ``race_info,horse_1,...,horse_n,odds_info`` with
    ``;``-separated sub-fields. Lines whose first four characters are a year
    before 2020 go to split 0 (train), all others to split 1 (test).

    For each horse the feature row is the race fields followed by the horse
    fields without the leading finishing position; entries 2, 5, 8, 10, 11, 12
    and 18 are converted to float (0 when they cannot be parsed). The label is
    ``18 - finishing position``.

    NOTE(review): with the default file list the train split is only filled
    if "keiba_2020" also contains pre-2020 lines — confirm, or pass the
    earlier files through ``files``.

    Args:
        files: paths of the files to read, in order.

    Returns:
        An object-dtype ``np.ndarray`` ``r`` where ``r[s][0]`` holds the feature
        rows, ``r[s][1]`` the labels, ``r[s][2]`` the number of horses per race
        and ``r[s][3]`` the raw odds string per race, for split ``s``.
    """
    numeric_columns = [2,5,8,10,11,12,18]
    result = [[[],[],[],[]],[[],[],[],[]]]
    for file in files:
        with open(file,mode="r") as f:
            for line in f:
                # A blank line would make int(line[0:4]) fail.
                if not line.strip():
                    continue
                num = 0 if int(line[0:4]) < 2020 else 1

                line_list = line.split(",")
                race_info,horses_info,odds_info = line_list[0],line_list[1:-1],line_list[-1]
                race_list = race_info.split(";")
                for horse_info in horses_info:
                    horse_list = horse_info.split(";")
                    label = 18 - int(horse_list[0])
                    data = race_list + horse_list[1:]
                    for i in numeric_columns:
                        try:
                            data[i] = float(data[i])
                        except ValueError:
                            data[i] = 0
                    result[num][0].append(data)
                    result[num][1].append(label)

                result[num][2].append(len(horses_info))
                result[num][3].append(odds_info)

    # The nested lists are ragged; NumPy >= 1.24 refuses to build such an
    # array unless dtype=object is given explicitly.
    return np.array(result, dtype=object)
def train_sim(max_depth,num_leaves,think_odds,boosting_type):
    """Train the final LightGBM ranker and record the best-scoring bet type.

    Data comes from ``make_Xyqodds()``. After training, the three horses
    ranked highest by the model are "bet" on in each test race, for each of
    the seven payout rows kept from the odds string (rows named "枠連" are
    skipped). For every bet type the score is
    ``norm.sf(100, mean payout, standard error of the mean payout)``; the
    index of the highest score is written to the file "how_2_buy".

    Args:
        max_depth: tree depth; truncated with ``int``.
        num_leaves: number of leaves; truncated with ``int``.
        think_odds: when > 0.5 feature column 10 is dropped before training.
            NOTE(review): column 10 looks like the win odds, so a high value
            *removes* the odds despite the parameter name — confirm.
        boosting_type: > 0.5 selects "dart", otherwise "gbdt".

    Returns:
        The trained LightGBM model.
    """
    boosting_type = "dart" if boosting_type > 0.5 else "gbdt"

    data = make_Xyqodds()
    X_train,X_test = data[0][0],data[1][0]
    y_train,y_test = data[0][1],data[1][1]
    q_train,q_test = data[0][2],data[1][2]
    odds_test = data[1][3]
    category = [0,1,3,4,6,7,9,13,14,15,16,17]
    X_train = pd.DataFrame(X_train)
    X_test = pd.DataFrame(X_test)
    for col in category:
        X_train[col] = X_train[col].astype('category')
        X_test[col] = X_test[col].astype('category')

    if think_odds > 0.5:
        # NOTE(review): `category` is still passed to LightGBM unchanged after
        # this drop; verify it is matched by column label and not by position.
        X_train = X_train.drop(columns=10)
        X_test = X_test.drop(columns=10)

    train = lgb.Dataset(X_train, y_train,group=q_train)
    test = lgb.Dataset(X_test, y_test, group=q_test)

    params = {
        'objective': 'lambdarank',
        'metric': 'ndcg',
        'ndcg_eval_at': 3,
        'max_depth': int(max_depth),
        'max_position':3,
        'num_leaves': int(num_leaves),
        'min_data_in_leaf':1,
        'verbose': -1,
        'categorical_feature':category,
        'boosting_type': boosting_type
    }
    # NOTE(review): early_stopping_rounds / verbose_eval are keyword arguments
    # of older LightGBM releases; newer ones expect callbacks instead.
    model = lgb.train(
        params,
        train,
        num_boost_round=150,
        valid_sets=[train,test],
        valid_names=['train','test'],
        early_stopping_rounds=20,
        verbose_eval=False
    )

    from_ = 0
    gain = [[],[],[],[],[],[],[]]   # one payout list per bet type
    for n_horses,odds in zip(q_test,odds_test):
        # Rank this race's horses; index + 1 turns the row position inside
        # the race into a 1-based number (assumes rows are in horse-number
        # order — TODO confirm against the scraper).
        pred = model.predict(X_test[from_:from_+n_horses],num_iteration=model.best_iteration)
        pred = pd.Series(pred).sort_values(ascending=False)
        pred.index = pred.index + 1
        pred_rank = list(map(str,pred.index.values))[0:3]
        from_ += n_horses

        # (bet name, winning combination(s), payout(s)) for every payout row.
        odds_list = re.findall(r'([^\|]*)\|([^\|]*)\|([^;]*)円;?',odds)

        # parsed[0] holds the winning combinations, parsed[1] the payouts,
        # both shaped like baken_list below so they can be compared slot by slot.
        parsed = []
        for field,sep in zip([1,2],[" ","円"]):
            comb_list =  [x[field] for x in odds_list if x[0] != "枠連"]
            comb_list[0] = [comb_list[0:1]]
            comb_list[1] = [[a] for a in comb_list[1].split(sep)]
            if len(comb_list[1]) == 5:
                # Five pieces means the entries were separated by three
                # spaces: keep every other piece and split the rest that way.
                comb_list[1] = comb_list[1][::2]
                sep = "   "
            comb_list[2] = [set(comb_list[2].split(sep))]
            comb_list[3] = comb_list[3].split(sep)
            comb_list[3] = [set(comb_list[3][0:(3-field)]),set(comb_list[3][3-field:6-2*field]),set(comb_list[3][6-2*field:9-3*field])]
            comb_list[4] = [comb_list[4].split(sep)]
            comb_list[5] = [set(comb_list[5].split(sep))]
            comb_list[6] = [comb_list[6].split(sep)]
            parsed.append(comb_list)

        # The bets placed from the model's top three, one slot per bet type:
        # lists are order-sensitive bets, sets are order-free ones.
        baken_list = [
            [[a] for a in pred_rank[0:1]],
            [[a] for a in pred_rank[0:3]],
            [set(pred_rank[0:2])],
            [set(pred_rank[0:2]),set(pred_rank[1:3]),set(pred_rank[::2])],
            [pred_rank[0:2]],
            [set(pred_rank)],
            [pred_rank],
        ]

        for bet in range(len(parsed[0])):
            for k in range(len(parsed[0][bet])):
                if parsed[0][bet][k] in baken_list[bet]:
                    gain[bet].append(float(list(parsed[1][bet][k])[0]))
                else:
                    gain[bet].append(0)

    # FIX: the per-bet lists have different lengths, so np.array(gain) raises
    # on NumPy >= 1.24; the lists are used directly instead.
    objective = []
    for bet_gain in gain:
        p = norm.sf(x=100,loc=np.average(bet_gain),scale=np.std(bet_gain,ddof=1)/np.sqrt(len(bet_gain)))
        objective.append(p)

    # FIX: the module is imported as `np` (the bare name `numpy` is undefined),
    # and file.write() needs a string rather than an integer.
    max_index = np.argmax(objective)
    with open("how_2_buy",mode="w") as f:
        f.write(str(max_index))

    return model

# Read back the tuned hyper-parameters: one float per line, in the order
# max_depth, num_leaves, think_odds, boosting_type.
with open("params",mode="r") as f:
    l = [float(line) for line in f if line.strip()]

model = train_sim(l[0],l[1],l[2],l[3]) #max_depth,num_leaves,think_odds,boosting_type

# FIX: the original passed a bare open() to pickle.dump and never closed it;
# the context manager guarantees the file is flushed and closed.
with open('trained_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [1]:
import traceback
import pandas as pd
import numpy as np
import urllib.parse
import urllib.request
from bs4 import BeautifulSoup
import re
import time
import warnings
warnings.simplefilter('ignore')
from concurrent import futures
import lightgbm as lgb
import pickle
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.keys import Keys
options = ChromeOptions()
options.add_argument('--headless')   

def id2kaisai(race_id):
    """Build the meeting key used to find a race inside a horse's results table.

    The key is ``race_id[0:4] + race_id[7] + <track name> + race_id[9]``.
    The track name is selected by the two-digit code at ``race_id[4:6]``;
    every code outside 1-9 falls back to "小倉".
    """
    track_code = int(race_id[4:6])
    # Position k holds the name for track code k + 1.
    names_by_code = ("札幌", "函館", "福島", "新潟", "東京", "中山", "中京", "京都", "阪神")
    if 1 <= track_code <= 9:
        track = names_by_code[track_code - 1]
    else:
        track = "小倉"
    return race_id[0:4] + race_id[7] + track + race_id[9]
    
def get_table(target_url):
    """Download ``target_url`` and return every HTML table on it as a list of DataFrames."""
    with urllib.request.urlopen(target_url) as page:
        raw_html = page.read()
    return pd.read_html(raw_html)
    
def make_data(race_id):
    """Scrape the entry sheet of an upcoming race and serialise it as one text line.

    The entry table is read from a page rendered by headless Chrome; the race
    header and the horse links are read from a second, plain HTTP fetch of
    the same URL.

    The returned line is ``race_info,horse_1,...,horse_n\n`` where

    * ``race_info`` is ``race_id;siba_da;length;weather;baba;prize``;
    * each horse is ``馬番;馬名;馬齢;騎手;単勝オッズ;馬体重;増減;調教師;馬主;生産者;
      <table_1[0,0]>;<table_1[2,0]>;<summed prize money>`` (the two
      ``table_1`` cells are the sire and dam according to the field list kept
      in the original comment).

    Args:
        race_id: netkeiba race id string.

    Returns:
        The serialised, newline-terminated line.
    """
    target_url = 'https://race.netkeiba.com/race/shutuba.html?race_id='+race_id+'&rf=race_submenu'
    driver = Chrome(options=options)
    # FIX: quit the browser even when loading the page fails, so no headless
    # Chrome process is left behind.
    try:
        driver.get(target_url)
        html = driver.page_source.encode('euc-jp',"ignore")
    finally:
        driver.quit()
    df_ = pd.read_html(html)[0]
    df = pd.DataFrame()

    with urllib.request.urlopen(target_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # --- race information: "race_id;siba_da;length;weather;baba;prize" ---
    data_1 = soup.select_one("#page > div.RaceColumn01 > div > div.RaceMainColumn > div.RaceList_NameBox > div.RaceList_Item02 > div.RaceData01").text
    result_1 = re.findall(r'/\s([^\s]+)',data_1)
    siba_da = result_1[0][0]      # first character of the course token
    length = result_1[0][1:-1]    # course token without its first and last character
    weather = result_1[1][-1]
    baba = result_1[2][-1]

    data_2 = soup.select_one("#page > div.RaceColumn01 > div > div.RaceMainColumn > div.RaceList_NameBox > div.RaceList_Item02 > div.RaceData02 > span:nth-child(10)").text
    result_2 = re.findall(r'本賞金:([^,]+),',data_2)
    prize = result_2[0]

    list_race = [race_id,siba_da,length,weather,baba,prize]
    race_info = ";".join(list_race)

    # --- per-horse information (the entry table has two-level column headers) ---
    df["馬齢"] = [x[1] for x in df_["性齢"]["性齢"]]
    df["馬名"] = list(df_["馬名"]["馬名"])
    # Horse numbers are taken from the row position (assumes the entry table
    # is listed in horse-number order — TODO confirm).
    df["馬番"] = df.index + 1
    df["騎手"] = list(df_["騎手"]["騎手"])
    # The odds column appears under one of two header names.
    try:
        odds_column = df_["オッズ 更新"]["オッズ 更新"]
    except KeyError:
        odds_column = df_["オッズ"]["オッズ"]
    df["単勝オッズ"] = list(odds_column)
    df["馬体重"] = [str(x)[0:3] for x in df_["馬体重(増減)"]["馬体重(増減)"]]
    df["増減"] = [str(x)[4:-1] for x in df_["馬体重(増減)"]["馬体重(増減)"]]
    df = df[["馬番","馬名","馬齢","騎手","単勝オッズ","馬体重","増減"]]
    df = df.sort_values('馬番')

    # str(html) is the repr of the bytes object; the ASCII hrefs survive that.
    horse_urls = re.findall(r'<a href="([^"]*/horse/[^"]+)"', str(html))
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(get_table,horse_urls))

    horses_list = []
    for i in range(len(df)):
        horse_list = list(map(str,df.iloc[i,:].values))
        tables = results[i]

        table_0 = tables[1]
        table_0.index = table_0.iloc[:,0]
        horse_list.append(table_0.loc["調教師",1])
        horse_list.append(table_0.loc["馬主",1])
        horse_list.append(table_0.loc["生産者",1])

        table_1 = tables[2]
        horse_list.append(table_1.loc[0,0])
        horse_list.append(table_1.loc[2,0])

        # The past-results table is normally tables[3]; when an awards table
        # ("受賞歴") sits in that slot the results table is the next one.
        table_2 = tables[3]
        if table_2.columns[0] == "受賞歴":
            table_2 = tables[4]

        table_2.index = [table_2.iloc[k,0][0:4]+table_2.iloc[k,1] for k in range(len(table_2))]
        # If this race is already listed in the horse's history, only the
        # rows below it are summed; otherwise the whole history is used.
        # KeyError: key absent. TypeError: get_loc returned a slice/mask
        # because the key is duplicated. Both keep the original best-effort
        # fallback without hiding unrelated errors behind a bare except.
        try:
            a = table_2.index.get_loc(id2kaisai(race_id))
            table_2 = table_2.iloc[a+1:,:]
        except (KeyError, TypeError):
            pass
        horse_list.append(str(np.sum(table_2["賞金"])))

        horses_list.append(";".join(horse_list))
    horses_info = ",".join(horses_list)

    info = ",".join([race_info,horses_info])+"\n"
    return info

def predict(race_id,odds=True):
    """Score every horse of an upcoming race with the pickled model.

    Args:
        race_id: netkeiba race id string, passed to ``make_data``.
        odds: the default ``True`` keeps feature column 10; any value that
            compares unequal to ``True`` drops it before predicting.

    Returns:
        A one-column DataFrame of model scores sorted in descending order.
        The column is named from two characters of the meeting key followed
        by the last two characters of ``race_id``; the index is the 1-based
        row position of each horse in the scraped entry list.
    """
    line = make_data(race_id)

    # NOTE(security): unpickling executes arbitrary code — only load a
    # trained_model.pkl that this notebook produced itself.
    # FIX: the file handle is now closed deterministically.
    with open('trained_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    category = [0,1,3,4,6,7,9,13,14,15,16,17]
    number =[2,5,8,10,11,12,18]

    line_list = line.split(",")
    race_info,horses_info = line_list[0],line_list[1:]
    race_list = race_info.split(";")

    X = []
    for horse_info in horses_info:
        data = race_list + horse_info.split(";")
        for i in number:
            try:
                data[i] = float(data[i])
            except ValueError:
                # Unparseable values (e.g. odds not yet published) become 0.
                data[i] = 0
        X.append(data)

    # Passing through np.array turns every cell into a string, so the numeric
    # columns are cast back to float explicitly below.
    X = pd.DataFrame(np.array(X))
    for i in category:
        X[i] = X[i].astype('category')
    for i in number:
        X[i] = X[i].astype('float')
    if odds != True:
        X = X.drop(columns=10)

    column_name = id2kaisai(race_id)[5:7]+race_id[-2:]
    pred = model.predict(X,num_iteration=model.best_iteration)
    pred = pd.DataFrame(pred,columns=[column_name]).sort_values(column_name,ascending=False)
    pred.index = pred.index + 1
    return pred



In [15]:
# Fetch the rendered netkeiba top page with headless Chrome.
target_url = "https://race.netkeiba.com/top/?rf=navi"
driver = Chrome(options=options)
# FIX: always shut the browser down, even when loading the page fails.
try:
    driver.get(target_url)
    html = driver.page_source.encode('euc-jp',"ignore")
finally:
    driver.quit()

soup = BeautifulSoup(html, "html.parser")

0


In [23]:
# Collect the ids embedded in the id="movie_..." attributes of the top page
# and drop their last two characters, leaving one entry per distinct prefix.
movie_ids = re.findall(r'id="movie_([^"]*)"', str(html))
titles = {movie_id[:-2] for movie_id in movie_ids}
print(list(set(titles)))

['2020090106', '2020070106', '2020060206']
