# Velogames Model

The purpose of this script is to learn some basic Python skills by training a predictive model from data collected on the internet.<br>
The predictive model will attempt to develop the optimal Velogames team for the 2023 Grand Tour season


In [28]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
import numpy as np 
import time

### Collect training data from Velogames
This section creates a csv file with the riders and their Velogames score from past grand tours

In [None]:
def get_velogame_data(races,years):
    search_string_list=[,,]
    
    for race in races:        
        if race not in ['france','italy','spain']:
            continue
        for year in years:
            search_string_list.append(race,year,"https://www.velogames.com/"+race.replace('France','velogame')+"/"+year+"/riders.php")
            
    for velogame in search_string_list:
        page = requests.get(velogame)
        print("Status Code {} {}: {}".format(race, year, page.status_code)) #Status 200 = good
        
        soup = BeautifulSoup(page.content,'html.parser')
        
        

In [9]:
race = 'italy'
year = '2022'

search_string = "https://www.velogames.com/"+race.replace('france','velogame')+"/"+year+"/riders.php"
print("Collecting Data from {}".format(search_string))
      
page = requests.get("https://www.velogames.com/"+race.replace('france','velogame')+"/"+year+"/riders.php")
print("Status Code: {}".format(page.status_code)) #Status 200 = good

soup = BeautifulSoup(page.content,'html.parser')

      

Collecting Data from https://www.velogames.com/italy/2022/riders.php
Status Code: 200


In [10]:
header_list=[]
vg_rider_data_staged = []

#Collect table headers - these can be different between races and years
tbl_headers = soup.select('div[class="main-container"] th b')
vg_rider_data_staged.append([header.get_text() for header in tbl_headers])
vg_rider_data_staged[0].insert(0,'Rider_Link')

#Collect table data
tbl_data = soup.select('div[class="main-container"] tbody tr')
for tbl_row in tbl_data:
    vg_rider_data_staged.append([tbl_cell.get_text(",") for tbl_cell in tbl_row.find_all('td')])
    vg_rider_data_staged[-1][0]=tbl_row.find('a')['href']
    
print(vg_rider_data_staged[:5])


[['Rider_Link', 'Rider', 'Team', 'Class', 'Cost', 'Selected', 'Points'], ['riderprofile.php?rider=20220570', 'Richard Carapaz', 'INEOS Grenadiers', 'All Rounder', '24', '56.3%', '2244'], ['riderprofile.php?rider=20220952', 'Simon Yates', 'Team BikeExchange - Jayco', 'Climber', '22', '27.8%', '666'], ['riderprofile.php?rider=20221068', 'João Almeida', 'UAE Team Emirates', 'All Rounder', '20', '41.2%', '985'], ['riderprofile.php?rider=20220173', 'Mikel Landa', 'Bahrain - Victorious', 'Climber', '18', '11.4%', '1432']]


In [33]:
header_list=[]
vg_rider_points_data_staged = []

for rider in vg_rider_data_staged[1:5]:
    search_string = "https://www.velogames.com/"+race.replace('france','velogame')+"/"+year+"/"+rider[0]
    print("Collecting Data from {}".format(search_string))
    page = requests.get(search_string)
    print("Status Code: {}".format(page.status_code)) #Status 200 = good
    soup = BeautifulSoup(page.content,'html.parser')
    
    #Collect table headers
    tbl_headers = soup.select('table thead')
    vg_rider_points_data_staged.append([header.get_text(',') for header in tbl_headers])
  
    print(vg_rider_points_data_staged)
    
    
    time.sleep(1)

Collecting Data from https://www.velogames.com/italy/2022/riderprofile.php?rider=20220570
Status Code: 200
[['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n']]
Collecting Data from https://www.velogames.com/italy/2022/riderprofile.php?rider=20220952
Status Code: 200
[['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n'], ['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n']]
Collecting Data from https://www.velogames.com/italy/2022/riderprofile.php?rider=20221068
Status Code: 200
[['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n'], ['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n'], ['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n']]
Collecting Data from https://www.velogames.com/italy/2022/riderprofile.php?rider=20220173
Status Code: 200
[['\n,Round,\n,Stg,\n,GC,\n,PC,\n,KOM,\n,Spr,\n,Sum,\n,Bky,\n,Ass,\n,Tot,\n,\n'], [

### Get the PCS name keys

This section creates a csv file with the names of top riders as well as the associated PCS name key which is used to query individual rider results later on. 

In [2]:
rider_list=['PCS_key,Lname,Fname']
n = 3 #n page requests will be made, n*100 riders will be collected

for i in range(n):
    
    #Query the PCS Rankings site to get a list of riders
    seach_string = "https://www.procyclingstats.com/rankings.php?date=2021-12-31&nation=&age=&zage=&page=smallerorequal&team=&offset="+str(100*i)+"&teamlevel=&filter=Filter"
    page = requests.get(seach_string)
    print("Status Code {}: {}".format(i, page.status_code)) #Status 200 = good
    
    #Use BS to pull the PCS name key, Fname and Lname
    soup = BeautifulSoup(page.content,'html.parser')
    riders = soup.select('tr a[href^="rider/"]')
    
    #Create a list of the PCS name key as well as the first and last name of the rider
    for rider in riders:
        rider_list.append(rider['href'].replace("rider/","")+","+rider.find('span').get_text()+","+rider.find('span').next_sibling.strip())

Status Code 0: 200
Status Code 1: 200
Status Code 2: 200


In [3]:
n=10
print('Information for {} riders has been collected.\n'.format(len(rider_list)))

print('Top {} results are shown below:'.format(n))
rider_list[:n]

Information for 301 riders has been collected.

Top 10 results are shown below:


['PCS_key,Lname,Fname',
 'tadej-pogacar,Pogačar,Tadej',
 'wout-van-aert,van Aert,Wout',
 'primoz-roglic,Roglič,Primož',
 'julian-alaphilippe,Alaphilippe,Julian',
 'joao-almeida,Almeida,João',
 'sonny-colbrelli,Colbrelli,Sonny',
 'jasper-philipsen,Philipsen,Jasper',
 'egan-bernal,Bernal,Egan',
 'adam-yates,Yates,Adam']

In [4]:
np.savetxt("/home/quixote/Documents/project_data/pcs_rider_list.csv",rider_list,delimiter=",",fmt='% s')

In [6]:
del rider_list, soup, riders, page