This notebook tests retrieving (scraping) player information from nba.com.

# import all the libraries

In [38]:
from selenium import webdriver #/!\ version :4.5.0
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options


from bs4 import BeautifulSoup

import pandas as pd

import re
import time

# Constants

In [39]:
# Pages scraped by this notebook: the all-time leaders stats table, the
# players list, and the draft combine strength/agility table.
NBA_PLAYERS_STATS_URL = 'https://www.nba.com/stats/alltime-leaders'
NBA_PLAYERS_INFO_URL = 'https://www.nba.com/players'
NBA_PLAYERS_AGILITY_URL = 'https://www.nba.com/stats/draft/combine-strength-agility'

# Path to the ChromeDriver executable handed to the selenium Service in
# startBrowser(); it is relative to the current working directory.
CHROME_DRIVER_PATH = "./chromedriver.exe"

# Functions

### Browser functions

In [40]:
def startBrowser():
    """Start a Chrome browser driven by the local ChromeDriver executable.

    Returns:
        A ``webdriver.Chrome`` instance launched through the driver found at
        ``CHROME_DRIVER_PATH`` with default Chrome options.
    """
    service = Service(CHROME_DRIVER_PATH)
    # One Options object is enough; enable the argument below to run Chrome
    # without a visible window.
    options = Options()
    # options.add_argument("--headless")
    return webdriver.Chrome(service=service, options=options)

def isBrowserClosed(browser):
    """Report whether the given browser session can no longer be reached.

    Used to detect that the user closed the browser window manually.

    Args:
        browser: The WebDriver instance to probe.

    Returns:
        True if reading ``browser.title`` raises, False otherwise.
    """
    try:
        # Probe the session itself (not the selenium module): a browser that
        # is still alive can always report its page title.
        _ = browser.title
    except Exception:
        return True
    return False
        

### Conversion functions

In [41]:
def convertHeightToCm(height):
    """Convert a ``<feet>-<inches>`` height string to centimeters.

    Args:
        height: Text containing a height written as ``<feet>-<inches>``,
            for example ``"6-9"``. Only the first such pattern is used.

    Returns:
        The height in centimeters, or ``0`` when the text contains no
        ``<feet>-<inches>`` pattern.
    """
    found = re.search(r'\d+-\d+', height)
    if found is None:
        return 0

    feet, inches = map(int, found.group().split('-'))
    total_inches = feet * 12 + inches
    return total_inches * 2.54

def convertWeightToKg(weight):
    """Convert a weight given in pounds to kilograms.

    Args:
        weight: Text whose first run of digits is the weight in pounds,
            for example ``"220 lbs"``.

    Returns:
        The weight in kilograms, or ``0`` when the text contains no digits.
    """
    found = re.search(r'\d+', weight)
    if found is not None:
        pounds = float(found.group())
        return pounds * 0.453592
    return 0



### Scraping functions

In [42]:
def getDataFromURL(URL):
    """Scrape every page of the paginated table at ``URL`` into a DataFrame.

    Opens the page in a fresh browser, accepts the cookie banner, then reads
    the table of each page and clicks the "Next Page" button until that
    button is reported as disabled.

    Args:
        URL: Address of the page to scrape. When it equals
            ``NBA_PLAYERS_INFO_URL`` an extra control is clicked first so
            that all (historic) players are listed.

    Returns:
        A DataFrame holding the rows of every page visited. If the browser
        is closed while the next page is being requested, the rows gathered
        so far are returned.
    """
    nextPageSelector = "button[title^='Next Page Button']"
    df = pd.DataFrame()
    browser = startBrowser()
    try:
        # open the url
        browser.get(URL)

        time.sleep(1)
        # accept cookies
        browser.find_element(By.CSS_SELECTOR, "button#onetrust-accept-btn-handler").click()
        time.sleep(5)

        if URL == NBA_PLAYERS_INFO_URL:  # this is to get infos from all time players
            # click on the button to show all players
            browser.find_element(By.XPATH, '//*[@id="__next"]/div[2]/div[2]/main/div[2]/section/div/div[2]/div[1]/div[6]').click()
            time.sleep(1)

        while True:
            # Read the page currently displayed (this also covers the last
            # page, whose "next" button is disabled).
            data, headers = getDataFromHTML(browser.page_source)
            df = pd.concat([df, pd.DataFrame(data, columns=headers)], ignore_index=True)
            print(".", end = '')

            if not browser.find_element(By.CSS_SELECTOR, nextPageSelector).is_enabled():
                return df

            # Retry the click until it goes through; give up only when the
            # browser itself is gone.
            while True:
                try:
                    browser.find_element(By.CSS_SELECTOR, nextPageSelector).click()
                    break
                except Exception:
                    if isBrowserClosed(browser):
                        return df
                    print('not yet clickable')
                    # Short pause so the retry loop does not spin at full speed.
                    time.sleep(0.5)
    finally:
        # Always shut the browser and its driver process down, including on
        # errors and on the early returns above.
        browser.quit()

def getDataAgilityFromURL(URL, startYear, endYear):
    """Scrape the table at ``URL`` once per season step and stack the results.

    Opens the page in a fresh browser, accepts the cookie banner, then for
    each year of the range moves the season drop-down one entry down and
    reads the table that is displayed.

    Args:
        URL: Address of the page to scrape.
        startYear: First year of the range.
        endYear: Last year of the range (inclusive).

    Returns:
        A DataFrame holding the rows read after each drop-down step.

    NOTE(review): the years only determine how many times the drop-down is
    stepped (``endYear - startYear + 1``); no season is selected by value and
    the table shown before the first step is never read. Confirm this matches
    the drop-down's default entry and ordering.
    """
    df = pd.DataFrame()
    browser = startBrowser()
    try:
        # open the url
        browser.get(URL)

        time.sleep(1)
        # accept cookies
        browser.find_element(By.CSS_SELECTOR, "button#onetrust-accept-btn-handler").click()
        time.sleep(5)

        for _ in range(startYear, endYear + 1):
            # move the season drop-down to its next entry
            browser.find_element(By.CSS_SELECTOR, "select.DropDown_select__4pIg9").send_keys(Keys.ARROW_DOWN)
            time.sleep(1)

            data, headers = getDataFromHTML(browser.page_source)
            df = pd.concat([df, pd.DataFrame(data, columns=headers)], ignore_index=True)

            print(".", end = '')
    finally:
        # Always shut the browser and its driver process down, even when a
        # step above fails.
        browser.quit()

    return df



def getDataFromHTML(html):
    """Extract the rows and the column titles of the data table in ``html``.

    Args:
        html: Page source to parse.

    Returns:
        A ``(data, headers)`` tuple. ``data`` is a list of rows, each row
        being the list of texts of its ``<td>`` cells; ``headers`` is the
        list of texts of the selected ``<th>`` cells.

    Raises:
        ValueError: If the page contains no table with one of the expected
            classes.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # find the table with class Crom_table__p1iZz or players-list
    table = soup.find('table', attrs = {'class' : ['Crom_table__p1iZz','players-list']})
    if table is None:
        raise ValueError(
            "no table with class 'Crom_table__p1iZz' or 'players-list' found in the page"
        )

    # NOTE(review): the 'hidden': None filter is meant to skip header cells
    # carrying a `hidden` attribute - confirm against the live page.
    headers = [header.text for header in table.find_all('th', attrs = {'hidden': None})]

    rowsCells = ([td.text for td in row.find_all('td')] for row in table.find_all('tr'))
    # Rows without any <td> cell (such as a header row made only of <th>
    # cells) yield an empty list and are dropped.
    data = [cells for cells in rowsCells if cells]

    return data, headers


# Get stats from all players

In [43]:
# Scrape the all-time leaders stats table; getDataFromURL prints one dot per
# page read.
print("start of the scrapping")
df = getDataFromURL(NBA_PLAYERS_STATS_URL)
print("\nend of the scrapping\n\n")



# Write the scraped rows to a CSV file in the working directory, without the
# dataframe index column.
print("saving the dataframe to a csv file...")
df.to_csv('nbaPlayersAllTimesStatsData.csv', index = False)
print("done")

start of the scrapping
................................................................................................
end of the scrapping


saving the dataframe to a csv file...
done


In [44]:
# Report the number of rows and columns of the dataframe scraped above.
print("the dataframe has", df.shape[0], "rows and", df.shape[1], "columns")

the dataframe has 4771 rows and 23 columns


# Get Info from all players

In [45]:
# Scrape the players list; this rebinds `df`, replacing the previous
# dataframe.
print("start of the scrapping")
df = getDataFromURL(NBA_PLAYERS_INFO_URL)
print("\nend of the scrapping\n\n")

print("Converting height and weight to cm and kg...", end = '')
# Replace the 'Height' column (feet-inches text) with centimeters.
df['Height'] = df['Height'].apply(convertHeightToCm)
# Replace the 'Weight' column (pounds text) with kilograms.
df['Weight'] = df['Weight'].apply(convertWeightToKg)
print("done\n\n")

# Write the converted rows to a CSV file, without the dataframe index column.
print("saving the dataframe to a csv file...", end = '')
df.to_csv('nbaPlayersAllTimesInfo.csv', index = False)
print("done")

start of the scrapping
.................................................................................................
end of the scrapping


Converting height and weight to cm and kg...done


saving the dataframe to a csv file...done


In [46]:
# Report the number of rows and columns of the dataframe scraped above.
print("the dataframe has", df.shape[0], "rows and", df.shape[1], "columns")

the dataframe has 4804 rows and 8 columns


# Get player Agility infos

In [47]:
# Scrape the draft combine strength/agility table, stepping the season
# drop-down once per year from 2000 to 2023; this rebinds `df`.
print("start of the scrapping")
df = getDataAgilityFromURL(NBA_PLAYERS_AGILITY_URL, 2000, 2023)
print("\nend of the scrapping\n\n")


# Write the scraped rows to a CSV file, without the dataframe index column.
print("saving the dataframe to a csv file...", end = '')
df.to_csv('nbaPlayersAllTimesAgilityData.csv', index = False)
print("done")


start of the scrapping
........................
end of the scrapping


saving the dataframe to a csv file...done


In [48]:
# Report the number of rows and columns of the dataframe scraped above.
print("the dataframe has", df.shape[0], "rows and", df.shape[1], "columns")

the dataframe has 1606 rows and 8 columns
