In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [4]:
#Read the combined MVP / player stats table covering all seasons
nba_combined = pd.read_csv(
    filepath_or_buffer="../Datasets/mvps_stats_all_seasons_revised.csv",
)

In [5]:
#Filter out seasons before 2006-07
#The comparison is a plain string comparison; it orders seasons chronologically
#as long as every label starts with the four-digit start year (e.g. '2006-07')
#.copy() detaches the result from the unfiltered frame, so the later column
#assignments on nba_combined do not raise SettingWithCopyWarning
nba_combined = nba_combined[nba_combined['Season'] >= '2006-07'].copy()

In [7]:
#Load the RAPTOR data
#Both files are read with the same subset of columns
raptor_columns = ['player_name', 'season', 'raptor_offense', 'raptor_defense', 'predator_offense', 'predator_defense']
latest_raptor_df = pd.read_csv('../Datasets/latest_RAPTOR_by_team.csv', usecols=raptor_columns)
historical_raptor_df = pd.read_csv('../Datasets/historical_RAPTOR_by_team.csv', usecols=raptor_columns)

#Convert the 'season' column in RAPTOR dataframes to string
latest_raptor_df['season'] = latest_raptor_df['season'].astype(str)
historical_raptor_df['season'] = historical_raptor_df['season'].astype(str)

#Merge the RAPTOR dataframes
#Only the first row per (player_name, season) survives; rows from the latest file
#are listed first, so they take precedence over the historical file
#NOTE(review): the inputs are "by_team" files, so a player who changed teams mid-season
#presumably has several rows and only the first one is kept here - confirm this is intended
raptor_df = pd.concat([latest_raptor_df, historical_raptor_df]).drop_duplicates(subset=['player_name', 'season'])

In [9]:
#Create list
#Each entry is [season, player_name, odds] as scraped text
dataodds = []

#Base URL
#The {} placeholder receives the season label, e.g. '2006-2007'
base_url = 'https://www.sportsoddshistory.com/nba-awd/?y={}&sa=nba&a=nbamvp&o=r'

#Loop through the years from 2006 to 2022
for year in range(2006, 2023):
    # Format the URL with the current season
    season = f'{year}-{year+1}'
    url = base_url.format(season)

    #Send a GET request
    #The timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(url, timeout=30)
    #Stop on HTTP errors instead of parsing an error page as if it were odds data
    response.raise_for_status()

    #Parse
    soup = BeautifulSoup(response.text, 'html.parser')

    #Find the table body (the first <tbody> element in the page)
    table_body = soup.find('tbody')
    if table_body is None:
        #Without this check the loop below fails with an opaque AttributeError on None
        raise ValueError(f'No odds table found for season {season} at {url}')

    #Extract from first two columns
    for row in table_body.find_all('tr'):
        cols = row.find_all('td')
        #Skip rows that do not carry both a player cell and an odds cell
        if len(cols) < 2:
            continue
        player_name = cols[0].text.strip()
        odds = cols[1].text.strip()
        dataodds.append([season, player_name, odds])
oddsdf = pd.DataFrame(dataodds, columns=['Season', 'Player', 'Odds'])

In [13]:
#Assemble the scraped [season, player, odds] rows into a DataFrame and show it
odds_columns = ['Season', 'Player', 'Odds']
oddsdf = pd.DataFrame(data=dataodds, columns=odds_columns)
oddsdf

Unnamed: 0,Season,Player,Odds
0,2006-2007,Dirk Nowitzki,+800
1,2006-2007,LeBron James,+250
2,2006-2007,Dwyane Wade,+400
3,2006-2007,Kobe Bryant,+500
4,2006-2007,Tim Duncan,+600
...,...,...,...
721,2022-2023,Draymond Green,+100000
722,2022-2023,Jalen Green,+100000
723,2022-2023,John Collins,+100000
724,2022-2023,Kevin Porter Jr,+100000


In [14]:
#Adjust the 'Season' column in nba_combined to match the 'season' format in RAPTOR data
def convert_season(season):
    """Convert a season label into the four-digit year in which the season ends.

    Examples: '2006-07' -> '2007', '1998-99' -> '1999', '2006-2007' -> '2007'.

    Parameters
    ----------
    season : str
        Season label made of a start part and an end part joined by one '-'.

    Returns
    -------
    str
        The end year as a four-digit string. Values that are not strings, or
        that do not contain exactly one '-', are returned unchanged, so applying
        the function to already converted values is harmless.
    """
    #Missing values (None / NaN) have no .split(); pass them through untouched
    if not isinstance(season, str):
        return season

    parts = season.split('-')
    if len(parts) != 2:
        return season

    start, end = parts
    if len(start) == 4 and start.isdecimal():
        #Derive the end year from the start year so the century is right
        #for seasons outside the 2000s as well
        return str(int(start) + 1)

    #No usable start year: fall back to the last two digits of the end part
    return '20' + end[-2:]

#Rewrite the MVP table's 'Season' labels as the four-digit end year used by the RAPTOR 'season' column
nba_combined['Season'] = nba_combined['Season'].map(convert_season)

#Odds seasons look like '2006-2007': keep the last two digits and prefix '20'
oddsdf['Season'] = '20' + oddsdf['Season'].str[-2:]

#Left-join the RAPTOR metrics, then the odds, onto the MVP rows (player + season keys)
nba_combined = nba_combined.merge(
    raptor_df,
    how='left',
    left_on=['Player', 'Season'],
    right_on=['player_name', 'season'],
)
nba_combined = nba_combined.merge(oddsdf, how='left', on=['Player', 'Season'])

In [15]:
#Display names for the RAPTOR / PREDATOR metric columns
pretty_names = {
    'raptor_offense': 'Raptor Offense',
    'raptor_defense': 'Raptor Defense',
    'predator_offense': 'Predator Offense',
    'predator_defense': 'Predator Defense',
}

#Season becomes an int, the RAPTOR join keys are dropped, and the metric columns are renamed
nba_combined = (
    nba_combined
    .astype({'Season': int})
    .drop(['season', 'player_name'], axis=1)
    .rename(columns=pretty_names)
)

#Players with no odds row get the placeholder "+99999":
#no listed preseason odds is treated as an unrealistic chance of winning
nba_combined['Odds'] = nba_combined['Odds'].fillna("+99999")

In [16]:
#Display the merged table (MVP rows with RAPTOR/PREDATOR metrics and odds) for a visual check
nba_combined

Unnamed: 0.1,Unnamed: 0,Season,Rank,Player,Pos,Age,G,MP,PTS,AST,...,First,Pts Won,Pts Max,Share,MVP,Raptor Offense,Raptor Defense,Predator Offense,Predator Defense,Odds
0,166,2007,1,Dirk Nowitzki,PF,28,78,36.2,24.6,3.4,...,83,1138,1290,0.882,1,2.169240,1.030291,2.558710,1.427014,+800
1,167,2007,2,Steve Nash,PG,32,76,35.3,18.6,11.6,...,44,1013,1290,0.785,0,7.029865,-1.873767,6.812993,-2.210570,+1000
2,168,2007,3,Kobe Bryant,SG,28,77,40.8,31.6,5.4,...,2,521,1290,0.404,0,3.346244,-0.523628,3.457525,-0.395470,+500
3,169,2007,4,Tim Duncan,C,30,80,34.1,20.0,3.4,...,0,286,1290,0.222,0,2.729091,3.926063,3.008572,3.244549,+600
4,170,2007,5,LeBron James,SF,22,78,40.9,27.3,6.0,...,0,183,1290,0.142,0,4.870715,3.108085,4.901036,2.817517,+250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,393,2023,9,Stephen Curry,PG,34,56,34.7,29.4,6.3,...,0,5,1000,0.005,0,7.450293,-1.714806,6.534401,-0.943459,+1400
229,394,2023,10,Jimmy Butler,PF,33,64,33.4,22.9,5.3,...,0,3,1000,0.003,0,4.052923,1.246758,4.185950,2.119834,+5000
230,395,2023,11,De'Aaron Fox,PG,25,73,33.4,25.0,6.1,...,0,2,1000,0.002,0,3.934258,-5.604630,4.283736,-3.788431,+15000
231,396,2023,12T,Jalen Brunson,PG,26,68,35.0,24.0,6.2,...,0,1,1000,0.001,0,10.020898,-0.937940,10.882112,-0.463401,+99999


In [17]:
#Save the merged DataFrame
#NOTE(review): the row index is written as an extra unnamed first column;
#pass index=False if downstream readers do not expect it
nba_combined.to_csv(path_or_buf="../Datasets/MVPdata_final.csv")