# Imports and Global Settings

In [1]:
import os

import pandas as pd
from sqlalchemy import create_engine

# Raise pandas' display limits so wide/long frames render without truncation.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 100)

# Data Inbound

In [2]:
# Connection settings for the PostgreSQL database.
#
# Every value can be overridden through an environment variable so that real
# credentials never have to be typed into (and saved with) the notebook. When
# a variable is not set, the value previously hard-coded here is used, so the
# notebook behaves exactly as before in an unconfigured environment.
username = os.environ.get('NBA_BETTING_DB_USERNAME', 'postgres')
password = os.environ.get('NBA_BETTING_DB_PASSWORD', '')
endpoint = os.environ.get('NBA_BETTING_DB_ENDPOINT', '')
database = os.environ.get('NBA_BETTING_DB_NAME', 'nba_betting')
port = os.environ.get('NBA_BETTING_DB_PORT', '5432')

In [3]:
# Open the database connection used by every read below.
#
# * The URL now includes `port`, which is defined alongside the other settings
#   but was previously left out of the connection string.
#   NOTE(review): this assumes `endpoint` is a bare host name without its own
#   ":port" suffix -- confirm against the real endpoint value.
# * The engine is kept in its own variable so it stays reachable, e.g. for
#   `engine.dispose()` once the notebook is done with the database.
# * NOTE(review): `password` is interpolated verbatim; a password containing
#   URL-special characters such as "@" or "/" would need to be URL-encoded.
engine = create_engine(f'postgresql+psycopg2://{username}:{password}@{endpoint}:{port}/{database}')
connection = engine.connect()

In [4]:
# Pull each source table in full into its own DataFrame.
historic_odds = pd.read_sql_table(table_name='historic_odds', con=connection)
historic_standings = pd.read_sql_table(table_name='historic_standings', con=connection)
historic_team_stats = pd.read_sql_table(table_name='historic_team_stats', con=connection)
historic_opponent_stats = pd.read_sql_table(table_name='historic_opponent_stats', con=connection)

# Standardize Dates

In [5]:
# Parse the `date` column into a `datetime` column; the joins below key on it.
historic_odds = historic_odds.assign(
    datetime=lambda frame: pd.to_datetime(frame['date'])
)

In [6]:
# Parse the `date` column into a `datetime` column; the joins below key on it.
historic_standings = historic_standings.assign(
    datetime=lambda frame: pd.to_datetime(frame['date'])
)

In [7]:
# Parse the `date` column into a `datetime` column; the joins below key on it.
historic_team_stats = historic_team_stats.assign(
    datetime=lambda frame: pd.to_datetime(frame['date'])
)

In [8]:
# Parse the `date` column into a `datetime` column; the joins below key on it.
historic_opponent_stats = historic_opponent_stats.assign(
    datetime=lambda frame: pd.to_datetime(frame['date'])
)

# Standardize Team Names

In [9]:
# Full team name -> three-letter abbreviation. The abbreviations are the same
# codes that `historic_odds` carries in its `team` / `opponent` columns, which
# is what lets the standings and stats tables be joined onto the odds table.
teams = {
    'Washington Wizards': 'WAS',
    'Brooklyn Nets': 'BKN',
    'Chicago Bulls': 'CHI',
    'Miami Heat': 'MIA',
    'Cleveland Cavaliers': 'CLE',
    'Philadelphia 76ers': 'PHI',
    'New York Knicks': 'NYK',
    'Charlotte Hornets': 'CHA',
    'Boston Celtics': 'BOS',
    'Toronto Raptors': 'TOR',
    'Milwaukee Bucks': 'MIL',
    'Atlanta Hawks': 'ATL',
    'Indiana Pacers': 'IND',
    'Detroit Pistons': 'DET',
    'Orlando Magic': 'ORL',
    'Golden State Warriors': 'GSW',
    'Phoenix Suns': 'PHX',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Los Angeles Clippers': 'LAC',
    'Utah Jazz': 'UTA',
    'Los Angeles Lakers': 'LAL',
    'Memphis Grizzlies': 'MEM',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'Oklahoma City Thunder': 'OKC',
    'Minnesota Timberwolves': 'MIN',
    'San Antonio Spurs': 'SAS',
    'New Orleans Pelicans': 'NOP',
    'Houston Rockets': 'HOU',
    # Additional names that share an abbreviation with an entry above.
    'Charlotte Bobcats': 'CHA',
    'New Orleans Hornets': 'NOP',
    'New Jersey Nets': 'BKN',
    'Seattle SuperSonics': 'OKC',
    'New Orleans/Oklahoma City Hornets': 'NOP',
}

In [10]:
# Translate full team names to abbreviations via `teams`.
# Names that are missing from `teams` come out as NaN.
historic_standings = historic_standings.assign(
    teamname=lambda frame: frame['team'].map(teams)
)

In [11]:
# Translate full team names to abbreviations via `teams`.
# Names that are missing from `teams` come out as NaN.
historic_team_stats = historic_team_stats.assign(
    teamname=lambda frame: frame['team'].map(teams)
)

In [12]:
# Translate full team names to abbreviations via `teams`.
# Names that are missing from `teams` come out as NaN.
historic_opponent_stats = historic_opponent_stats.assign(
    teamname=lambda frame: frame['team'].map(teams)
)

# Combine Data

In [13]:
# `pred_datetime` is the day before the game; the merges below use it to look
# up standings/stats rows dated one day earlier than the game itself.
historic_odds = historic_odds.assign(
    pred_datetime=lambda frame: frame['datetime'] - pd.DateOffset(1)
)

In [14]:
# Spot-check: rows whose prediction date is 2020-12-31 (games on 2021-01-01).
historic_odds.loc[historic_odds['pred_datetime'] == pd.Timestamp('2020-12-31')]

Unnamed: 0,index,date,game_id,game_url,home,league_year,opponent,opponent_score,result,score,spread,spread_result,team,datetime,pred_datetime
757,757,2021-01-01,238998,/sport/basketball/nba/boxscore/238998,True,2020-2021,BOS,93,W,96,8.5,W,DET,2021-01-01,2020-12-31
947,947,2021-01-01,239000,/sport/basketball/nba/boxscore/239000,True,2020-2021,WAS,130,L,109,1.5,L,MIN,2021-01-01,2020-12-31
991,991,2021-01-01,238999,/sport/basketball/nba/boxscore/238999,True,2020-2021,ATL,114,L,96,-6.5,L,BKN,2021-01-01,2020-12-31
1021,1021,2021-01-01,238996,/sport/basketball/nba/boxscore/238996,True,2020-2021,MEM,108,L,93,-4.0,L,CHA,2021-01-01,2020-12-31
1068,1068,2021-01-01,239002,/sport/basketball/nba/boxscore/239002,True,2020-2021,CHI,96,W,126,-14.5,W,MIL,2021-01-01,2020-12-31
1191,1191,2021-01-01,239005,/sport/basketball/nba/boxscore/239005,True,2020-2021,POR,123,L,98,3.5,L,GSW,2021-01-01,2020-12-31
1196,1196,2021-01-01,239003,/sport/basketball/nba/boxscore/239003,True,2020-2021,LAC,100,W,106,3.5,W,UTA,2021-01-01,2020-12-31
1223,1223,2021-01-01,239004,/sport/basketball/nba/boxscore/239004,True,2020-2021,PHX,106,L,103,-3.5,L,DEN,2021-01-01,2020-12-31
1262,1262,2021-01-01,238997,/sport/basketball/nba/boxscore/238997,True,2020-2021,MIA,83,W,93,1.5,W,DAL,2021-01-01,2020-12-31
10875,10875,2021-01-01,239001,/sport/basketball/nba/boxscore/239001,True,2020-2021,LAL,109,L,103,7.5,W,SAS,2021-01-01,2020-12-31


In [15]:
# Left-join the standings row for the `team` side (renamed to `home_team`
# further down) as of `pred_datetime`. Colliding right-hand columns get the
# `_sta` suffix.
# NOTE(review): validate='1:m' only asserts that the left-hand keys are
# unique; duplicate keys in `historic_standings` would still fan out rows.
# Confirm whether 'm:1' / '1:1' was intended.
full_dataset = pd.merge(
    historic_odds,
    historic_standings,
    how='left',
    left_on=['team', 'pred_datetime'],
    right_on=['teamname', 'datetime'],
    suffixes=(None, '_sta'),
    validate='1:m',
)

In [16]:
# Left-join the standings row for the `opponent` side (renamed to `away_team`
# further down) as of `pred_datetime`. Columns that collide with the ones
# already present get the `_osta` suffix, e.g. `wins_osta`.
# NOTE(review): validate='1:m' only asserts that the left-hand keys are
# unique; duplicate keys in `historic_standings` would still fan out rows.
full_dataset = pd.merge(
    full_dataset,
    historic_standings,
    how='left',
    left_on=['opponent', 'pred_datetime'],
    right_on=['teamname', 'datetime'],
    suffixes=(None, '_osta'),
    validate='1:m',
)

In [17]:
# Left-join the `historic_team_stats` row for the `team` side as of
# `pred_datetime`. Colliding right-hand columns get the `_ts` suffix.
# NOTE(review): validate='1:m' only asserts that the left-hand keys are
# unique; duplicate keys in `historic_team_stats` would still fan out rows.
full_dataset = pd.merge(
    full_dataset,
    historic_team_stats,
    how='left',
    left_on=['team', 'pred_datetime'],
    right_on=['teamname', 'datetime'],
    suffixes=(None, '_ts'),
    validate='1:m',
)

In [18]:
# Left-join the `historic_opponent_stats` row keyed by the `opponent` column
# as of `pred_datetime`. Columns that collide with the ones already present
# get the `_os` suffix, e.g. `pts_os`.
# NOTE(review): validate='1:m' only asserts that the left-hand keys are
# unique; duplicate keys in `historic_opponent_stats` would still fan out
# rows, and no later merge would notice.
# NOTE(review): confirm that `historic_opponent_stats` is meant to be looked
# up by the opposing team rather than by `team`.
full_dataset = pd.merge(
    full_dataset,
    historic_opponent_stats,
    how='left',
    left_on=['opponent', 'pred_datetime'],
    right_on=['teamname', 'datetime'],
    suffixes=(None, '_os'),
    validate='1:m',
)

# Create Unique Record ID

In [19]:
# Record id = game date (YYYYMMDD) + `team` + `opponent`, e.g. '20210101DETBOS'.
# Uses the vectorised `.dt.strftime` accessor instead of a per-row Python
# lambda; a missing date now produces a missing id rather than raising.
full_dataset['id'] = (
    full_dataset['datetime'].dt.strftime('%Y%m%d')
    + full_dataset['team']
    + full_dataset['opponent']
)

# Cleanup

## Remove Excess Columns and Reorder

In [20]:
# Columns retained in the final dataset, in output order. The `_osta` / `_os`
# names are the suffixed counterparts produced by the opponent-side merges, so
# they are derived from the base names instead of being typed out twice.
game_columns = [
    'id', 'datetime', 'team', 'opponent', 'score', 'opponent_score', 'result',
    'game_url', 'league_year', 'spread', 'spread_result', 'pred_datetime',
]
standings_columns = [
    'wins', 'losses', 'win_perc', 'expected_wins', 'expected_losses',
    'points_scored_per_game', 'points_allowed_per_game',
]
stat_columns = [
    'g', 'mp', 'pts', 'ast', 'trb', 'blk', 'stl',
    'tov', 'pf', 'drb', 'orb',
    'fg', 'fga', 'fg_pct', 'fg2', 'fg2a', 'fg2_pct',
    'fg3', 'fg3a', 'fg3_pct', 'ft', 'fta', 'ft_pct',
]

columns_to_keep = (
    game_columns
    + standings_columns
    + [f'{column}_osta' for column in standings_columns]
    + stat_columns
    + [f'{column}_os' for column in stat_columns]
)

In [21]:
# Keep only the listed columns, in the listed order (KeyError if one is missing).
full_dataset = full_dataset.loc[:, columns_to_keep]

## Rename Columns

In [22]:
# Post-merge column name -> final column name.
#
# Fix: 'ft_pct_os' previously mapped to 'away_fta_pct', which broke the
# '<stat>_pct_os' -> 'away_<stat>_pct' pattern used by every other percentage
# column; it now maps to 'away_ft_pct'.
#
# NOTE(review): several `team`-side columns (e.g. 'wins', 'g', 'pts') keep
# their unprefixed names while others become 'home_*'. Left as-is here because
# renaming them would change the output schema; confirm whether that is
# intended.
column_rename_dict = {
    # Game information
    'id': 'game_id', 'datetime': 'game_date', 'team': 'home_team', 'opponent': 'away_team',
    'score': 'home_score', 'opponent_score': 'away_score', 'result': 'home_result',
    'game_url': 'covers_game_url', 'league_year': 'league_year', 'spread': 'home_spread',
    'spread_result': 'home_spread_result', 'pred_datetime': 'pred_date',
    # Standings joined on `team`
    'wins': 'wins', 'losses': 'losses', 'win_perc': 'win_pct',
    'expected_wins': 'expected_wins', 'expected_losses': 'expected_losses',
    'points_scored_per_game': 'home_ppg', 'points_allowed_per_game': 'home_papg',
    # Standings joined on `opponent`
    'wins_osta': 'away_wins', 'losses_osta': 'away_losses', 'win_perc_osta': 'away_win_pct',
    'expected_wins_osta': 'away_expected_wins', 'expected_losses_osta': 'away_expected_losses',
    'points_scored_per_game_osta': 'away_ppg', 'points_allowed_per_game_osta': 'away_papg',
    # Stats joined on `team`
    'g': 'g', 'mp': 'mp', 'pts': 'pts', 'ast': 'ast', 'trb': 'trb', 'blk': 'blk',
    'stl': 'stl', 'tov': 'tov', 'pf': 'pf', 'drb': 'drb', 'orb': 'orb',
    'fg': 'fg', 'fga': 'fga', 'fg_pct': 'fg_pct',
    'fg2': 'fg2', 'fg2a': 'fg2a', 'fg2_pct': 'fg2_pct',
    'fg3': 'fg3', 'fg3a': 'fg3a', 'fg3_pct': 'fg3_pct',
    'ft': 'ft', 'fta': 'fta', 'ft_pct': 'ft_pct',
    # Stats joined on `opponent`
    'g_os': 'away_g', 'mp_os': 'away_mp', 'pts_os': 'away_pts', 'ast_os': 'away_ast',
    'trb_os': 'away_trb', 'blk_os': 'away_blk', 'stl_os': 'away_stl',
    'tov_os': 'away_tov', 'pf_os': 'away_pf', 'drb_os': 'away_drb', 'orb_os': 'away_orb',
    'fg_os': 'away_fg', 'fga_os': 'away_fga', 'fg_pct_os': 'away_fg_pct',
    'fg2_os': 'away_fg2', 'fg2a_os': 'away_fg2a', 'fg2_pct_os': 'away_fg2_pct',
    'fg3_os': 'away_fg3', 'fg3a_os': 'away_fg3a', 'fg3_pct_os': 'away_fg3_pct',
    'ft_os': 'away_ft', 'fta_os': 'away_fta', 'ft_pct_os': 'away_ft_pct',
}

In [23]:
# Apply the final column names.
full_dataset = full_dataset.rename(column_rename_dict, axis='columns')

# Save to RDS

In [24]:
# The write-back is disabled: running the notebook does not modify the
# database. Uncomment the line below to store `full_dataset` in the
# `combined_data_inbound` table over the same `connection`.
# NOTE(review): no `if_exists` is passed; with pandas' default the call is
# expected to fail if the table already exists -- confirm the desired mode.
# full_dataset.to_sql(name='combined_data_inbound', con=connection, index=False)