In [189]:
from pybaseball import statcast_batter
from pybaseball import batting_stats_bref
from pybaseball import retrosheet
from pybaseball import playerid_lookup
from pybaseball.lahman import *
from pybaseball import playerid_reverse_lookup
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [170]:
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [171]:
download_lahman()

In [172]:
batting = batting()

In [173]:
recent_bats = batting[batting['yearID'] > 2000]

In [174]:
career_totals = recent_bats.groupby('playerID').sum()
career_totals.reset_index()

# Limit to only players with 100+ AB
career_totals = career_totals[career_totals['AB'] > 100]

# Set career stat columns
# Batting Average
career_totals['BA'] = career_totals['H'] / career_totals['AB']
# 2B/AB
career_totals['2B/AB'] = career_totals['2B'] / career_totals['AB']
# 3B/AB
career_totals['3B/AB'] = career_totals['3B'] / career_totals['AB']
# HR/AB
career_totals['HR/AB'] = career_totals['HR'] / career_totals['AB']
# RBI/G
career_totals['RBI/G'] = career_totals['RBI'] / career_totals['G']
# R/G
career_totals['R/G'] = career_totals['R'] / career_totals['G']
# BB/G
career_totals['BB/G'] = career_totals['BB'] / career_totals['G']
# SB/G
career_totals['SB/G'] = career_totals['SB'] / career_totals['G']
# HBP/G
career_totals['HBP/G'] = career_totals['HBP'] / career_totals['G']




In [175]:
# Drop columns we don't need
career_totals = career_totals.iloc[:,19:]
career_totals = career_totals.reset_index()

In [176]:
data = pd.read_csv('../meana2020.txt', header=None)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2424 entries, 0 to 2423
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       2424 non-null   object
 1   1       2424 non-null   object
 2   2       2424 non-null   object
 3   3       2424 non-null   object
 4   4       2424 non-null   object
 5   5       798 non-null    object
 6   6       523 non-null    object
 7   7       265 non-null    object
 8   8       2424 non-null   int64 
 9   9       2424 non-null   int64 
 10  10      2424 non-null   int64 
 11  11      2424 non-null   int64 
 12  12      2424 non-null   int64 
 13  13      2424 non-null   object
 14  14      2424 non-null   object
 15  15      2424 non-null   object
dtypes: int64(5), object(11)
memory usage: 303.1+ KB


In [177]:
columns = {0:'game_id', 1:'batter', 2:'batter_hand', 3:'pitcher', 4:'pitcher_hand',
           5:'run_first', 6:'run_second', 7:'run_third', 8:'result', 9:'rbi',
           10:'first_dest', 11:'second_dest', 12:'third_dest',13:'sb_first',
           14:'sb_second', 15:'sb_third'}

new_data = data.rename(columns=columns)

In [178]:

def calc_runs(game_df, batter, hrs):
    runs = hrs
    runs += game_df[(game_df['run_first'] == batter) & (game_df['first_dest'].isin([4,5,6]))].shape[0]
    runs += game_df[(game_df['run_second'] == batter) & (game_df['second_dest'].isin([4,5,6]))].shape[0]
    runs += game_df[(game_df['run_third'] == batter) & (game_df['third_dest'].isin([4,5,6]))].shape[0]
    return runs

def calc_sb(game_df, batter):
    sb = 0 
    sb += game_df[(game_df['run_first'] == batter) & (game_df['sb_first'] == 'T')].shape[0]
    sb += game_df[(game_df['run_second'] == batter) & (game_df['sb_second'] == 'T')].shape[0]
    sb += game_df[(game_df['run_third'] == batter) & (game_df['sb_third'] == 'T')].shape[0]
    return sb

In [179]:
columns = list(career_totals.columns)
columns.extend(['fantasy_ppg', 'starting_pitcher_hand', 'starting_pitcher_era', 'ppg_vs_hand', 'fantasy_points'])


batter_df = pd.DataFrame(columns=columns)
games = new_data['game_id'].unique()
for game in games:
    current_game = new_data[new_data['game_id'] == game]
    
    #date, wind_factor, temp, ballpark_rating = stadium_info(event_file_path, game) 
    
    for batter in current_game['batter'].unique():
        # Convert name to that used by Baseball-Reference
        bbref_name = playerid_reverse_lookup([batter], key_type='retro')
        bbref_name = bbref_name['key_bbref']
        n_index = str(bbref_name).index('\n')
        bbref_name = str(bbref_name)[5:n_index]
        
        
        current_batter = current_game[current_game['batter'] == batter]
        
        starting_pitcher_hand = current_batter.iloc[0, 4]
        singles = current_batter[current_batter['result'] == 20].shape[0]
        doubles = current_batter[current_batter['result'] == 21].shape[0]
        triples = current_batter[current_batter['result'] == 22].shape[0]
        hrs = current_batter[current_batter['result'] == 23].shape[0] 
        rbis = current_batter['rbi'].sum()
        runs = calc_runs(current_game, batter, hrs)
        walks = current_batter[current_batter['result'] == 14].shape[0]
        sb = calc_sb(current_game, batter)
        hbp = current_batter[current_batter['result'] == 16].shape[0]
        games = batter_df[batter_df['playerID'] == bbref_name].shape[0]
        total_fantasy_points = batter_df[batter_df['playerID'] == bbref_name].sum()['fantasy_points']
        if games > 0:
            points_per_game = total_fantasy_points/games
        else:
            points_per_game = 0
        
        total_vs_pitch_hand = batter_df[(batter_df['playerID'] == bbref_name) & (batter_df['starting_pitcher_hand'] == starting_pitcher_hand)].sum()['fantasy_points']
        games_vs_pitch_hand = batter_df[(batter_df['playerID'] == bbref_name) & (batter_df['starting_pitcher_hand'] == starting_pitcher_hand)].shape[0]
        
        
        if games_vs_pitch_hand > 0:
            ppg_vs_hand = total_vs_pitch_hand / games_vs_pitch_hand
        else:
            ppg_vs_hand = points_per_game
        
        
        fantasy_points = 3*singles + 6*doubles + 9*triples + 12*hrs + 3.5*rbis + 3.2*runs + 3*walks + 6*sb + 3*hbp
        
        
        
        
        career_nums = career_totals[career_totals['playerID'] == bbref_name]
        
        career_nums['fantasy_ppg'] = float(points_per_game)
        career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
        career_nums['starting_pitcher_hand'] = starting_pitcher_hand
        career_nums['fantasy_points'] = fantasy_points
        batter_df = batter_df.append(career_nums)
    
       


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_points'] = fantasy_points
A value is trying to be set on a copy of a slice 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_points'] = fantasy_points
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a Da

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['fantasy_ppg'] = float(points_per_game)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['ppg_vs_hand'] = float(ppg_vs_hand)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  career_nums['starting_pitcher_hand'] = starting_pitcher_hand
A value is trying to be set on a copy of a s

In [180]:
batter_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 547 entries, 1125 to 1789
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   playerID               547 non-null    object 
 1   BA                     547 non-null    float64
 2   2B/AB                  547 non-null    float64
 3   3B/AB                  547 non-null    float64
 4   HR/AB                  547 non-null    float64
 5   RBI/G                  547 non-null    float64
 6   R/G                    547 non-null    float64
 7   BB/G                   547 non-null    float64
 8   SB/G                   547 non-null    float64
 9   HBP/G                  547 non-null    float64
 10  fantasy_ppg            547 non-null    float64
 11  starting_pitcher_hand  547 non-null    object 
 12  starting_pitcher_era   0 non-null      object 
 13  ppg_vs_hand            547 non-null    float64
 14  fantasy_points         547 non-null    float64
dtypes:

In [181]:
def stadium_info(event_file_path, game_id):
    f = open(event_file_path)
    text = f.read()
    game_idx = text.index(game_id)
    
    date_idx = text.index('date', game_idx)
    date = text[date_idx+5:date_idx+15]
    date = pd.to_datetime(date)
    
    winddir_idx = text.index('winddir,', game_idx)
    winddir = text[winddir_idx+8:winddir_idx+12]
    if winddir in ['ltor', 'rtol', 'unkn']:
        winddir = 0
    elif winddir in ['tocf', 'torf', 'tolf']:
        winddir = 1
    else:
        winddir = -1
    
    windspeed_idx = text.index('windspeed', game_idx)
    windspeed = text[windspeed_idx+10:windspeed_idx+11]
    wind_factor = winddir * int(windspeed)
    
    temp_idx = text.index('info,temp,', game_idx)
    temp = int(text[temp_idx+10:temp_idx+12])
    
    return date, wind_factor, temp, #ballpark_factor
    
stadium_info('../2015ANA.EVA', 'ANA201506290')

(Timestamp('2015-06-29 00:00:00'), 8, 78)

In [182]:
batter_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 547 entries, 1125 to 1789
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   playerID               547 non-null    object 
 1   BA                     547 non-null    float64
 2   2B/AB                  547 non-null    float64
 3   3B/AB                  547 non-null    float64
 4   HR/AB                  547 non-null    float64
 5   RBI/G                  547 non-null    float64
 6   R/G                    547 non-null    float64
 7   BB/G                   547 non-null    float64
 8   SB/G                   547 non-null    float64
 9   HBP/G                  547 non-null    float64
 10  fantasy_ppg            547 non-null    float64
 11  starting_pitcher_hand  547 non-null    object 
 12  starting_pitcher_era   0 non-null      object 
 13  ppg_vs_hand            547 non-null    float64
 14  fantasy_points         547 non-null    float64
dtypes:

In [183]:
batter_df = batter_df[100:]
X = batter_df.drop(columns=['fantasy_points', 'playerID', 'starting_pitcher_hand', 'starting_pitcher_era'])
y = batter_df['fantasy_points']

ss = StandardScaler()
ss.fit(X)
X = ss.transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [184]:
xg_reg = xgb.XGBRegressor(learning_rate =.1, max_depth=2)
xg_reg.fit(X_train, y_train)
y_hat = xg_reg.predict(X_test)
mean_squared_error(y_test, y_hat)**.5

11.121104274942795

In [187]:
mean_ = np.full((y_test.shape[0],), 10.58975)
mean_squared_error(y_test, mean_)**.5

11.323747178023082

In [186]:
line = LinearRegression()
line.fit(X_train, y_train)
y_hat = line.predict(X_test)
mean_squared_error(y_test, y_hat)**.5


11.231828206198717