In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import xgboost as xgb
import sklearn.metrics as metrics
import datetime

In [2]:
inputfolder = 'data'
datadf = pd.read_csv(os.path.join(inputfolder,"final_match_data.csv"))
datadf['datetime'] = pd.to_datetime(datadf['datetime'])
with open(os.path.join(inputfolder,'datacolumns.txt'),'r') as f:
    content = f.readlines()
datacols = [x.strip() for x in content]
print(datadf.dtypes)
display(datadf.head())

atpdata = pd.read_csv(os.path.join(inputfolder,"ATP_matches.csv"))
cols = atpdata.drop(['Winner','Loser','Tournament','Tournament_Date','Court_Surface','Round_Description'],axis=1).columns
for col in cols:
    atpdata.loc[:,col]=pd.to_numeric(atpdata[col],errors='coerce')
print(atpdata.dtypes)
atpdata['datetime'] = pd.to_datetime(atpdata['Tournament_Date'])
atpdata['year'] = pd.DatetimeIndex(atpdata['datetime']).year
def fillrank(row,col,ATPframe):
    if np.isnan(row[col]):
            year = row['year']
            tournament = row['Tournament']
            surface = row['Court_Surface']
            yeardf = ATPframe[ATPframe['year']==year] #i can do this slicing in one line, but im doing in two to allow for easier debugging
            tourndf = ATPframe[(ATPframe['Tournament']==tournament)&(ATPframe['Court_Surface']==surface)]
            maxrank = int(max([tourndf['Winner_Rank'].max(),tourndf['Loser_Rank'].max()]))
            return maxrank+1
    else:
        return row[col]
atpdata.loc[:,'Winner_Rank'] = atpdata.apply(fillrank,axis=1,args=('Winner_Rank',atpdata))
atpdata.loc[:,'Loser_Rank'] = atpdata.apply(fillrank,axis=1,args=('Loser_Rank',atpdata))

display(atpdata.head())

#only going to use matches from 2014 onwards, not ussing matches from 2012-13 so we have enough historic data for matches in 2014
atpHardOnly = atpdata.loc[(atpdata['Court_Surface']=='Hard')&(atpdata['year']>2013),['Winner','Loser','datetime']]
frame1,frame2 = train_test_split(atpHardOnly,test_size=.5)
frame1.columns = ['Player1','Player2','datetime']
frame1['Player1Win'] = 1

frame2.columns = ['Player2','Player1','datetime']
frame2['Player1Win'] = 0

traininput = pd.concat([frame1,frame2])
traininput.sort_index(inplace=True)
print(traininput['Player1Win'].mean())
display(traininput.head())

# splitting into X and Y
X = traininput.drop('Player1Win',axis=1)
Y = traininput['Player1Win']

Player                        object
Rank                         float64
Sets_Won                     float64
Games_Won                      int64
Aces                         float64
DoubleFaults                 float64
FirstServes_Won              float64
FirstServes_In               float64
SecondServes_Won             float64
SecondServes_In              float64
BreakPoints_Won              float64
BreakPoints                  float64
ReturnPoints_Won             float64
ReturnPoints_Faced           float64
TotalPoints_Won                int64
won_game?                      int64
FirstServes_ratio            float64
SecondServes_ratio           float64
BreakPoints_ratio            float64
ReturnPoints_ratio           float64
datetime              datetime64[ns]
year                           int64
Tournament                    object
Round_Description             object
Court_Surface                 object
Total_Serves                 float64
Aces%                        float64
S

Unnamed: 0,Player,Rank,Sets_Won,Games_Won,Aces,DoubleFaults,FirstServes_Won,FirstServes_In,SecondServes_Won,SecondServes_In,...,BreakPoints_ratio,ReturnPoints_ratio,datetime,year,Tournament,Round_Description,Court_Surface,Total_Serves,Aces%,ServesWon%
0,Edouard Roger-Vasselin,106.0,2.0,12,5.0,2.0,22.0,30.0,12.0,19.0,...,0.571429,0.423729,2012-01-02,2012,Chennai,First Round,Hard,49.0,0.102041,0.693878
1,Dudi Sela,83.0,2.0,12,2.0,0.0,14.0,17.0,11.0,16.0,...,0.428571,0.62069,2012-01-02,2012,Chennai,First Round,Hard,33.0,0.060606,0.757576
2,Go Soeda,120.0,2.0,19,6.0,1.0,48.0,64.0,19.0,39.0,...,0.454545,0.4,2012-01-02,2012,Chennai,First Round,Hard,103.0,0.058252,0.650485
3,Yuki Bhambri,345.0,2.0,12,1.0,2.0,22.0,29.0,9.0,17.0,...,0.384615,0.548387,2012-01-02,2012,Chennai,First Round,Hard,46.0,0.021739,0.673913
4,Yuichi Sugita,235.0,2.0,12,3.0,1.0,37.0,51.0,11.0,27.0,...,0.428571,0.407407,2012-01-02,2012,Chennai,First Round,Hard,78.0,0.038462,0.615385


  interactivity=interactivity, compiler=compiler, result=result)


Winner                        object
Loser                         object
Tournament                    object
Tournament_Date               object
Court_Surface                 object
Round_Description             object
Winner_Rank                  float64
Loser_Rank                   float64
Retirement_Ind                 int64
Winner_Sets_Won              float64
Winner_Games_Won               int64
Winner_Aces                  float64
Winner_DoubleFaults          float64
Winner_FirstServes_Won       float64
Winner_FirstServes_In        float64
Winner_SecondServes_Won      float64
Winner_SecondServes_In       float64
Winner_BreakPoints_Won       float64
Winner_BreakPoints           float64
Winner_ReturnPoints_Won      float64
Winner_ReturnPoints_Faced    float64
Winner_TotalPoints_Won         int64
Loser_Sets_Won               float64
Loser_Games_Won                int64
Loser_Aces                   float64
Loser_DoubleFaults           float64
Loser_FirstServes_Won        float64
L

Unnamed: 0,Winner,Loser,Tournament,Tournament_Date,Court_Surface,Round_Description,Winner_Rank,Loser_Rank,Retirement_Ind,Winner_Sets_Won,...,Loser_FirstServes_In,Loser_SecondServes_Won,Loser_SecondServes_In,Loser_BreakPoints_Won,Loser_BreakPoints,Loser_ReturnPoints_Won,Loser_ReturnPoints_Faced,Loser_TotalPoints_Won,datetime,year
0,Edouard Roger-Vasselin,Eric Prodon,Chennai,02-Jan-12,Hard,First Round,106.0,97.0,0,2.0,...,33.0,13.0,26.0,1.0,3.0,15.0,49.0,49,2012-01-02,2012
1,Dudi Sela,Fabio Fognini,Chennai,02-Jan-12,Hard,First Round,83.0,48.0,0,2.0,...,32.0,5.0,26.0,0.0,1.0,8.0,33.0,30,2012-01-02,2012
2,Go Soeda,Frederico Gil,Chennai,02-Jan-12,Hard,First Round,120.0,102.0,0,2.0,...,70.0,18.0,35.0,2.0,4.0,36.0,103.0,99,2012-01-02,2012
3,Yuki Bhambri,Karol Beck,Chennai,02-Jan-12,Hard,First Round,345.0,101.0,0,2.0,...,33.0,13.0,29.0,2.0,3.0,15.0,46.0,43,2012-01-02,2012
4,Yuichi Sugita,Olivier Rochus,Chennai,02-Jan-12,Hard,First Round,235.0,67.0,0,2.0,...,32.0,13.0,22.0,1.0,7.0,30.0,78.0,62,2012-01-02,2012


0.4999257609502598


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.




Unnamed: 0,Player1,Player1Win,Player2,datetime
5893,Albert Ramos-Vinolas,0,Edouard Roger-Vasselin,2014-01-01
5894,Aleksandr Nedovyesov,0,Guillermo Garcia-Lopez,2014-01-01
5895,Henri Laaksonen,0,Aljaz Bedene,2014-01-01
5896,Jeevan Nedunchezhiyan,0,Jiri Vesely,2014-01-01
5897,Benjamin Becker,1,Julian Reister,2014-01-01


In [3]:
#defining preprocessing functions (from wrangleDataAttempt2)
def days_difference(date1,date2):
            diff = date2-date1
            return diff.days

def get_player_stats(inputframe,harddf,datacols,playercol):
    #idea is to pass a panda dataframe with columns [playername, tournamentdate] and be able to return a dataframe with the stats
    length = inputframe.shape[0]
    historicnames = ['careeravg_'+x for x in datacols]
    ytdnames = ['ytd_'+x for x in datacols]
    colnames = [playercol,'datetime','Rank','ytd_Aces%_Stddev','ytd_ServesWon%_Stddev'] + historicnames+ytdnames
    outputframe = pd.DataFrame(index=range(0,length),columns=colnames)
    outputframe.loc[:,playercol] = inputframe[playercol]
    outputframe.loc[:,'datetime'] = inputframe['datetime']
    for index in range(0,length):
        #extract player name and date of game
        row = inputframe.iloc[index,:]
        playername = row[playercol]
        date = row['datetime']
        
        #grab only data for that player before that date
        tempdf = harddf[harddf['Player']==playername]
        tempdf.loc[:,'timedelta'] = tempdf['datetime'].apply(days_difference,args=(date,))
        tempdf = tempdf[tempdf['timedelta']>0]
        if tempdf.empty:
            continue
        ytddf = tempdf.loc[tempdf['timedelta']<=365]
#         display(ytddf.head())
        currank = tempdf.loc[tempdf['timedelta'].idxmin(),'Rank']
        historicframe = tempdf.loc[:,datacols].mean()
        historicframe.index = historicnames
        YTDframe = ytddf.loc[:,datacols].mean()
        YTDframe.index = ytdnames
        
        #code for debugging NaNs
#         if tempdf.isna().sum().sum()>0:
#             print(playername)
#             print("ytdframe shape: {}, missing values: {}".format(ytddf.shape,ytddf.isna().sum().sum()))
#             print(ytddf.isna().sum())
#             print("careerdf shape: {}, missing values: {}".format(tempdf.shape,tempdf.isna().sum().sum()))
#             print(tempdf.isna().sum())

        outputframe.loc[index,['Rank','ytd_Aces%_Stddev','ytd_ServesWon%_Stddev']] = [currank,ytddf['Aces%'].std(),ytddf['ServesWon%'].std()]
        outputframe.loc[index,historicnames]=historicframe
        outputframe.loc[index,ytdnames]=YTDframe
    return outputframe

def get_difference(frame1,frame2,colname=None, prefix1='',prefix2='',index1=None,index2=None):
    #check if index between frames are identical, if not, reset both
    idx_check = frame1.index.equals(frame2.index)
    if not(idx_check):
        frame1.reset_index(drop=True, inplace=True)
        frame2.reset_index(drop=True, inplace=True)
    if colname is None:
        colname = frame1.columns
    if frame1.shape[0] != frame2.shape[0]:
        raise ValueError('both frames must contain the same number of rows/n Frame1: %{}/mn Frame2: %{}'.format(frame1.shape[0],frame2.shape[0]))
    tempdiffcolnames = [x+"_diff" for x in colname]
    diffcolnames = list()
    if index1 is not None:
        diffcolnames.append(index1)
    if index2 is not None:
        diffcolnames.append(index2)
    diffcolnames = diffcolnames + tempdiffcolnames
    if idx_check:
        difframe = pd.DataFrame(index=frame1.index,columns=diffcolnames,data=0)
    else:
        difframe = pd.DataFrame(index=range(0,frame1.shape[0]),columns=diffcolnames,data=0)
    if index1 is not None:
        difframe[index1] = frame1[index1]
    if index2 is not None:
        difframe[index2] = frame2[index2]
    for col in colname:
        frame1name = prefix1+col
        frame2name = prefix2+col
        difframename = col+"_diff"
        difframe[difframename]=frame1[frame1name] - frame2[frame2name]
    return difframe

def convert_to_data(inputframe,matchdata,datacols,player1colname="Player1",player2colname="Player2"):
    inputframe.reset_index(drop=True,inplace=True)
    
    player1frame = inputframe.loc[:,[player1colname,'datetime']]
    player2frame = inputframe.loc[:,[player2colname,'datetime']]
    
    player1df = get_player_stats(player1frame,matchdata,datacols,player1colname)
    player2df = get_player_stats(player2frame,matchdata,datacols,player2colname)
    
    #append columns indicating whether this is a players first recorded Hard surface ATPgame (if it's their first game, Rank will return NaNs)
    player1df['Player1fg'] = player1df['Rank'].isna()
    player2df['Player2fg'] = player2df['Rank'].isna()
    
#     player1df = player1df[~player1df['Player1fg']].fillna(0)
#     player2df = player2df[~player2df['Player2fg']].fillna(0)
#     if player1df.isna().sum().sum() >0:
#         display(player1df)
    
    
    #for debugging purposes
    if player1df.shape[0] != player2df.shape[0]:
        return (player1df,player2df)
    
    newdatacols = player1df.drop([player1colname,'datetime','Player1fg'],axis=1).columns
    outputdf = get_difference(player1df,player2df,newdatacols,index1=player1colname,index2=player2colname)
    outputdf.loc[:,'Player1fg'] = player1df['Player1fg']
    outputdf.loc[:,'Player2fg'] = player2df['Player2fg']
    outputdf.loc[:,'datetime'] = player1df['datetime']
    
    return outputdf.infer_objects() #infer_objects soft converts object columns to their correct types

# tempX = X.iloc[:15,:]
# tempX = convert_to_data(tempX,datadf,datacols)
# print(tempX.dtypes)

X = convert_to_data(X,datadf,datacols)
print(X.dtypes)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Player1                                      object
Player2                                      object
Rank_diff                                   float64
ytd_Aces%_Stddev_diff                       float64
ytd_ServesWon%_Stddev_diff                  float64
careeravg_Sets_Won_diff                     float64
careeravg_Games_Won_diff                    float64
careeravg_Aces_diff                         float64
careeravg_DoubleFaults_diff                 float64
careeravg_FirstServes_Won_diff              float64
careeravg_FirstServes_In_diff               float64
careeravg_SecondServes_Won_diff             float64
careeravg_SecondServes_In_diff              float64
careeravg_BreakPoints_Won_diff              float64
careeravg_BreakPoints_diff                  float64
careeravg_ReturnPoints_Won_diff             float64
careeravg_ReturnPoints_Faced_diff           float64
careeravg_TotalPoints_Won_diff              float64
careeravg_won_game?_diff                    float64
careeravg_Fi

In [4]:
#training and then testing a basic model
from sklearn.model_selection import GridSearchCV
fullX = X.copy()
# X.drop(['Player1','Player2','datetime'],axis=1,inplace=True)
Xtrain,Xtest,ytrain,ytest = train_test_split(X,Y,test_size=.3)
params ={'max_depth':[4,5,6],
         'n_estimators':[100,200,300],
         'learning_rate':[.01,.03,.1]
        }
estimator = xgb.XGBClassifier()
tuneparam = GridSearchCV(estimator,params,n_jobs=-1)
print("tuning model")
tuneparam.fit(Xtrain,ytrain)

model = tuneparam.best_estimator_
best_params = tuneparam.best_params_
def get_scores(predy,ytest):
    import sklearn.metrics as metrics
    acc = metrics.accuracy_score(ytest,predy)
    prec = metrics.precision_score(ytest,predy)
    recall = metrics.recall_score(ytest,predy)
    auc = metrics.roc_auc_score(ytest,predy)
    return {'accuracy':acc,'precision':prec,'recall':recall,'auc':auc}
score = model.score(Xtest,ytest)
predy = model.predict(Xtest)

scoresdict = get_scores(predy,ytest)
print("="*200)
print("Score: {}".format(score))
for key,value in scoresdict.items():
    print("{}: {}".format(key,value))

print("="*200)
for idx, col in enumerate(Xtrain.columns.tolist()):
    print("{} importance: {}".format(col,model.feature_importances_[idx]))


tuning model


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\Hugh\Anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\Hugh\Anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000002B4E946DDB0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Hugh\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\H...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000002B4E946DDB0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Hugh\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\H...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
C:\Users\Hugh\Anaconda3\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
C:\Users\Hugh\Anaconda3\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle IOLoop._run_callback(functools.par...02B4EF202378>))>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
C:\Users\Hugh\Anaconda3\lib\asyncio\events.py in _run(self=<Handle IOLoop._run_callback(functools.par...02B4EF202378>))>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method IOLoop._run_callback of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (functools.partial(<function wrap.<locals>.null_wrapper at 0x000002B4EF202378>),)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\tornado\ioloop.py in _run_callback(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, callback=functools.partial(<function wrap.<locals>.null_wrapper at 0x000002B4EF202378>))
    754         """Runs a callback with error handling.
    755 
    756         For use in subclasses.
    757         """
    758         try:
--> 759             ret = callback()
        ret = undefined
        callback = functools.partial(<function wrap.<locals>.null_wrapper at 0x000002B4EF202378>)
    760             if ret is not None:
    761                 from tornado import gen
    762                 # Functions that return Futures typically swallow all
    763                 # exceptions and store them in the Future.  If a Future

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ()
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in <lambda>()
    531             return
    532 
    533         if state & self.socket.events:
    534             # events still exist that haven't been processed
    535             # explicitly schedule handling to avoid missing events due to edge-triggered FDs
--> 536             self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
    537 
    538     def _init_io_state(self):
    539         """initialize the ioloop event handler"""
    540         with stack_context.NullContext():

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=0)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': '#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 1, 15, 5, 35, 5, 434814, tzinfo=tzutc()), 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'session': '9d438e7ad965457787d7b8da8a9bcd30', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'9d438e7ad965457787d7b8da8a9bcd30']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': '#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 1, 15, 5, 35, 5, 434814, tzinfo=tzutc()), 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'session': '9d438e7ad965457787d7b8da8a9bcd30', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'9d438e7ad965457787d7b8da8a9bcd30'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': '#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 1, 15, 5, 35, 5, 434814, tzinfo=tzutc()), 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'session': '9d438e7ad965457787d7b8da8a9bcd30', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'd5c49ec4d88a412890a6971103665657', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = '#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = '#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='#training and then testing a basic model\nfrom sk...{}".format(col,model.feature_importances_[idx]))\n', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.FunctionDef object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>, <_ast.For object>, <_ast.Expr object>, <_ast.For object>], cell_name='<ipython-input-4-129fe169110a>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 2b4eeab0668, executio...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x000002B4F0520D20, file "<ipython-input-4-129fe169110a>", line 13>
        result = <ExecutionResult object at 2b4eeab0668, executio...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x000002B4F0520D20, file "<ipython-input-4-129fe169110a>", line 13>, result=<ExecutionResult object at 2b4eeab0668, executio...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x000002B4F0520D20, file "<ipython-input-4-129fe169110a>", line 13>
        self.user_global_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nimport numpy as np\nimport os...import sklearn.metrics as metrics\nimport datetime', "inputfolder = 'data'\ndatadf = pd.read_csv(os.pat...'Player1Win',axis=1)\nY = traininput['Player1Win']", '#defining preprocessing functions (from wrangleD...onvert_to_data(X,datadf,datacols)\nprint(X.dtypes)', '#training and then testing a basic model\nfrom sk... {}".format(col,model.feature_importances_[idx]))'], 'Out': {}, 'X':                       Player1                 Pl...     False 2019-01-14  

[6735 rows x 50 columns], 'Xtest':                        Player1               Pla...     False 2015-10-05  

[2021 rows x 50 columns], 'Xtrain':                      Player1                Play...     False 2016-08-29  

[4714 rows x 50 columns], 'Y': 5893     0
5894     0
5895     0
5896     0
5897...   1
Name: Player1Win, Length: 6735, dtype: int64, '_': '', '__': '', '___': '', ...}
        self.user_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nimport numpy as np\nimport os...import sklearn.metrics as metrics\nimport datetime', "inputfolder = 'data'\ndatadf = pd.read_csv(os.pat...'Player1Win',axis=1)\nY = traininput['Player1Win']", '#defining preprocessing functions (from wrangleD...onvert_to_data(X,datadf,datacols)\nprint(X.dtypes)', '#training and then testing a basic model\nfrom sk... {}".format(col,model.feature_importances_[idx]))'], 'Out': {}, 'X':                       Player1                 Pl...     False 2019-01-14  

[6735 rows x 50 columns], 'Xtest':                        Player1               Pla...     False 2015-10-05  

[2021 rows x 50 columns], 'Xtrain':                      Player1                Play...     False 2016-08-29  

[4714 rows x 50 columns], 'Y': 5893     0
5894     0
5895     0
5896     0
5897...   1
Name: Player1Win, Length: 6735, dtype: int64, '_': '', '__': '', '___': '', ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
C:\betfairproject\<ipython-input-4-129fe169110a> in <module>()
      8          'learning_rate':[.01,.03,.1]
      9         }
     10 estimator = xgb.XGBClassifier()
     11 tuneparam = GridSearchCV(estimator,params,n_jobs=-1)
     12 print("tuning model")
---> 13 tuneparam.fit(Xtrain,ytrain)
     14 
     15 model = tuneparam.best_estimator_
     16 best_params = tuneparam.best_params_
     17 def get_scores(predy,ytest):

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=None, error_score='raise',
     ...ain_score='warn',
       scoring=None, verbose=0), X=                     Player1                Play...     False 2016-08-29  

[4714 rows x 50 columns], y=20045    0
9570     1
11094    1
6531     0
1098...   0
Name: Player1Win, Length: 4714, dtype: int64, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X =                      Player1                Play...     False 2016-08-29  

[4714 rows x 50 columns]
        y = 20045    0
9570     1
11094    1
6531     0
1098...   0
Name: Player1Win, Length: 4714, dtype: int64
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Tue Jan 15 16:50:12 2019
PID: 1848                  Python 3.6.5: C:\Users\Hugh\Anaconda3\python.exe
...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (XGBClassifier(base_score=0.5, booster='gbtree', ...ht=1, seed=None,
       silent=True, subsample=1),                      Player1                Play...e      False 2016-08-29

[4714 rows x 50 columns], 20045    0
9570     1
11094    1
6531     0
1098...   0
Name: Player1Win, Length: 4714, dtype: int64, {'score': <function _passthrough_scorer>}, array([1534, 1535, 1536, ..., 4711, 4712, 4713]), array([   0,    1,    2, ..., 1601, 1602, 1604]), 0, {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 100}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (XGBClassifier(base_score=0.5, booster='gbtree', ...ht=1, seed=None,
       silent=True, subsample=1),                      Player1                Play...e      False 2016-08-29

[4714 rows x 50 columns], 20045    0
9570     1
11094    1
6531     0
1098...   0
Name: Player1Win, Length: 4714, dtype: int64, {'score': <function _passthrough_scorer>}, array([1534, 1535, 1536, ..., 4711, 4712, 4713]), array([   0,    1,    2, ..., 1601, 1602, 1604]), 0, {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 100})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=XGBClassifier(base_score=0.5, booster='gbtree', ...ht=1, seed=None,
       silent=True, subsample=1), X=                     Player1                Play...e      False 2016-08-29

[4714 rows x 50 columns], y=20045    0
9570     1
11094    1
6531     0
1098...   0
Name: Player1Win, Length: 4714, dtype: int64, scorer={'score': <function _passthrough_scorer>}, train=array([1534, 1535, 1536, ..., 4711, 4712, 4713]), test=array([   0,    1,    2, ..., 1601, 1602, 1604]), verbose=0, parameters={'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 100}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method XGBClassifier.fit of XGBClassifier...t=1, seed=None,
       silent=True, subsample=1)>
        X_train =                      Player1                 Pla...e      False 2016-08-29

[3142 rows x 50 columns]
        y_train = 20006    1
12398    1
17748    1
20199    1
2058...   0
Name: Player1Win, Length: 3142, dtype: int64
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self=XGBClassifier(base_score=0.5, booster='gbtree', ...ht=1, seed=None,
       silent=True, subsample=1), X=                     Player1                 Pla...e      False 2016-08-29

[3142 rows x 50 columns], y=20006    1
12398    1
17748    1
20199    1
2058...   0
Name: Player1Win, Length: 3142, dtype: int64, sample_weight=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, callbacks=None)
    688         if sample_weight is not None:
    689             train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
    690                                     missing=self.missing, nthread=self.n_jobs)
    691         else:
    692             train_dmatrix = DMatrix(X, label=training_labels,
--> 693                                     missing=self.missing, nthread=self.n_jobs)
        self.missing = nan
        self.n_jobs = 1
    694 
    695         self._Booster = train(xgb_options, train_dmatrix, self.n_estimators,
    696                               evals=evals,
    697                               early_stopping_rounds=early_stopping_rounds,

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\xgboost\core.py in __init__(self=<xgboost.core.DMatrix object>, data=                     Player1                 Pla...e      False 2016-08-29

[3142 rows x 50 columns], label=array([1, 1, 1, ..., 0, 0, 0], dtype=int64), missing=nan, weight=None, silent=False, feature_names=None, feature_types=None, nthread=1)
    358                 self._feature_types = feature_types
    359             return
    360 
    361         data, feature_names, feature_types = _maybe_pandas_data(data,
    362                                                                 feature_names,
--> 363                                                                 feature_types)
        feature_types = None
    364 
    365         data, feature_names, feature_types = _maybe_dt_data(data,
    366                                                             feature_names,
    367                                                             feature_types)

...........................................................................
C:\Users\Hugh\Anaconda3\lib\site-packages\xgboost\core.py in _maybe_pandas_data(data=                     Player1                 Pla...e      False 2016-08-29

[3142 rows x 50 columns], feature_names=None, feature_types=None)
    223         bad_fields = [data.columns[i] for i, dtype in
    224                       enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER]
    225 
    226         msg = """DataFrame.dtypes for data must be int, float or bool.
    227                 Did not expect the data types in fields """
--> 228         raise ValueError(msg + ', '.join(bad_fields))
        msg.join = <built-in method join of str object>
        bad_fields = ['Player1', 'Player2', 'datetime']
    229 
    230     if feature_names is None:
    231         if isinstance(data.columns, MultiIndex):
    232             feature_names = [

ValueError: DataFrame.dtypes for data must be int, float or bool.
                Did not expect the data types in fields Player1, Player2, datetime
___________________________________________________________________________

What about a model just trained using Ranked Diff? (curious to see how much predictive power rank has)

In [None]:
rankXtrain = Xtrain.loc[:,'Rank_diff'].values.reshape(Xtrain.shape[0],1)
rankXtest = Xtest.loc[:,'Rank_diff'].values.reshape(Xtest.shape[0],1)

# print(rankXtrain.shape)
params ={'max_depth':[4,5,6],
         'n_estimators':[100,200,300],
         'learning_rate':[.01,.03,.1]
        }
estimator = xgb.XGBClassifier()
tuneparam = GridSearchCV(estimator,params,n_jobs=-1)
print("tuning model")
tuneparam.fit(rankXtrain,ytrain)

model2 = tuneparam.best_estimator_
best_params2 = tuneparam.best_params_

score2 = model2.score(rankXtest,ytest)
predy2 = model2.predict(rankXtest)
scoresdict2 = get_scores(predy2,ytest)

print("="*200)
print("Score: {}".format(score2))
for key,value in scoresdict2.items():
    print("{}: {}".format(key,value))

print("="*200)
print("{} importance: {}".format(col,model2.feature_importances_))


In [None]:
print(best_params)
print(best_params2)