In [1]:
import pandas as pd
import pybaseball as pyb
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from copy import copy
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, multilabel_confusion_matrix
from sklearn import metrics
import plotly.express as px

In [2]:
#Rapsodo : Savant

gloss = {
    'Velocity' : 'release_speed'
    ,'Total Spin' : 'release_spin_rate'
    ,'True Spin (release)' : 'N/Eq'
    ,'Spin Efficiency (release)' : 'N/Eq'
    ,'Spin Direction' :'*spin_axis'
    ,'HB (trajectory)' : 'pfx_x'
    ,'VB (trajectory)' : 'pfx_z'
    ,'Release Height' : 'release_extension'
    ,'Release Side' : 'N/Eq'
    ,'Release Angle' : 'release_pos_z'
    ,'Horizontal Angle' : 'release_pos_x'
    ,'Gyro Degree (deg)' : 'N/Eq'
    ,'Pitch Type' :'pitch_type'
    ,'Is Strike' : 'Strike',
    'Player Name' : 'player_name'
}

gloss2 = {
    'Velo' : 'release_speed'
    ,'Total S' : 'release_spin_rate'
    ,'True S' : 'N/Eq'
    ,'S.Eff' : 'N/Eq'
    ,'S.Dir' :'*spin_axis'
    ,'HB' : 'pfx_x'
    ,'VB' : 'pfx_z'
    ,'R.Hei' : 'release_extension'
    ,'R.Side' : 'N/Eq'
    ,'R.Ang' : 'release_pos_z'
    ,'H.Ang' : 'release_pos_x'
    ,'Gyro' : 'N/Eq'
    ,'P.Type' :'pitch_type'
}

#rapsodo id:player name

pitcher_id_dict = {
    1035163:'Hayden_Smith',
    309927:'James_Quinlivan',
    1032797:'Aiven_Cabral',
    934057:'Brett_Dunham',
    309954:'Nicholas_Davis',
    935304:'Jack_Beauchesne',
    1032804:'Charlie_Walker',
    1032799:'Ryan_Griffin',
    722490:'Will_Jones',
    1032801:'Carson_Walsh',
    722496:'Michael_Gemma',
    309933:'Matt_Downing',
    935609:'Sean_Quinlivan',
    722491:'Luke_Bottger',
    309940:'Owen_Langan',
}

#Pitch types to numbers
pitchtype = {'CH':5, 'FB':1, 'FA':1, 'SL':3, 'CU':2, 'FF':1, 'FS':1,
            'Fastball':1, 'CurveBall':2, 'Slider':3, '-':float('nan'), 'ChangeUp':5
             }


In [3]:
def clean_rapsodo_df(csv):

    all_seasions = pd.read_csv(csv)

    #change pitches to numbers
    all_seasions['Pitch Type'] = all_seasions['Pitch Type'].map(pitchtype)

    for col in ['Velocity','Total Spin','HB (trajectory)','VB (trajectory)','Release Height','Release Angle',
                'Horizontal Angle','Pitch Type']:
        for i in range(len(all_seasions[col])):
            if all_seasions[col][i] == '-':
                all_seasions[col][i] = float('nan')
            else:
                all_seasions[col][i] = float(all_seasions[col][i])
    
    #drop commas and change str to int
    all_seasions['Total Spin'] = all_seasions['Total Spin'].astype(float)
    all_seasions['Velocity'] = all_seasions['Velocity'].astype(float)
    all_seasions['Release Angle'] = all_seasions['Release Angle'].astype(float)
    all_seasions['VB (trajectory)'] = all_seasions['VB (trajectory)'].astype(float)
    all_seasions['HB (trajectory)'] = all_seasions['HB (trajectory)'].astype(float)


    #Rapsodo velo adjuster 
    all_seasions['Velocity'] = all_seasions['Velocity'].apply(lambda x: x*1.0919273389060635)

    #rename columns to match savant data. N/Eq means no equivalent 
    all_seasions = pd.DataFrame(all_seasions).rename(columns=gloss)
    
    all_seasions = all_seasions[['player_name','release_speed','release_spin_rate','*spin_axis','pfx_x','pfx_z',
                                 'Strike','release_extension','release_pos_z','release_pos_x','pitch_type']]
    
    all_seasions.dropna(inplace=True)
    return all_seasions
'''takes in rapsodo data, a players name, a pitch type, and k and outputs the mlb pitcher who is simlar to
the plurality of the players pitches'''

def knn_rapsodo(rapsodo_csv ,player, pitch_type):
    
    # KNN Classifier Cross-Validation 
    # code commented out because of long run-time
    x_train = mlb_metrics.sample(frac = .75)[features+ ['player_pitch']]
    x_test = mlb_metrics.drop(x_train.index)[features + ['player_pitch']]
    test_full = x_test

    y_train = x_train['player_pitch'].astype('str')
    y_test = x_test['player_pitch'].astype('str')

    x_train, x_test = scale_train_test_x(
        x_train[features].reset_index().drop(columns=['index']), 
        x_test[features].reset_index().drop(columns=['index'])
    )
    scores_list = []

    for neighbors in range(1, 100):
        regressor = KNeighborsClassifier(n_neighbors=neighbors)
        regressor.fit(x_train, y_train)
        y_pred_test = regressor.predict(x_test)
        accuracy, error, precision, recall, f1 = get_metrics(y_test, y_pred_test)
        if len(scores_list) != 0 and accuracy > max(scores_list):
            optimal_number_neighbors = neighbors
        scores_list.append(accuracy)
    
    k = optimal_number_neighbors
    
    rapsodo = clean_rapsodo_df(rapsodo_csv)
    player_df = rapsodo.loc[rapsodo['player_name'] == player]
    
    #take the players pitches of the selected pitch type and drop un-needed rows
    player_df['player_pitch'] = player_df['player_name']
    player_pitch_df = player_df.loc[player_df['pitch_type'] == pitch_type]
    player_pitch_df = player_pitch_df[['player_pitch','pitch_type','release_speed', 'release_pos_x',
       'release_pos_z','release_spin_rate','pfx_x',
       'pfx_z']]
    player_pitch_df.dropna(inplace=True)
    
    #combine dataframes to scale properly
    mlb_nu_pitchers = pd.concat([mlb_metrics,player_pitch_df])
    mlb_nu_pitchers_num_scaled = mlb_nu_pitchers[['release_speed','pfx_z','pfx_x']].dropna()
    for feat in mlb_nu_pitchers_num_scaled.columns:
        mlb_nu_pitchers_num_scaled[f'{feat}_scaled'] = (mlb_nu_pitchers_num_scaled[feat] -
                                                        mlb_nu_pitchers_num_scaled[feat].mean() 
                                                        / mlb_nu_pitchers_num_scaled[feat].std())

    #add names to the scaled df
    mlb_nu_pitchers_num_scaled['player_pitch'] = mlb_nu_pitchers['player_pitch']

    #seperate the pitcher from the mlb pitches to not count self in knn
    mlb_pitchers_scaled = mlb_nu_pitchers_num_scaled[:-(player_pitch_df.shape[0])]
    nu_pitcher_scaled = mlb_nu_pitchers_num_scaled[-player_pitch_df.shape[0]:] 
    k = k # how many nearest neighbors to use
    x_feat_list = ['release_speed_scaled', 'pfx_x_scaled','pfx_z_scaled']
    y_feat = 'player_pitch'

    x = mlb_pitchers_scaled.loc[:, x_feat_list].values
    y_true = mlb_pitchers_scaled.loc[:, y_feat].values

    # initialize a knn_classifier
    knn_classifier = KNeighborsClassifier(n_neighbors=k)

    # fit happens "inplace", we modify the internal state of knn_classifier to remember all the training samples
    knn_classifier.fit(x,y_true)

    # estimate each pitchers's comp
    y_pred = knn_classifier.predict(nu_pitcher_scaled.loc[:, x_feat_list].values)
    
    # zip together list of (truth, predict) pairs
    #true_pred_list = list(zip(y_true, y_pred))
    #print(true_pred_list[:player_pitches])
    return y_pred, mlb_nu_pitchers_num_scaled
def scale_train_test_x(train, test, scaler = None):
    if scaler is None:
        scaler = StandardScaler()
    scaler.fit(train)
    train = scaler.transform(train)
    test = scaler.transform(test)
    return train, test
def get_metrics(test, predict):
    accuracy = metrics.accuracy_score(test, predict)
    error = 1-accuracy
    precision = metrics.precision_score(test, predict, average = 'macro')
    recall = metrics.recall_score(test, predict, average = 'macro')
    f1 = metrics.f1_score(test, predict, average = 'macro')
    
    return accuracy, error, precision, recall, f1
#takes in a rapsodo csv and features to be used in the random forest
#returns a data frame of the rapsodo info and adds [player_comp] which is the indivual pitches mlb comp 

def random_forest_rapsodo(rapsodo,features):
    
    x_train = mlb_metrics.sample(frac = .75)[features+ ['player_pitch']]
    x_test = mlb_metrics.drop(x_train.index)[features + ['player_pitch']]
    test_full = x_test

    y_train = x_train['player_pitch'].astype('str')
    y_test = x_test['player_pitch'].astype('str')

    x_train, x_test = scale_train_test_x(
        x_train[features].reset_index().drop(columns=['index']), 
        x_test[features].reset_index().drop(columns=['index'])
    )

    #cross val descion tree

    scores_list = []

    for depth in range(1, 25):
        regressor = DecisionTreeClassifier(random_state=0, max_depth = depth)
        regressor.fit(x_train, y_train)
        y_pred_test = regressor.predict(x_test)
        accuracy, error, precision, recall, f1 = get_metrics(y_test, y_pred_test)
        if len(scores_list) != 0 and accuracy > max(scores_list):
            optimal_depth_ctree = depth
        scores_list.append(accuracy)

    x_train = mlb_metrics[features]
    y_train = mlb_metrics['player_pitch']

    classifier = KNeighborsClassifier(n_neighbors=optimal_depth_ctree)
    classifier.fit(x_train, y_train)

    rapsodo_df =  clean_rapsodo_df(rapsodo)
    rapsodo_df['player_comp'] = classifier.predict(rapsodo_df[features])
    
    return rapsodo_df


In [4]:
#import every pitch of the 2022 season
season22 = pyb.statcast(start_dt='2022-04-05', end_dt='2022-10-07')
#only keep important metrics
mlb_metrics = season22[['player_name','release_speed','release_pos_x','release_pos_z','release_spin_rate','release_extension','effective_speed','spin_axis','pfx_z','pfx_x','pitch_type']]
mlb_metrics.dropna(inplace=True)

#sort values in alphabetical order for ease when looking at 3d plot
mlb_metrics = mlb_metrics.sort_values('player_name', ascending = True)

#change the movement numbers from feet to inches 
mlb_metrics['pfx_x'] *= 12
mlb_metrics['pfx_z'] *= 12

#change pitch type from letters to numbers
mlb_metrics['player_pitch'] = mlb_metrics['player_name']+' '+mlb_metrics['pitch_type']

mlb_metrics = mlb_metrics.reset_index()

#find which pitcher threw at least 100 pitches
pitch_count = mlb_metrics.groupby('player_name').count()
pitch_count = pitch_count.where(pitch_count > 100)

#save those who have qualified and save as a list
qualified = pitch_count[pitch_count['release_speed'].notna()]
qualified_lst = list(qualified['pfx_x'].keys())

#create df without unqualified pitchers 
mlb_metrics_qual = mlb_metrics[mlb_metrics['player_name'].isin(qualified_lst)]
rapsodo = 'pitchinggroup.csv'
features = ['release_speed','pfx_z','pfx_x']

rapsodo_df = random_forest_rapsodo(rapsodo,features)

comps, mlb_nu_pitchers_num_scaled = knn_rapsodo(rapsodo,'Hayden Smith', 3)


This is a large query, it may take a moment to complete


100%|█████████████████████████████████████████| 186/186 [10:47<00:00,  3.48s/it]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mlb_metrics.dropna(inplace=True)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(averag

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  X = check_array(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=

  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  _warn_prf(average, modifier, msg_start, len(result))


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_seasions[col][i] = float(all_seasions[col][i])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_seasions[col][i] = float('nan')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  player_df['player_pitch'] = player_df['player_name']
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [6]:
pd.set_option('display.max_rows', 500)
rapsodo_df[['player_name','pitch_type','player_comp']].groupby(['player_name','pitch_type']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,player_comp
player_name,pitch_type,Unnamed: 2_level_1
Brett Dunham,1.0,"Zimmermann, Bruce CH"
Brett Dunham,2.0,"Valdez, Framber CU"
Brett Dunham,3.0,"Zerpa, Angel SL"
Brett Dunham,5.0,"Young, Alex SI"
Carson Walsh,1.0,"Zimmermann, Bruce CH"
Carson Walsh,3.0,"Wood, Alex SL"
Carson Walsh,5.0,"Zimmermann, Bruce CH"
Charlie Walker,1.0,"Wisler, Matt SL"
Charlie Walker,2.0,"Thompson, Zack CU"
Charlie Walker,3.0,"Ray, Robbie SL"
