# Pitcher Recommender System
---
Using Current stats, salaries, and clusters, a recommender system was built using cosine distances

## Import Libraries
---

In [1]:
import pandas as pd
import numpy as np 
from scipy import sparse
import pickle

from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

## Import Dataset

In [2]:
df = pd.read_csv('../data/clusters_pitch.csv').drop('Unnamed: 0', axis = 1)
df.head()

Unnamed: 0,MLBID,FIRSTNAME,LASTNAME,Player,Team,Age,W,L,ERA,IP,H,ER,HR,BB,K,WHIP,salary,cluster
0,472551,Fernando,Abad,Fernando Abad,BAL,35,0,0,5.6,17.2,23,11,1,7,10,1.7,570500,2
1,676265,Cory,Abbott,Cory Abbott,CHC,26,0,0,6.75,17.1,20,13,7,11,12,1.79,570500,2
2,642758,Domingo,Acevedo,Domingo Acevedo,OAK,27,0,0,3.27,11.0,9,4,3,4,9,1.18,570500,4
3,613534,Austin,Adams,Austin Adams,SD,30,3,2,4.1,52.2,28,24,1,35,76,1.2,580200,3
4,669211,Keegan,Akin,Keegan Akin,BAL,26,2,10,6.63,95.0,110,70,17,40,82,1.58,570500,1


In [3]:
df.drop(columns = ['MLBID', 'FIRSTNAME', 'LASTNAME', 'Team', 'Age'], inplace = True)
df.head()

Unnamed: 0,Player,W,L,ERA,IP,H,ER,HR,BB,K,WHIP,salary,cluster
0,Fernando Abad,0,0,5.6,17.2,23,11,1,7,10,1.7,570500,2
1,Cory Abbott,0,0,6.75,17.1,20,13,7,11,12,1.79,570500,2
2,Domingo Acevedo,0,0,3.27,11.0,9,4,3,4,9,1.18,570500,4
3,Austin Adams,3,2,4.1,52.2,28,24,1,35,76,1.2,580200,3
4,Keegan Akin,2,10,6.63,95.0,110,70,17,40,82,1.58,570500,1


## Create Pivot Table
---

In [4]:
pivot = pd.pivot_table(df, index = 'Player')
pivot.head()

Unnamed: 0_level_0,BB,ER,ERA,H,HR,IP,K,L,W,WHIP,cluster,salary
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
A.J. Minter,20,22,3.78,44,2,52.1,57,6,3,1.22,3,1300000
Aaron Bummer,29,22,3.51,42,3,56.1,75,5,5,1.26,3,2000000
Aaron Civale,31,53,3.84,108,23,124.1,99,5,12,1.12,1,587400
Aaron Loup,16,6,0.95,37,1,56.2,57,0,6,0.94,4,3250000
Aaron Nola,39,93,4.63,165,26,180.2,223,9,9,1.13,5,12250000


In [5]:
sparse_pivot = sparse.csr_matrix(pivot.fillna(0))

In [6]:
recommender = cosine_distances(sparse_pivot)   

In [7]:
recommender_df = pd.DataFrame(recommender, columns=pivot.index, index=pivot.index)
recommender_df.head()

Player,A.J. Minter,Aaron Bummer,Aaron Civale,Aaron Loup,Aaron Nola,Adam Cimber,Adam Conley,Adam Morgan,Adam Ottavino,Adam Wainwright,...,Yusei Kikuchi,Yusmeiro Petit,Zac Gallen,Zach Davies,Zach Eflin,Zach Plesac,Zach Thompson,Zack Greinke,Zack Littell,Zack Wheeler
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A.J. Minter,0.0,1.961996e-10,3.795234e-08,1.041981e-09,9.903665e-10,2.554967e-10,2.611688e-10,2.315017e-10,1.572462e-09,5.167375e-10,...,1.516489e-09,7.46797e-10,4.589242e-08,8.982589e-10,4.971836e-10,5.532474e-08,1.080162e-08,2.090494e-09,6.175909e-09,1.559037e-09
Aaron Bummer,1.961996e-10,0.0,4.340337e-08,3.962921e-10,3.783326e-10,2.247954e-10,1.03767e-10,5.324619e-11,7.380885e-10,1.666837e-10,...,7.116074e-10,3.804917e-10,5.157985e-08,3.786417e-10,2.030095e-10,6.197649e-08,1.376844e-08,1.118503e-09,8.329857e-09,7.318879e-10
Aaron Civale,3.795234e-08,4.340337e-08,0.0,5.112334e-08,5.071026e-08,4.219545e-08,4.344315e-08,4.3425e-08,5.451496e-08,4.652858e-08,...,5.408411e-08,4.742154e-08,2.466865e-09,4.953926e-08,4.597266e-08,2.243063e-09,8.743274e-09,5.718225e-08,1.449839e-08,5.441193e-08
Aaron Loup,1.041981e-09,3.962921e-10,5.112334e-08,0.0,2.664324e-11,5.130626e-10,3.467447e-10,3.715269e-10,7.806589e-11,1.189867e-10,...,7.010081e-11,1.725424e-10,6.063336e-08,9.14111e-11,1.889997e-10,7.086928e-08,1.840582e-08,2.085905e-10,1.210775e-08,6.582501e-11
Aaron Nola,9.903665e-10,3.783326e-10,5.071026e-08,2.664324e-11,0.0,4.979477e-10,3.480748e-10,3.284587e-10,7.993062e-11,1.022171e-10,...,6.374679e-11,1.380142e-10,6.017343e-08,5.110823e-11,1.346691e-10,7.023316e-08,1.82504e-08,2.136642e-10,1.20216e-08,7.01792e-11


## Test Recommender
---

In [8]:
recommender_df['Chris Sale'].sort_values()[1:11]

Player
Brad Hand            2.620126e-13
Craig Kimbrel        3.441691e-13
Max Scherzer         5.361267e-13
David Price          1.088463e-12
Stephen Strasburg    1.537770e-12
Miles Mikolas        1.687761e-12
Jacob deGrom         3.459677e-12
Chris Archer         4.415135e-12
Drew Pomeranz        4.443668e-12
Andrew Miller        5.506262e-12
Name: Chris Sale, dtype: float64

## Save CSV and Pickle Files of Recommendation
---

In [9]:
recommender_df.to_csv('../data/recommender_pitch.csv', index = False)

In [10]:
with open('../pickles/recommendation_pitch.pkl', 'wb') as pickle_out:
    pickle_out = pickle.dump(recommender_df, pickle_out)

## Recap
---
Create a recommendation system from current stats, salaries, and clusters for each pitcher. This will be used in the app. 