# Batter Recommender System
---
Using Current stats, salaries, and clusters, a recommender system was built using cosine distances.

## Import Libraries
---

In [1]:
import pandas as pd
import numpy as np 
from scipy import sparse
import pickle

from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

## Import Dataset
---

In [2]:
df = pd.read_csv('../data/clusters_bat.csv').drop('Unnamed: 0', axis = 1)
df.head()

Unnamed: 0,MLBID,FIRSTNAME,LASTNAME,Player,Team,Pos,Age,G,AB,R,...,HR,RBI,BB,SO,AVG,OBP,SLG,OPS,salary,cluster
0,547989,Jose,Abreu,Jose Abreu,CWS,1B,34,152,566,86,...,30,117,61,143,0.261,0.351,0.481,0.832,17666666,0
1,642715,Willy,Adames,Willy Adames,TB,SS,26,41,132,16,...,5,15,10,51,0.197,0.254,0.371,0.625,590000,4
2,501303,Ehire,Adrianza,Ehire Adrianza,ATL,SS,32,109,182,32,...,5,28,21,42,0.247,0.327,0.401,0.728,1500000,5
3,542583,Jesus,Aguilar,Jesus Aguilar,MIA,1B,31,131,449,49,...,22,93,46,93,0.261,0.329,0.459,0.788,4500000,3
4,605113,Nick,Ahmed,Nick Ahmed,ARI,SS,31,129,434,46,...,5,38,34,104,0.221,0.28,0.339,0.619,8125000,2


In [3]:
df.drop(columns = ['MLBID', 'FIRSTNAME', 'LASTNAME', 'Team', 'Pos', 'Age', 'G'], inplace = True)
df.head()

Unnamed: 0,Player,AB,R,H,2B,3B,HR,RBI,BB,SO,AVG,OBP,SLG,OPS,salary,cluster
0,Jose Abreu,566,86,148,30,2,30,117,61,143,0.261,0.351,0.481,0.832,17666666,0
1,Willy Adames,132,16,26,6,1,5,15,10,51,0.197,0.254,0.371,0.625,590000,4
2,Ehire Adrianza,182,32,45,9,2,5,28,21,42,0.247,0.327,0.401,0.728,1500000,5
3,Jesus Aguilar,449,49,117,23,0,22,93,46,93,0.261,0.329,0.459,0.788,4500000,3
4,Nick Ahmed,434,46,96,30,3,5,38,34,104,0.221,0.28,0.339,0.619,8125000,2


## Create Pivot Table
---

In [4]:
pivot = pd.pivot_table(df, index = 'Player')
pivot.head()

Unnamed: 0_level_0,2B,3B,AB,AVG,BB,H,HR,OBP,OPS,R,RBI,SLG,SO,cluster,salary
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Aaron Hicks,3,0,108,0.194,14,21,4,0.294,0.627,13,14,0.333,30,4,10785714
Aaron Judge,24,0,550,0.287,75,158,39,0.373,0.917,89,98,0.544,158,0,10175000
Adalberto Mondesi,8,1,126,0.23,6,29,6,0.271,0.723,19,17,0.452,43,5,2525000
Adam Duvall,7,1,199,0.226,14,45,16,0.287,0.8,26,45,0.513,69,5,2000000
Adam Engel,9,0,123,0.252,11,31,7,0.336,0.832,21,18,0.496,31,5,1375000


In [5]:
sparse_pivot = sparse.csr_matrix(pivot.fillna(0))

In [6]:
recommender = cosine_distances(sparse_pivot)   

In [7]:
recommender_df = pd.DataFrame(recommender, columns=pivot.index, index=pivot.index)
recommender_df.head()

Player,Aaron Hicks,Aaron Judge,Adalberto Mondesi,Adam Duvall,Adam Engel,Adam Frazier,Adolis Garcia,Akil Baddoo,Albert Pujols,Alcides Escobar,...,Yadiel Hernandez,Yadier Molina,Yan Gomes,Yandy Diaz,Yasmani Grandal,Yoan Moncada,Yonathan Daza,Yonny Hernandez,Yordan Alvarez,Zack Collins
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Hicks,0.0,1.233091e-09,9.854307e-10,5.06554e-09,3.790208e-09,6.588102e-10,6.107854e-07,3.082166e-07,1.085759e-08,1.745031e-07,...,1.184278e-07,8.559761e-10,8.02417e-11,3.52638e-07,2.771883e-11,2.72936e-09,1.521084e-07,3.257034e-08,4.690657e-07,6.727126e-08
Aaron Judge,1.233091e-09,0.0,3.880596e-11,1.341507e-09,7.196937e-10,1.510383e-10,5.574884e-07,2.706011e-07,4.935004e-09,1.468132e-07,...,9.565294e-08,6.541701e-11,6.950838e-10,3.124879e-07,9.253741e-10,3.10195e-10,1.26312e-07,2.136114e-08,4.222711e-07,5.047139e-08
Adalberto Mondesi,9.854307e-10,3.880596e-11,0.0,1.609302e-09,9.355794e-10,9.722134e-11,5.627906e-07,2.745317e-07,5.448153e-09,1.497017e-07,...,9.803472e-08,4.121403e-11,5.09719e-10,3.170856e-07,7.317278e-10,4.694561e-10,1.289939e-07,2.242355e-08,4.273817e-07,5.213752e-08
Adam Duvall,5.06554e-09,1.341507e-09,1.609302e-09,0.0,1.789338e-10,2.263531e-09,5.04981e-07,2.349014e-07,1.431801e-09,1.21682e-07,...,7.519161e-08,1.846381e-09,3.89364e-09,2.748865e-07,4.435886e-09,4.619628e-10,1.028562e-07,1.264069e-08,3.772508e-07,3.584162e-08
Adam Engel,3.790208e-09,7.196937e-10,9.355794e-10,1.789338e-10,0.0,1.357678e-09,5.188637e-07,2.438529e-07,1.974077e-09,1.271686e-07,...,7.998225e-08,1.076636e-09,2.775526e-09,2.837393e-07,3.247767e-09,1.262529e-10,1.082578e-07,1.438409e-08,3.88691e-07,3.947122e-08


## Test Recommender
---

In [8]:
recommender_df['Mookie Betts'].sort_values()[1:11]

Player
Jose Altuve         6.240564e-13
Francisco Lindor    1.205924e-12
Josh Donaldson      2.583156e-12
Starling Marte      2.909673e-12
Kole Calhoun        4.128475e-12
Kyle Schwarber      4.495515e-12
Yan Gomes           4.724887e-12
Brandon Belt        4.825473e-12
Buster Posey        5.035528e-12
Wil Myers           5.295875e-12
Name: Mookie Betts, dtype: float64

## Save CSV and Pickle Files of Recommendation
---

In [9]:
recommender_df.to_csv('../data/recommender_bat.csv', index = False)

In [10]:
with open('../pickles/recommendation_bat.pkl', 'wb') as pickle_out:
    pickle_out = pickle.dump(recommender_df, pickle_out)

## Recap
---
Create a recommendation system from current stats, salaries and clusters for each batter. This will be used in the app