In [1]:
import numpy as np
import pandas as pd

In [2]:
class EloCalculator:
    def __init__(self, events_path, elo_path):
        self.elo_path = elo_path
        self.events = pd.read_csv('data/events_elo.csv')
        self.elo_table = pd.read_csv(self.elo_path)
        self.elo_dict = dict(zip(self.elo_table['player_id'], self.elo_table['elo_rating']))
        del(self.elo_table['elo_rating'])
        self.snapshots = []

    def calc_season_elo(self, k=2):
        self.k = k
        num_rows = self.events.shape[0]
        prev_prop = -1
        i = 0
        
        for _, row in self.events.iterrows():
            curr_prop = i/num_rows
            if (curr_prop - prev_prop) > .1:
                self.save_snapshot()
                prev_prop = curr_prop

            self.update_elo(row['bat_id'], row['pit_id'], row['batter_score'], row['pitcher_score'])
            i += 1
            
    def save_snapshot(self):
        self.snapshots.append(self.elo_dict.copy())

    def update_elo(self, bat_id, pit_id, bat_score, pit_score):
            bat_exp, pit_exp = self.get_expected(bat_id, pit_id)
            bat_curr = self.elo_dict[bat_id]
            pit_curr = self.elo_dict[pit_id]
            
            bat_new = self.calc_new(bat_curr, bat_score, bat_exp)
            pit_new = self.calc_new(pit_curr, pit_score, pit_exp)

            self.elo_dict[bat_id] = bat_new
            self.elo_dict[pit_id] = pit_new

    def get_expected(self, bat_id, pit_id):
        bat_diff = self.elo_dict[bat_id] - self.elo_dict[pit_id]
        pit_diff = -bat_diff
        return self.calc_expected(bat_diff), self.calc_expected(pit_diff)

    def calc_expected(self, diff):
        return 1/(1+(np.power(10, (diff/400))))

    def calc_new(self, curr, score, exp):
        return curr+(self.k*(score-exp))
    
    def get_dict(self):
        return self.elo_dict
    
    def get_df(self):
        list_dict = {}
        for k, v in self.elo_dict.items():
            list_dict[k] = [v]
        dfElo = pd.DataFrame.from_dict(list_dict, orient='index').reset_index()
        dfElo.columns = ['player_id', 'elo']
        return dfElo
        
    def reset_df(self):
        self.elo_table = pd.read_csv(self.elo_path)        

In [3]:
elo = EloCalculator('data/events_elo.csv', 'data/elo.csv')
elo.calc_season_elo()

In [4]:
dfElo = elo.get_df()

In [5]:
dfElo.head()

Unnamed: 0,player_id,elo
0,zimmr001,759.860114
1,zimmj003,2531.972876
2,wertj001,959.340149
3,waltz001,1494.273048
4,tracc001,1359.538763


In [6]:
elo.snapshots[1]

{'zimmr001': 1464.7724999506077,
 'zimmj003': 1522.8179561845584,
 'wertj001': 1489.1096060215684,
 'waltz001': 1495.0645891116599,
 'tracc001': 1491.0890244580996,
 'suzuk001': 1482.133786976087,
 'stras001': 1520.173856036232,
 'stord001': 1523.0285336809434,
 'stamc001': 1527.8706753172269,
 'spand001': 1466.927269499787,
 'sorir001': 1513.439138816191,
 'solaj002': 1490.7625440426093,
 'rodrh002': 1500.7478610167661,
 'roart001': 1531.200739264362,
 'renda001': 1490.9791431844028,
 'ramow001': 1478.8935481922383,
 'peree003': 1497.0463751974523,
 'ohler001': 1500,
 'moort002': 1486.1268905330296,
 'mayay001': 1500,
 'mattr001': 1499.6923576900388,
 'marrc001': 1500,
 'lombs002': 1474.559985779506,
 'leons001': 1498.74950180645,
 'laroa001': 1473.7193626081212,
 'kroli001': 1505.9004790505355,
 'kobej001': 1491.5202805673687,
 'karnn001': 1505.7051434641994,
 'jordt001': 1507.744231988956,
 'harpb003': 1478.2250519849183,
 'hared001': 1542.7256685096859,
 'hairs001': 1485.3039454713

In [7]:
dfBat = pd.read_csv('data/batters.csv')
dfPit = pd.read_csv('data/pitchers.csv')

dfBat = pd.merge(dfElo, dfBat, left_on='player_id', right_on='player_id')
dfBat = pd.merge(elo.elo_table, dfBat, left_on=['player_id', 'last_name_tx', 'first_name_tx'],
                 right_on=['player_id', 'nameLast', 'nameFirst'])

dfPit = pd.merge(dfElo, dfPit, left_on='player_id', right_on='player_id')
dfPit = pd.merge(elo.elo_table, dfPit, left_on=['player_id', 'last_name_tx', 'first_name_tx'],
                 right_on=['player_id', 'nameLast', 'nameFirst'])

dfBat = dfBat.drop_duplicates(subset=['player_id', 'C'])
dfPit = dfPit.drop_duplicates(subset=['player_id', 'C'])

dfBat.to_csv('output/bat_elo.csv')
dfPit.to_csv('output/pit_elo.csv')

In [8]:
dfBat

Unnamed: 0,player_id,last_name_tx,first_name_tx,team_id,team_tx,pos_tx,elo,nameFirst,nameLast,score
0,zimmr001,Zimmerman,Ryan,WAS,WAS,3B,759.860114,Ryan,Zimmerman,252.53
1,zimmj003,Zimmermann,Jordan,WAS,WAS,P,2531.972876,Jordan,Zimmermann,6.25
2,wertj001,Werth,Jayson,WAS,WAS,OF,959.340149,Jayson,Werth,252.92
3,waltz001,Walters,Zach,WAS,WAS,SS,1494.273048,Zach,Walters,4.75
4,tracc001,Tracy,Chad,WAS,WAS,3B,1359.538763,Chad,Tracy,35.80
5,suzuk001,Suzuki,Kurt,WAS,WAS,C,1130.348789,Kurt,Suzuki,86.08
9,stras001,Strasburg,Stephen,WAS,WAS,P,2300.412880,Stephen,Strasburg,6.08
10,stord001,Storen,Drew,WAS,WAS,P,1787.046072,Drew,Storen,0.00
11,stamc001,Stammen,Craig,WAS,WAS,P,1856.236764,Craig,Stammen,0.29
12,spand001,Span,Denard,WAS,WAS,OF,729.498212,Denard,Span,222.88


In [9]:
events = pd.read_csv('data/events_elo.csv')
events.head()

Unnamed: 0,game_dt,bat_id,pit_id,batter_score,pitcher_score
0,20130409,crisc001,wilsc004,0.0,1.0
1,20130409,younc004,wilsc004,0.0,1.0
2,20130409,lowrj001,wilsc004,1.0,0.0
3,20130409,cespy001,wilsc004,1.0,0.0
4,20130409,norrd001,wilsc004,1.0,0.0


In [10]:
dfBat.head()

Unnamed: 0,player_id,last_name_tx,first_name_tx,team_id,team_tx,pos_tx,elo,nameFirst,nameLast,score
0,zimmr001,Zimmerman,Ryan,WAS,WAS,3B,759.860114,Ryan,Zimmerman,252.53
1,zimmj003,Zimmermann,Jordan,WAS,WAS,P,2531.972876,Jordan,Zimmermann,6.25
2,wertj001,Werth,Jayson,WAS,WAS,OF,959.340149,Jayson,Werth,252.92
3,waltz001,Walters,Zach,WAS,WAS,SS,1494.273048,Zach,Walters,4.75
4,tracc001,Tracy,Chad,WAS,WAS,3B,1359.538763,Chad,Tracy,35.8


In [11]:
elo.elo_table.head()

Unnamed: 0,player_id,last_name_tx,first_name_tx,team_id,team_tx,pos_tx
0,zimmr001,Zimmerman,Ryan,WAS,WAS,3B
1,zimmj003,Zimmermann,Jordan,WAS,WAS,P
2,wertj001,Werth,Jayson,WAS,WAS,OF
3,waltz001,Walters,Zach,WAS,WAS,SS
4,tracc001,Tracy,Chad,WAS,WAS,3B


In [12]:
dfCom.head()

NameError: name 'dfCom' is not defined

## 