# PyStan

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

import time
import timeit

import scipy.stats 
import pandas as pd
import pickle

import pystan

In [2]:
# Load the cleaned CSV inputs: per-player table and game-outcome table
player_info = pd.read_csv('clean-data/player_info_pergame.csv')
game16_df = pd.read_csv('clean-data/game_outcomes_15-16.csv')

In [3]:
# Response variable passed to the Stan model: the 'diff' column of the
# game-outcome table as a NumPy array (one entry per game row).
# NOTE(review): the sign convention of 'diff' (guest minus host or the
# reverse) is not visible here — confirm against the data preparation step.
score_diff = game16_df['diff'].values

In [4]:
# Read the pickled guest/host line-ups and convert each to an integer array.
# NOTE: pickle.load executes arbitrary code from the file; only use trusted files.
with open('clean-data/guest_team_line_up.pkl', 'rb') as handle:
    guest_team_line_up = pickle.load(handle)
guest_lineup_arr = np.array(guest_team_line_up, dtype = int)

with open('clean-data/host_team_line_up.pkl','rb') as handle:
    host_team_line_up = pickle.load(handle)
host_lineup_arr = np.array(host_team_line_up, dtype = int)

In [5]:
def make_guest_host_mat(game_outcomes_df):
    '''
    Build one-hot team-indicator matrices for the guest and host sides.

    Row j of each returned matrix corresponds to row j of
    ``game_outcomes_df`` (one matchup). Every entry of a row is False except
    one: in the guest matrix it is column ``Visitor_Index[j]``, in the host
    matrix it is column ``Home_Index[j]``.

    The number of columns is one more than the largest index found in
    either ``Visitor_Index`` or ``Home_Index``, so a team whose largest
    index only ever appears on one side still gets a column in both matrices.

    Parameters
    ----------
    game_outcomes_df : pandas.DataFrame
        Must contain integer columns ``Visitor_Index`` and ``Home_Index``.

    Returns
    -------
    (guest_matrix, host_matrix) : tuple of numpy.ndarray
        Two boolean arrays of identical shape ``(n_games, n_teams)``.
        For an empty frame both arrays have shape ``(0, 0)``.
    '''

    def one_hot(indices, width):
        '''Return a bool matrix with a single True per row, at indices[row].'''
        onehot = np.zeros((len(indices), width), dtype = bool)
        # One vectorised assignment instead of a Python loop over the rows.
        onehot[np.arange(len(indices)), indices] = True
        return onehot

    visitor_idx = game_outcomes_df['Visitor_Index'].values
    home_idx = game_outcomes_df['Home_Index'].values
    nrows = game_outcomes_df.shape[0]

    if nrows == 0:
        # No games: nothing to index, and np.max would fail on empty input.
        empty = np.zeros((0, 0), dtype = bool)
        return(empty, empty.copy())

    # Size by BOTH columns; sizing by the visitor column alone breaks when
    # the highest team index only occurs as a host.
    ncols = int(max(np.max(visitor_idx), np.max(home_idx))) + 1

    guest_matrix = one_hot(visitor_idx, ncols)
    host_matrix = one_hot(home_idx, ncols)

    return(guest_matrix, host_matrix)

In [6]:
# Build the boolean team-indicator matrices and cast each to an integer array
guest_matrix, host_matrix = (
    indicator.astype(int) for indicator in make_guest_host_mat(game16_df)
)

In [7]:
# Composite per-player ratings formed by summing columns of player_info.
# Offensive rating: PTS + AST
off_rating = (player_info['PTS'] + player_info['AST']).values
# Defensive rating: BLK + STL + DRB
def_rating = (player_info['BLK'] + player_info["STL"] + player_info['DRB']).values

In [8]:
# Stan program for the game score-differential model.
#
# data:       problem sizes, per-player off/def ratings, per-game team
#             indicator matrices, per-game line-up matrices, the observed
#             score_diff vector, and the prior scale `lam`.
# parameters: team-level terms beta0 / gamma0, player-level coefficients
#             beta / gamma (multiplied element-wise with off_rating /
#             def_rating respectively), and logsigma.
# model:      every entry of beta0, gamma0, beta and gamma gets an
#             independent normal(0, lam) prior; logsigma has no sampling
#             statement. The likelihood is
#               score_diff ~ normal(mu, exp(logsigma))
#             with
#               mu = (guest offense - host defense) - (host offense - guest defense)
#             where, for each side S in {guest, host},
#               S offense = S_matrix * beta0  + S_lineup_arr * (beta  .* off_rating)
#               S defense = S_matrix * gamma0 + S_lineup_arr * (gamma .* def_rating)
#
# NOTE(review): expanding mu gives
#   (guest_matrix - host_matrix) * (beta0 + gamma0)
#   + (guest_lineup_arr - host_lineup_arr) * (beta .* off_rating + gamma .* def_rating)
# so the likelihood only depends on the sum beta0 + gamma0 per team and on the
# combined per-player term; the split between the two is determined by the
# priors alone. Confirm this is intended.
nba_code = """
data {
    int<lower=0> nplayers; // number of players
    int<lower=0> nteams; // number of teams
    int<lower=0> ngames; // number of games played in season
    vector[nplayers] off_rating;
    vector[nplayers] def_rating;
    matrix[ngames, nteams] guest_matrix;
    matrix[ngames, nteams] host_matrix;
    matrix[ngames, nplayers] guest_lineup_arr;
    matrix[ngames, nplayers] host_lineup_arr;
    vector[ngames] score_diff;
    real<lower=0> lam;
}
parameters {
    vector[nteams] beta0;
    vector[nplayers] beta;
    vector[nteams] gamma0;
    vector[nplayers] gamma;
    real logsigma;
}
model {
    for (i in 1:nteams) {
        beta0[i] ~ normal(0, lam);
        gamma0[i] ~ normal(0, lam);
    } 
        
    for (j in 1:nplayers) {
        beta[j] ~ normal(0, lam);
        gamma[j] ~ normal(0, lam);
    }  

    score_diff ~ normal(((guest_matrix * beta0) + 
                        guest_lineup_arr * (beta .* off_rating)) -
                        ((host_matrix * gamma0) + 
                        host_lineup_arr * (gamma .* def_rating)) - 
                        (((host_matrix * beta0) +
                        host_lineup_arr * (beta .* off_rating)) -
                        ((guest_matrix * gamma0) + 
                        guest_lineup_arr * (gamma .* def_rating))), exp(logsigma));
}
"""

In [9]:
# Compile the Stan program held in `nba_code` into a model object that the
# later cells use for sampling.
sm = pystan.StanModel(model_code=nba_code)

In [12]:
# Inputs for the Stan `data` block; keyword names match the Stan variable names.
data = dict(
    # Sizes are read off the arrays themselves
    nplayers = guest_lineup_arr.shape[1],
    nteams = guest_matrix.shape[1],
    ngames = guest_matrix.shape[0],
    # Per-player ratings
    off_rating = off_rating,
    def_rating = def_rating,
    # Per-game team indicators and line-ups
    guest_matrix = guest_matrix,
    host_matrix = host_matrix,
    guest_lineup_arr = guest_lineup_arr,
    host_lineup_arr = host_lineup_arr,
    # Observed response and prior scale
    score_diff = score_diff,
    lam = 1.0,
)

In [13]:
# Disabled: point estimate of the parameters via optimization instead of sampling.
#opt_res = sm.optimizing(data = data)

In [None]:
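# Disabled: rank players by their fitted `beta`. Needs `opt_res`, so the
# `sm.optimizing` call must be re-enabled before uncommenting this.
# contribute_off = np.argsort(-opt_res['beta'])# * player_info['PTS'])
# betas_df = player_info.loc[contribute_off]
# betas_df['beta'] = opt_res['beta'][contribute_off]
# betas_df.head(20)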

In [None]:
# Draw posterior samples: one chain, 500 iterations in total.
# The fit is bound to a name so later cells can inspect it, and a fixed seed
# makes the run reproducible under Restart & Run All.
fit = sm.sampling(data = data, chains = 1, iter = 500, seed = 42)
# Last expression of the cell: still display the fit summary as before.
fit