In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import torch
from torch import optim
import torch.nn as nn
from torch.autograd import Variable as V
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import scipy

# Gradient descent with backtracking line search

In [2]:
def obj(x):
    """Evaluate the quadratic objective 0.5 * x^T Q x + b^T x.

    Reads the module-level names ``Q`` and ``b``; both must be bound
    before the first call.
    """
    quadratic_term = np.dot(np.dot(x.T, Q), x)
    linear_term = np.dot(b.T, x)
    return 0.5 * quadratic_term + linear_term

def get_jacobian(x):
    """Return Q x + b, using the module-level ``Q`` and ``b``.

    For a symmetric ``Q`` this is the gradient of 0.5 * x^T Q x + b^T x.
    """
    q_times_x = np.dot(Q, x)
    return q_times_x + b

# gradient descent with backtracking line search
def gd_bls(obj, alpha ,  beta , n ,  x_init , d, curve1, grad=None):
    """Gradient descent with backtracking (Armijo) line search.

    Parameters
    ----------
    obj : callable
        Objective function, called as ``obj(x)``.
    alpha : float
        Sufficient-decrease constant of the Armijo condition.
    beta : float
        Shrink factor. Every outer iteration starts from the trial step
        ``beta`` and multiplies it by ``beta`` after each rejected trial.
    n : int
        Number of outer iterations.
    x_init : array-like
        Starting point. It is not modified in place.
    d : int
        Unused; kept so existing positional calls keep working.
    curve1 : indexable
        ``curve1[k]`` receives the objective value at the start of
        iteration ``k``; needs at least ``n`` slots.
    grad : callable, optional
        Gradient of ``obj``, called as ``grad(x)``. When omitted, the
        module-level ``get_jacobian`` is used.

    Returns
    -------
    tuple
        ``(curve1, x)`` where ``x`` is the final iterate.
    """
    if grad is None:
        # Resolved at call time so the notebook-level gradient stays the default.
        grad = get_jacobian

    x = x_init
    for k in range(int(n)):
        f_x = obj(x)
        curve1[k] = f_x
        g = grad(x)
        # Armijo condition: f(x - t*g) <= f(x) - alpha * t * ||g||^2.
        # The norm must be squared; with the plain norm the test is too
        # permissive whenever ||g|| > 1 and too strict whenever ||g|| < 1.
        g_norm_sq = np.linalg.norm(g) ** 2
        step = beta
        while not (obj(x - step * g) <= f_x - alpha * step * g_norm_sq):
            step = step * beta
            if step == 0.0:
                # The step underflowed (e.g. the objective is NaN); stop
                # shrinking instead of looping forever.
                break
        x = x - step * g
    return (curve1, x)
    
def gd(obj, n , step ,   x_init , d, curve2, grad=None):
    """Gradient descent with a constant step size.

    Parameters
    ----------
    obj : callable
        Objective function, called as ``obj(x)``; only used to record the trace.
    n : int
        Number of iterations.
    step : float
        Fixed step size applied to the gradient.
    x_init : array-like
        Starting point. It is not modified in place.
    d : int
        Unused; kept so existing positional calls keep working.
    curve2 : indexable
        ``curve2[k]`` receives the objective value at the start of
        iteration ``k``; needs at least ``n`` slots.
    grad : callable, optional
        Gradient of ``obj``, called as ``grad(x)``. When omitted, the
        module-level ``get_jacobian`` is used, which only matches ``obj``
        when ``obj`` is the notebook's quadratic.

    Returns
    -------
    tuple
        ``(curve2, x)`` where ``x`` is the final iterate.
    """
    if grad is None:
        # Resolved at call time so the notebook-level gradient stays the default.
        grad = get_jacobian

    x = x_init
    for k in range(int(n)):
        curve2[k] = obj(x)
        x = x - step * grad(x)
    return (curve2, x)

In [4]:

# Constants named after the gd_bls / gd parameters
# (presumably intended as their arguments; no call is visible in this notebook).
alpha, beta = 0.1, 0.3
n, d = 2000, 100

# One slot per iteration for each objective trace.
curve1 = np.zeros((n, 1))
curve2 = np.zeros((n, 1))

# Start at the origin, stored as a d x 1 np.matrix.
x_init = np.matrix(np.zeros((d, 1)))
# print("x_init shape: ",x_init.shape)

# Fit one weight per per-game stat column on the 08-09 season: the weights are
# the least-squares solution (via the pseudo-inverse) of stats @ weights = WS.
player_stat_08_09 = pd.read_csv("play_per_game_stat/8-9.csv", delimiter=",")
advanced_stat_08_09 = pd.read_csv("advanced_player_stat/8-9.csv", delimiter=",")
player_stat_08_09 = player_stat_08_09.fillna(0)
advanced_stat_08_09 = advanced_stat_08_09.fillna(0)

# NOTE: despite their names these two hold ROW counts (shape[0]).
player_stat_08_09_column_num = player_stat_08_09.shape[0]
advanced_stat_08_09_column_num = advanced_stat_08_09.shape[0]

# Attach each player's win share (WS) by exact match on the raw "Player"
# string. When a name occurs several times in the advanced table the last
# row wins; players without a match get 0. A dict lookup replaces the former
# rows x rows .loc scan and yields the same values.
ws_by_player = dict(zip(advanced_stat_08_09["Player"], advanced_stat_08_09["WS"]))
player_stat_08_09["WS"] = [ws_by_player.get(name, 0) for name in player_stat_08_09["Player"]]

# The earlier "*" / "\" clean-up of "Player" is omitted: the column is dropped
# on the next line, so the cleaned names were never read.
player_stat_08_09 = player_stat_08_09.drop(["Pos", "Tm", "Rk", "Age", "Player"], axis=1)

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement.
player_stat_08_09_matrix = player_stat_08_09.to_numpy()
player_stat_08_09_data = player_stat_08_09_matrix[:, :-1]   # every column except WS
player_stat_08_09_ws = player_stat_08_09_matrix[:, -1]      # WS is the last column

a = np.array(player_stat_08_09_data)
# NOTE: `b` is also the global read by obj() / get_jacobian() earlier in this
# notebook, so running this cell changes what those functions compute.
b = np.array(player_stat_08_09_ws).reshape(-1, 1)

a_pinv = scipy.linalg.pinv(a)
weights = a_pinv.dot(b)
# Column labels of the reduced table (the stat columns followed by "WS").
attributes = player_stat_08_09.columns.values




In [5]:
# Team abbreviation -> full team name, as used to translate the "Team" column
# of the team tables. "Charlotte Hornets" appears under two abbreviations
# (CHH and CHO). The trailing notes are the original author's season
# annotations, kept verbatim.
team_dic = {
    "SAS": "San Antonio Spurs",
    "SAC": "Sacramento Kings",
    "UTA": "Utah Jazz",
    "DAL": "Dallas Mavericks",
    "POR": "Portland Trail Blazers",
    "LAL": "Los Angeles Lakers",
    "PHI": "Philadelphia 76ers",
    "MIL": "Milwaukee Bucks",
    "HOU": "Houston Rockets",
    "PHO": "Phoenix Suns",
    "NYK": "New York Knicks",
    "MIN": "Minnesota Timberwolves",
    "MIA": "Miami Heat",
    "TOR": "Toronto Raptors",
    "CHH": "Charlotte Hornets",       # note: 02-03 New Orleans Hornets
    "NOH": "New Orleans Hornets",
    "NOP": "New Orleans Pelicans",    # note: 13-14 New Orleans Pelicans
    "SEA": "Seattle SuperSonics",
    "OKC": "Oklahoma City Thunder",   # note: 08-09 Oklahoma City Thunder
    "ORL": "Orlando Magic",
    "IND": "Indiana Pacers",
    "DEN": "Denver Nuggets",
    "DET": "Detroit Pistons",
    "LAC": "Los Angeles Clippers",
    "BOS": "Boston Celtics",
    "CLE": "Cleveland Cavaliers",
    "VAN": "Vancouver Grizzlies",     # note: 1-2 Memphis Grizzlies
    "MEM": "Memphis Grizzlies",
    "NJN": "New Jersey Nets",
    "BRK": "Brooklyn Nets",           # note: 12-13 Brooklyn Nets
    "ATL": "Atlanta Hawks",
    "WAS": "Washington Wizards",
    "GSW": "Golden State Warriors",
    "CHI": "Chicago Bulls",
    "CHA": "Charlotte Bobcats",       # note: 04-05 29-30 CHA Charlotte Bobcats
    "CHO": "Charlotte Hornets",       # note: 14-15 Charlotte Hornets
}



In [7]:
def initialize_advanced_stat(season_begin, season_end):
    """Load one advanced-stat table per season.

    For every ``i`` produced by ``np.arange(season_begin, season_end, 1)``
    the file ``advanced_player_stat/<i>-<i+1>.csv`` is read and its missing
    values are replaced by 0.

    Returns
    -------
    list of pandas.DataFrame
        One table per season, in season order.
    """
    def _load_season(season):
        path = "advanced_player_stat/" + str(season) + "-" + str(season + 1) + ".csv"
        table = pd.read_csv(path, delimiter=",")
        table.fillna(0, inplace=True)
        return table

    return [_load_season(season) for season in np.arange(season_begin, season_end, 1)]
   
# Advanced-stat tables for the files "0-1.csv" through "16-17.csv" (the end value 17 is excluded).
advanced_stat_00_17 = initialize_advanced_stat(0,17)

def initialize_player_stat(season_begin, season_end, weights):
    """Load the per-game tables and add an ``ability_value`` column to each.

    ``weights`` is reduced by deleting a fixed set of rows; the remaining
    rows are multiplied with a fixed set of table columns (selected by
    position) to give one ``ability_value`` per player row. The shapes of
    the full and the reduced weight arrays are printed.

    For every ``i`` produced by ``np.arange(season_begin, season_end, 1)``
    the file ``play_per_game_stat/<i>-<i+1>.csv`` is read and its missing
    values are replaced by 0.

    Returns
    -------
    list of pandas.DataFrame
        One table per season, in season order.
    """
    # Rows of `weights` that are discarded ...
    dropped_weight_rows = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 18]
    # ... and the table columns (by position) paired with the rows that remain.
    used_stat_columns = [5, 6, 17, 21, 22, 24, 25, 26, 27, 28, 29]

    print(weights.shape)
    reduced_weights = np.delete(weights, dropped_weight_rows, axis=0)
    print(reduced_weights.shape)

    tables = []
    for season in np.arange(season_begin, season_end, 1):
        path = "play_per_game_stat/" + str(season) + "-" + str(season + 1) + ".csv"
        table = pd.read_csv(path, delimiter=",")
        table.fillna(0, inplace=True)
        selected_stats = table.iloc[:, used_stat_columns]
        table["ability_value"] = np.dot(selected_stats, reduced_weights)
        tables.append(table)
    return tables

# Per-game tables for the files "0-1.csv" through "16-17.csv", each with an added
# "ability_value" column computed from the weights fitted on the 08-09 season.
player_player_stat_00_17 =initialize_player_stat(0,17,weights)


(25, 1)
(11, 1)


In [8]:

# Disabled earlier variant that read separate train / test MVP files:
# mvp_train_dataframe = pd.read_csv("mvp/mvp_train.csv",delimiter = ",")
# mvp_test_dataframe = pd.read_csv("mvp/mvp_test.csv",delimiter = ",")
# MVP table for all seasons; initialize_data reads its "name" column by season position.
mvp_dataframe_00_17 = pd.read_csv("mvp/mvp_00_17.csv",delimiter = ",")

def initialize_team_miscellaneous_stat(season_begin, season_end , team_dic):
    """Load the team tables and rewrite ``Team`` to abbreviations.

    For every ``i`` produced by ``np.arange(season_begin, season_end, 1)``
    the file ``miscellaneous_stat/<i>-<i+1>.csv`` is read and its missing
    values are replaced by 0. Each ``Team`` value is cut at its first "*"
    and, when it equals a full name in ``team_dic``, replaced by the
    matching key. "Charlotte Hornets" is special-cased: it becomes "CHO"
    for ``i >= 14`` and "CHH" otherwise. Names without a match are kept.

    Returns
    -------
    list of pandas.DataFrame
        One table per season, in season order.
    """
    def _abbreviate(raw_name, season):
        star_at = raw_name.find("*")
        current = raw_name[0:star_at] if star_at != -1 else raw_name
        # Every key is visited in dictionary order; a later key is compared
        # against the value as already rewritten by an earlier match.
        for key in team_dic:
            if team_dic[key] == current:
                current = key
                if team_dic[key] == "Charlotte Hornets":
                    current = "CHO" if season >= 14 else "CHH"
        return current

    tables = []
    for season in np.arange(season_begin, season_end, 1):
        path = "miscellaneous_stat/" + str(season) + "-" + str(season + 1) + ".csv"
        table = pd.read_csv(path, delimiter=",")
        table.fillna(0, inplace=True)
        table["Team"] = [_abbreviate(name, season) for name in table["Team"]]
        tables.append(table)
    return tables

# Team tables for the files "0-1.csv" through "16-17.csv", with "Team" rewritten to abbreviations.
miscellaneous_stat_00_17 = initialize_team_miscellaneous_stat(0,17,team_dic)
# Disabled earlier variant that loaded the train / test seasons separately:
# miscellaneous_stat_train  = initialize_team_miscellaneous_stat(0,10,team_dic)
# miscellaneous_stat_test = initialize_team_miscellaneous_stat(10,17,team_dic)



In [9]:

def initialize_data(advanced_stat,mvp_dataframe,team_dic,team_miscellaneous_stat,player_stat,top_n=50):
    """Build per-season player names, MVP positions and feature rows.

    For season ``i`` the advanced-stat table is extended with two columns:

    * ``ability_value`` - copied from ``player_stat[i]`` by exact match on
      the raw ``Player`` string (last match wins, 0 when there is none);
    * ``W`` - the ``W`` value of the player's team in
      ``team_miscellaneous_stat[i]`` when ``Tm`` is a key of ``team_dic``,
      otherwise 0.

    The bookkeeping columns are dropped, rows are sorted by ``WS``
    (descending) and the first ``top_n`` rows are kept. Player names are cut
    at the first "*" and then at the first backslash before they are compared
    with ``mvp_dataframe.loc[i, "name"]``.

    Parameters
    ----------
    advanced_stat : list of pandas.DataFrame
        One table per season. The tables are copied, not modified.
    mvp_dataframe : pandas.DataFrame
        Needs a ``name`` column addressed by season position.
    team_dic : dict
        Only its keys (team abbreviations) are used here.
    team_miscellaneous_stat : list of pandas.DataFrame
        Per-season team tables with ``Team`` and ``W`` columns.
    player_stat : list of pandas.DataFrame
        Per-season tables with ``Player`` and ``ability_value`` columns.
    top_n : int, optional
        Number of rows kept per season (default 50).

    Returns
    -------
    tuple
        ``(names, mvp_list, features)``: ``names[i]`` is the list of cleaned
        player names of season ``i``, ``mvp_list[i]`` the row position of
        that season's MVP, and ``features[i]`` one list of values per player
        over all remaining columns except ``Player`` (sorted by column name).

    Raises
    ------
    ValueError
        If a season's MVP is not among the kept rows. Skipping the season
        silently would shift every later label by one.
    IndexError
        If a ``Tm`` value is a key of ``team_dic`` but has no row in that
        season's team table.
    """
    def _clean_name(raw_name):
        # Keep the part before the first "*", then before the first backslash.
        star_at = raw_name.find("*")
        if star_at != -1:
            raw_name = raw_name[0:star_at]
        slash_at = raw_name.find("\\")
        if slash_at != -1:
            raw_name = raw_name[0:slash_at]
        return raw_name

    names_per_season = []
    mvp_list = []
    features_per_season = []
    for i in range(len(advanced_stat)):
        # Work on a copy so the caller's tables do not gain extra columns.
        season = advanced_stat[i].copy()

        # Team wins, looked up once per distinct team instead of once per row.
        team_table = team_miscellaneous_stat[i]
        wins_by_team = {}
        for team in season["Tm"].unique():
            if team in team_dic:
                wins_by_team[team] = team_table.loc[team_table["Team"] == team, "W"].values[0]
        season["W"] = [wins_by_team.get(team, 0) for team in season["Tm"]]

        # Ability values by raw player name; zip keeps the last duplicate.
        ability_by_player = dict(zip(player_stat[i]["Player"], player_stat[i]["ability_value"]))
        season["ability_value"] = [ability_by_player.get(name, 0) for name in season["Player"]]

        season = season.drop(["Pos","Tm","Rk","Age","Unnamed: 19","Unnamed: 24"],axis=1)
        season = season.sort_values("WS",ascending=False)
        season = season.reset_index(drop=True)  # renumber rows after sorting
        season = season.iloc[0:top_n].fillna(0)

        names = [_clean_name(raw_name) for raw_name in season["Player"]]

        mvp_name = mvp_dataframe.loc[i,"name"]
        if mvp_name not in names:
            raise ValueError(
                "MVP %r of season index %d is not among the top %d players by WS"
                % (mvp_name, i, top_n)
            )
        # Exactly one label per season: the first matching row.
        mvp_list.append(names.index(mvp_name))

        feature_columns = season.columns.difference(["Player"])
        features_per_season.append(season[feature_columns].values.tolist())
        names_per_season.append(names)
    return (names_per_season,mvp_list,features_per_season)

# name_list_train, mvp_list_train, X_train_list = initialize_data(stat_00_10,mvp_train_dataframe,team_dic,miscellaneous_stat_train)
# print("|"*50)
# name_list_test, mvp_list_test ,X_test_list= initialize_data(stat_10_17,mvp_test_dataframe,team_dic,miscellaneous_stat_test)

# Build the per-season player-name lists, MVP row positions and feature tables for all loaded seasons.
name_list_00_17, mvp_list, X_list = initialize_data(advanced_stat_00_17,mvp_dataframe_00_17,team_dic,miscellaneous_stat_00_17,player_player_stat_00_17)

# Sanity checks: number of seasons in the name lists and in the feature tables ...
print(len(name_list_00_17))
print(len(X_list))


# ... and the number of feature values in the first player row of the first season.
print(len(X_list[0][0]))



17
17
24


In [28]:
# Flatten every season's feature table into one row of 50*24 values and pair
# it with that season's MVP row position. Seasons 0-9 form the training set,
# seasons 10-16 the test set.
features_all = torch.tensor(np.array(X_list), dtype=torch.float).view(-1, 50*24)
labels_all = torch.tensor(mvp_list, dtype=torch.long)

X_train, y_train = features_all[0:10], labels_all[0:10]
X_test, y_test = features_all[10:17], labels_all[10:17]

# Report the shapes of both splits, each followed by a "*" separator line.
for split_features, split_labels in ((X_train, y_train), (X_test, y_test)):
    print(split_features.shape)
    print(split_labels.shape)
    print("*")

class Model(torch.nn.Module): 
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    ``forward`` returns raw class scores (logits); no softmax is applied.

    Parameters
    ----------
    in_features : int, optional
        Width of one input row (default 1200).
    hidden_features : int, optional
        Width of the hidden layer (default 600).
    out_features : int, optional
        Number of output scores per row (default 50).
    """
    def __init__(self, in_features=1200, hidden_features=600, out_features=50):
        super(Model, self).__init__() 
        # Attribute names are unchanged so existing state dicts still load.
        self.linear1 = torch.nn.Linear(in_features, hidden_features)
        self.linear2 = torch.nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to ``(batch, out_features)`` logits."""
        hidden = F.relu(self.linear1(x))
        return self.linear2(hidden)
def train_model(model, X_train, y_train, X_test, y_test, epochs=150,  lr=0.1, weight_decay=0.05):
    """Train ``model`` with full-batch SGD on a cross-entropy loss.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; updated in place.
    X_train, y_train : torch.Tensor
        Training inputs and integer class labels.
    X_test, y_test : torch.Tensor
        Accepted for call compatibility; not used during training.
    epochs : int, optional
        Number of full-batch updates.
    lr : float, optional
        SGD learning rate.
    weight_decay : float, optional
        L2 penalty passed to the optimizer.

    Returns
    -------
    list of float
        Training loss recorded before each update.
    """
    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
    loss_history = []
    for epoch in range(epochs):
        # Clear gradients BEFORE the backward pass so that gradients left on
        # the parameters by earlier code cannot leak into the first update.
        optimizer.zero_grad()
        pred = model(X_train)
        loss = loss_func(pred, y_train)
        loss.backward()
        optimizer.step()
        loss_history.append(loss.item())
    return loss_history
# Train a freshly initialised network on the training split.
model = Model()
train_model(model, X_train, y_train, X_test, y_test, lr=0.1)

# Per-class probabilities for every test row, then the index of the most
# probable class in each row.
y_pred_softmax = F.softmax(model(X_test), dim=1)
y_pred = np.argmax(y_pred_softmax.detach().numpy(), axis=1)

# Accuracy: share of test rows whose predicted index equals the label.
test_num = len(y_test)
print("test_num",test_num)
right_num = sum(1 for row in range(test_num) if y_test[row] == y_pred[row])
print("accuracy: ",right_num/test_num)



torch.Size([10, 1200])
torch.Size([10])
*
torch.Size([7, 1200])
torch.Size([7])
*
test_num 7
accuracy:  0.5714285714285714
