In [11]:
# 과적합 : 데이터 중 모든 데이터를 학습 데이터로만 사용하면 훈련 데이터에
#          과도하게 적합하게 학습된 나머지 새로운 테스트 데이터에 대해 올바르게 분류하지
#          못하는 일 발생.

# 검증(Validation) : 가용 데이터 중에 일부는 모델을 검증을 위해 사용해야.

# 단, 사용 가능한 데이터가 많지 않을 때, 데이터 샘플을 줄이면 생성되는 모델이 학습에
# 사용한 데이터에 매우 의존적이게 된다.

In [2]:
# 데이터 준비
import pandas as pd
from sklearn import datasets
iris = datasets.load_iris()
print(iris.data[:5])

[[ 5.1  3.5  1.4  0.2]
 [ 4.9  3.   1.4  0.2]
 [ 4.7  3.2  1.3  0.2]
 [ 4.6  3.1  1.5  0.2]
 [ 5.   3.6  1.4  0.2]]


In [3]:
# STEP 01. K-1개의 집합을 이용 모델 학습
# STEP 02. 남은 1개의 집합을 모델 검증에 이용
# 교차 검증으로  모델 평가 : scikit-learn::cross_val_score()메서드 

from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score

clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, iris.data, iris.target, scoring='accuracy')
scores



array([ 0.98039216,  0.92156863,  1.        ])

In [4]:
clf = DecisionTreeClassifier(random_state=7)
clf

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=7,
            splitter='best')

In [5]:
# cross_val_score : 교차 검증으로 학습된 모델을 평가할 수 있다.
# 이 메서드를 통해 K개 집합으로 분리하여 반복 수행하는 것을 할 수 있다.
# clf : 만들어진 모델
# iris.data : 모델에 사용하기 위한 데이터
# iris.target : 목표 feature
# print(iris.data)
scores = cross_val_score(clf, iris.data, iris.target,scoring='accuracy')
scores

array([ 0.98039216,  0.92156863,  1.        ])

In [6]:
import numpy as np
print("Accuracy:{0:f}% [오차 +/-{1:f}%]".format(np.mean(scores) * 100, np.std(scores)))

Accuracy:96.732026% [오차 +/-0.033327%]


## 데이터 수집 및 전처리

### NBA 농구의 승패를 예측하는 모델을 만들기


In [7]:
import os
import pandas as pd

# '~' user home directory
home_folder = os.path.expanduser("~")

# C:\Users\front\Documents\Python\data\NBA_2014_15
# 데이터 폴더 위치 지정
data_folder = os.path.join(home_folder, "Documents", "python", "data", "NBA_2014_15")
print(data_folder)

dat_files = ["nba_2014_10_dat.csv", "nba_2014_11_dat.csv", "nba_2014_12_dat.csv",
                 "nba_2015_01_dat.csv", "nba_2015_02_dat.csv", "nba_2015_03_dat.csv",
                "nba_2015_04_dat.csv", "nba_2015_05_dat.csv", "nba_2015_06_dat.csv"]

C:\Users\front\Documents\python\data\NBA_2014_15


In [8]:
# 파일을 하나씩 읽어와 이 데이터를 cvs_objs에 추가한다.
cvs_dat = [ ]
for f in dat_files:
    month_data =  os.path.join(data_folder, f)
    dat = pd.read_csv(month_data)
    cvs_dat.append(dat)
    # print(month_data)
    
print(cvs_dat)

[               Date Start (ET)         Visitor/Neutral  PTS  \
0   Tue Oct 28 2014   10:30 pm         Houston Rockets  108   
1   Tue Oct 28 2014    8:00 pm           Orlando Magic   84   
2   Tue Oct 28 2014    8:00 pm        Dallas Mavericks  100   
3   Wed Oct 29 2014    7:30 pm           Brooklyn Nets  105   
4   Wed Oct 29 2014    7:00 pm         Milwaukee Bucks  106   
5   Wed Oct 29 2014    9:00 pm         Detroit Pistons   79   
6   Wed Oct 29 2014    7:00 pm      Philadelphia 76ers   91   
7   Wed Oct 29 2014    8:00 pm  Minnesota Timberwolves  101   
8   Wed Oct 29 2014    7:30 pm      Washington Wizards   95   
9   Wed Oct 29 2014    8:00 pm           Chicago Bulls  104   
10  Wed Oct 29 2014   10:00 pm      Los Angeles Lakers   99   
11  Wed Oct 29 2014   10:30 pm   Oklahoma City Thunder   89   
12  Wed Oct 29 2014   10:00 pm   Golden State Warriors   95   
13  Wed Oct 29 2014    7:30 pm           Atlanta Hawks  102   
14  Wed Oct 29 2014    9:00 pm         Houston Rockets

In [9]:
season_result = pd.concat(cvs_dat, ignore_index=True)
season_result.columns = ["Date", "StartTime", "VisitorTeam", "VisitorPts",
                              "HomeTeam", "HomePts", "ScoreType", "Overtime",
                              "Notes"]

season_result.columns

Index(['Date', 'StartTime', 'VisitorTeam', 'VisitorPts', 'HomeTeam', 'HomePts',
       'ScoreType', 'Overtime', 'Notes'],
      dtype='object')

In [10]:
# 홈팀이 승리하면 True, 패배, False
season_result["HomeWin"] = season_result["HomePts"] > season_result["VisitorPts"]
season_result[-5:]

Unnamed: 0,Date,StartTime,VisitorTeam,VisitorPts,HomeTeam,HomePts,ScoreType,Overtime,Notes,HomeWin
1306,Sun Jun 7 2015,8:00 pm,Cleveland Cavaliers,95,Golden State Warriors,93,Box Score,OT,,False
1307,Tue Jun 9 2015,9:00 pm,Golden State Warriors,91,Cleveland Cavaliers,96,Box Score,,,True
1308,Thu Jun 11 2015,9:00 pm,Golden State Warriors,103,Cleveland Cavaliers,82,Box Score,,,False
1309,Sun Jun 14 2015,8:00 pm,Cleveland Cavaliers,91,Golden State Warriors,104,Box Score,,,True
1310,Tue Jun 16 2015,9:00 pm,Golden State Warriors,105,Cleveland Cavaliers,97,Box Score,,,False


In [11]:
# 전체 경기에서 홈팀이 승리한 게임
# 전체 경기에서 홈팀이 승리한 경우, 57.6%, 
score = 100 * season_result["HomeWin"].sum() / season_result['HomeWin'].count()
print("Home Win percentage: {0:.1f}%".format(score))
      

Home Win percentage: 57.6%


In [12]:
# 지난 시즌의 팀별 순위 및 관련 데이터 
# http://www.basketball-reference/leagues/NBA_2015_standings.html
# skiprows : Line numbers to skip (0-indexed) or 
# number of lines to skip (int) at the start of the file.
# 파일의 시작 부분에서 건너 뛸 줄 번호 (0- 색인) 또는 건너 뛸 줄 수
standing_file = os.path.join(data_folder, "nba_2014_2015_standing.csv")
standing_result = pd.read_csv(standing_file, skiprows=[0])
standing_result.head()

Unnamed: 0,Rk,Team,Overall,Home,Road,E,W,A,C,SE,...,Post,≤3,≥10,Oct,Nov,Dec,Jan,Feb,Mar,Apr
0,1,Golden State Warriors,67-15,39-2,28-13,25-5,42-10,9-1,7-3,9-1,...,25-6,5-3,45-9,1-0,13-2,11-3,12-3,8-3,16-2,6-2
1,2,Atlanta Hawks,60-22,35-6,25-16,38-14,22-8,12-6,14-4,12-4,...,17-11,6-4,30-10,0-1,9-5,14-2,17-0,7-4,9-7,4-3
2,3,Houston Rockets,56-26,30-11,26-15,23-7,33-19,9-1,8-2,6-4,...,20-9,8-4,31-14,2-0,11-4,9-5,11-6,7-3,10-6,6-2
3,4,Los Angeles Clippers,56-26,30-11,26-15,19-11,37-15,7-3,6-4,6-4,...,21-7,3-5,33-9,2-0,9-5,11-6,11-4,5-6,11-5,7-0
4,5,Memphis Grizzlies,55-27,31-10,24-17,20-10,35-17,8-2,5-5,7-3,...,16-13,9-3,26-13,2-0,13-2,8-6,12-4,7-4,9-8,4-3


In [13]:
# 자 그러면 선수 개개인의 능력을 알아보자.
# 이중에 PER(Player Efficiency Rating)이 있다.

# 분당 얼마나 활동을 하는 지수로 PER(Player Efficiency Rating)
# 선수 개개인의 전반적인 능력을 나타내는 지수로 활용
# http://insider.espn.com/nba/hollinger/statistics/_/year/2016
# 여기서 결정 트리의 특성 값으로 PER를 이용해 보자.
# 파일을 내려받는다. 
# nba_2016_player_stat.csv

In [14]:
# 파일을 불러오고 이를 player_result로 저장.
player_file = os.path.join(data_folder, "nba_2016_player_stat.csv")
player_result = pd.read_csv(player_file)


In [15]:
player_result["PLAYER"][:3]

0       Stephen Curry, GS
1       Kevin Durant, OKC
2    Boban Marjanovic, SA
Name: PLAYER, dtype: object

In [16]:
# 팀 약자와 팀명을 맵핑
team_name = {"GS"  : "Golden State Warriors",
             "SA"  : "San Antonio Spurs",
             "CLE" : "Cleveland Cavaliers",
             "TOR" : "Toronto Raptors",
             "OKC" : "Oklahoma City Thunder",
             "LAC" : "Los Angeles Clippers",
             "ATL" : "Atlanta Hawks",
             "BOS" : "Boston Celtics",
             "CHA" : "Charlotte Hornets",
             "MIA" : "Miami Heat",
             "IND" : "Indiana Pacers",
             "DET" : "Detroit Pistons",
             "POR" : "Portland Trail Blazers",
             "DAL" : "Dallas Mavericks",
             "MEM" : "Memphis Grizzlies",
             "CHI" : "Chicago Bulls",
             "HOU" : "Houston Rockets",
             "WSH" : "Washington Wizards",
             "UTAH" : "Utah Jazz",
             "ORL" : "Orlando Magic",
             "DEN" : "Denver Nuggets",
             "MIL" : "Milwaukee Bucks",
             "SAC" : "Sacramento Kings",
             "NY" : "New York Knicks",
             "NO" : "New Orleans Pelicans",
             "MIN" : "Minnesota Timberwolves",
             "PHX" : "Phoenix Suns",
             "BKN" : "Brooklyn Nets",
             "LAL" : "Los Angeles Lakers",
             "PHI" : "Philadelphia 76ers",
}

In [17]:
# 이제 NBA 리그의 모든 선수 데이터를 순회하면서 각 선수의 팀과 PER값을 추출.
# team_per 딕셔너리 팀의 이름은 키로, 선수 개개인의 PER값은 리스트 형태로 추가

# 시즌 중에 선수가 이적했을 때, 단순히 양쪽 팀 모두에 추가
# NBA 모든 선수를 정보를 순회하면서 각 선수의 팀과 PER 값을 추출
team_per = {}
for key, value in team_name.items():
    team_per[value] = []            # 각각의 팀별 키:값(리스트) 만들기

# print(team_per)
# 

In [18]:
# 각 player의 성적
player_result.iterrows

<bound method DataFrame.iterrows of       RK                    PLAYER  GP   MPG    TS%   AST    TO   USG   ORR  \
0      1         Stephen Curry, GS  79  34.2  0.669  20.6  10.2  31.6   2.9   
1      2         Kevin Durant, OKC  72  35.8  0.634  16.3  11.3  30.2   2.0   
2      3      Boban Marjanovic, SA  54   9.4  0.662   7.7  10.6  21.0  16.9   
3      4    Russell Westbrook, OKC  80  34.4  0.554  29.0  11.9  33.3   6.1   
4      5         LeBron James, CLE  76  35.6  0.588  21.5  10.4  31.1   4.7   
5      6           Chris Paul, LAC  74  32.7  0.575  33.7   8.9  28.1   1.8   
6      7         Kawhi Leonard, SA  72  33.1  0.616  12.2   6.9  24.2   4.7   
7      8     Hassan Whiteside, MIA  73  29.1  0.629   2.9  13.8  18.8  13.1   
8      9         James Harden, HOU  82  38.1  0.598  20.6  12.6  32.2   2.2   
9     10         Anthony Davis, NO  61  35.5  0.559   7.4   7.7  27.2   6.4   
10    11          Enes Kanter, OKC  82  21.0  0.626   3.4  11.8  21.9  16.7   
11    12     DeM

In [19]:
# 한 행, 한 행의 값을 읽어, player, per의 값을 구한다.
for idx, row in player_result.iterrows():
    player = row["PLAYER"]
    per = row["PER"]
    print("player:{0}, per:{1}".format(player, per))
    team_list = player.split(',')[1].strip(' ').split("/")
    print("team_list:{0}".format(team_list))
    for team in team_list:
        team_per[team_name[team]].append(per)

player:Stephen Curry, GS, per:31.56
team_list:['GS']
player:Kevin Durant, OKC, per:28.25
team_list:['OKC']
player:Boban Marjanovic, SA, per:27.77
team_list:['SA']
player:Russell Westbrook, OKC, per:27.64
team_list:['OKC']
player:LeBron James, CLE, per:27.64
team_list:['CLE']
player:Chris Paul, LAC, per:26.31
team_list:['LAC']
player:Kawhi Leonard, SA, per:26.11
team_list:['SA']
player:Hassan Whiteside, MIA, per:25.69
team_list:['MIA']
player:James Harden, HOU, per:25.36
team_list:['HOU']
player:Anthony Davis, NO, per:25.1
team_list:['NO']
player:Enes Kanter, OKC, per:24.09
team_list:['OKC']
player:DeMarcus Cousins, SAC, per:23.67
team_list:['SAC']
player:Jonas Valanciunas, TOR, per:22.63
team_list:['TOR']
player:Karl-Anthony Towns, MIN, per:22.59
team_list:['MIN']
player:Carl Landry, PHI, per:22.51
team_list:['PHI']
player:LaMarcus Aldridge, SA, per:22.43
team_list:['SA']
player:Damian Lillard, POR, per:22.25
team_list:['POR']
player:Blake Griffin, LAC, per:22.22
team_list:['LAC']
play

team_list:['HOU', 'LAC']
player:Patrick Patterson, TOR, per:10.61
team_list:['TOR']
player:Danny Green, SA, per:10.49
team_list:['SA']
player:Lance Thomas, NY, per:10.45
team_list:['NY']
player:Marcelo Huertas, LAL, per:10.38
team_list:['LAL']
player:Joe Ingles, UTAH, per:10.38
team_list:['UTAH']
player:Jonas Jerebko, BOS, per:10.32
team_list:['BOS']
player:Raul Neto, UTAH, per:10.28
team_list:['UTAH']
player:Anthony Tolliver, DET, per:10.26
team_list:['DET']
player:Andre Roberson, OKC, per:10.23
team_list:['OKC']
player:Jason Terry, HOU, per:10.22
team_list:['HOU']
player:Pablo Prigioni, LAC, per:10.22
team_list:['LAC']
player:Wayne Ellington, BKN, per:10.19
team_list:['BKN']
player:Gerald Green, MIA, per:10.15
team_list:['MIA']
player:Brandon Rush, GS, per:10.12
team_list:['GS']
player:Josh McRoberts, MIA, per:9.97
team_list:['MIA']
player:Corey Brewer, HOU, per:9.97
team_list:['HOU']
player:Emmanuel Mudiay, DEN, per:9.96
team_list:['DEN']
player:Steve Blake, DET, per:9.81
team_list:

In [20]:
# PER 지수 비교 1위 팀과, 30위 팀 비교
# 실제 2015~2016년 1위 팀인 'Golden State Warriors'와 30위 팀인 Philadelphia 76ers를 비교하면
# 2.27정도로 선수별 PER 차이보다 상대적으로 적다.
import numpy as np
str1 = "Golden State Warriors: Sum of PER: "
str2 = "Philadelphia 76ers: Sum of PER: "
idx1 = "Golden State Warriors"
idx2 = "Philadelphia 76ers"
print("{0:s}  {1:.2f} / Mean of PER: {2:.2f}".format(str1,np.sum(team_per[idx1]), np.mean(team_per[idx1])))
print("{0:s}  {1:.2f} / Mean of PER: {2:.2f}".format(str2,np.sum(team_per[idx2]), np.mean(team_per[idx2])))

Golden State Warriors: Sum of PER:   192.37 / Mean of PER: 16.03
Philadelphia 76ers: Sum of PER:   165.17 / Mean of PER: 13.76


In [21]:
season_result[-5:-3]

Unnamed: 0,Date,StartTime,VisitorTeam,VisitorPts,HomeTeam,HomePts,ScoreType,Overtime,Notes,HomeWin
1306,Sun Jun 7 2015,8:00 pm,Cleveland Cavaliers,95,Golden State Warriors,93,Box Score,OT,,False
1307,Tue Jun 9 2015,9:00 pm,Golden State Warriors,91,Cleveland Cavaliers,96,Box Score,,,True


## 06 결정 트리를 이용한 NBA 경기 결과 예측

### NBA 경기의 특성들을 하나씩 추가하면서 생성한 결정 트리 모델로 예측한 경기 결과가 얼마나 일치하는지 확인해 보자.

In [22]:
# 모든 운동 경기에서 상위권의 팀들은 홈 경기와 원정 경기 상관없이 연전연승한다.
# 홈팀과 연전연승한 결과 컬럼 : HomeConWin, VisitorConWin
# VisitorConWin, HomeConWin 컬럼 생성

# 연승 기록을 winning_streak 딕셔너리에 저장.
# 연승 기록을 season_result에 갱신
# 홈팀, 원정팀의 연승 횟수를 세기 위한 추가 컬럼
season_result["HomeConWin"] = 0
season_result["VisitorConWin"] = 0

from collections import defaultdict
winning_streak = defaultdict(int)
print(winning_streak)

for index, row in season_result.iterrows():
    VScore= row["VisitorPts"]
    HScore = row["HomePts"]
    home = row["HomeTeam"]                          # 홈팀 
    visitor = row["VisitorTeam"]                    # 방문팀 
    row["HomeConWin"] = winning_streak[home]        # 특성 추가 : 홈팀 연속 승리
    row["VisitorConWin"] = winning_streak[visitor]  # 특성 추가 : 방문팀 연속 승리
    print("visitor:{0}:{1}, home:{2}:{3}, HomeConWin:{4}, VisitorConWin:{5}".format(
        visitor, VScore, home,HScore, row["HomeConWin"], row["VisitorConWin"]))
    
    season_result.ix[index] = row

    # row["HomeWin']이 1이면 연승 home팀의 키에 1을 더한다.
    if row["HomeWin"]:
        winning_streak[home] += 1
        winning_streak[visitor] = 0
    else:
        winning_streak[home] = 0
        winning_streak[visitor] += 1
        
# 홈팀의 승리 결과는 y_test 변수에 저장
y_test = season_result["HomeWin"].values
y_test

defaultdict(<class 'int'>, {})
visitor:Houston Rockets:108, home:Los Angeles Lakers:90, HomeConWin:0, VisitorConWin:0
visitor:Orlando Magic:84, home:New Orleans Pelicans:101, HomeConWin:0, VisitorConWin:0
visitor:Dallas Mavericks:100, home:San Antonio Spurs:101, HomeConWin:0, VisitorConWin:0
visitor:Brooklyn Nets:105, home:Boston Celtics:121, HomeConWin:0, VisitorConWin:0
visitor:Milwaukee Bucks:106, home:Charlotte Hornets:108, HomeConWin:0, VisitorConWin:0
visitor:Detroit Pistons:79, home:Denver Nuggets:89, HomeConWin:0, VisitorConWin:0
visitor:Philadelphia 76ers:91, home:Indiana Pacers:103, HomeConWin:0, VisitorConWin:0
visitor:Minnesota Timberwolves:101, home:Memphis Grizzlies:105, HomeConWin:0, VisitorConWin:0
visitor:Washington Wizards:95, home:Miami Heat:107, HomeConWin:0, VisitorConWin:0
visitor:Chicago Bulls:104, home:New York Knicks:80, HomeConWin:0, VisitorConWin:0
visitor:Los Angeles Lakers:99, home:Phoenix Suns:119, HomeConWin:0, VisitorConWin:0
visitor:Oklahoma City Thunde

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


visitor:Golden State Warriors:95, home:Sacramento Kings:77, HomeConWin:0, VisitorConWin:0
visitor:Atlanta Hawks:102, home:Toronto Raptors:109, HomeConWin:0, VisitorConWin:0
visitor:Houston Rockets:104, home:Utah Jazz:93, HomeConWin:0, VisitorConWin:1
visitor:New York Knicks:95, home:Cleveland Cavaliers:90, HomeConWin:0, VisitorConWin:0
visitor:Utah Jazz:102, home:Dallas Mavericks:120, HomeConWin:0, VisitorConWin:0
visitor:Oklahoma City Thunder:90, home:Los Angeles Clippers:93, HomeConWin:0, VisitorConWin:0
visitor:Detroit Pistons:91, home:Minnesota Timberwolves:97, HomeConWin:0, VisitorConWin:0
visitor:Washington Wizards:105, home:Orlando Magic:98, HomeConWin:0, VisitorConWin:0
visitor:Cleveland Cavaliers:114, home:Chicago Bulls:108, HomeConWin:1, VisitorConWin:0
visitor:Memphis Grizzlies:97, home:Indiana Pacers:89, HomeConWin:1, VisitorConWin:1
visitor:Los Angeles Clippers:118, home:Los Angeles Lakers:111, HomeConWin:0, VisitorConWin:1
visitor:Philadelphia 76ers:81, home:Milwaukee Buc

visitor:Utah Jazz:97, home:Atlanta Hawks:100, HomeConWin:2, VisitorConWin:0
visitor:Oklahoma City Thunder:109, home:Boston Celtics:94, HomeConWin:2, VisitorConWin:0
visitor:Portland Trail Blazers:130, home:Denver Nuggets:113, HomeConWin:0, VisitorConWin:2
visitor:Indiana Pacers:81, home:Miami Heat:75, HomeConWin:2, VisitorConWin:1
visitor:Houston Rockets:113, home:Minnesota Timberwolves:101, HomeConWin:0, VisitorConWin:0
visitor:Los Angeles Lakers:102, home:New Orleans Pelicans:109, HomeConWin:0, VisitorConWin:0
visitor:Orlando Magic:97, home:New York Knicks:95, HomeConWin:0, VisitorConWin:0
visitor:Brooklyn Nets:104, home:Phoenix Suns:112, HomeConWin:1, VisitorConWin:2
visitor:Detroit Pistons:103, home:Washington Wizards:107, HomeConWin:1, VisitorConWin:0
visitor:Philadelphia 76ers:70, home:Dallas Mavericks:123, HomeConWin:1, VisitorConWin:0
visitor:Brooklyn Nets:99, home:Golden State Warriors:107, HomeConWin:0, VisitorConWin:0
visitor:Sacramento Kings:110, home:Memphis Grizzlies:111,

visitor:Sacramento Kings:89, home:Houston Rockets:102, HomeConWin:2, VisitorConWin:3
visitor:Memphis Grizzlies:99, home:Los Angeles Lakers:93, HomeConWin:0, VisitorConWin:2
visitor:Milwaukee Bucks:103, home:Minnesota Timberwolves:86, HomeConWin:0, VisitorConWin:1
visitor:Utah Jazz:82, home:Oklahoma City Thunder:97, HomeConWin:0, VisitorConWin:0
visitor:Golden State Warriors:111, home:Orlando Magic:96, HomeConWin:0, VisitorConWin:6
visitor:Brooklyn Nets:99, home:Philadelphia 76ers:91, HomeConWin:0, VisitorConWin:0
visitor:Denver Nuggets:112, home:Phoenix Suns:120, HomeConWin:0, VisitorConWin:5
visitor:Indiana Pacers:100, home:San Antonio Spurs:106, HomeConWin:4, VisitorConWin:1
visitor:New Orleans Pelicans:91, home:Atlanta Hawks:100, HomeConWin:0, VisitorConWin:0
visitor:Chicago Bulls:109, home:Boston Celtics:102, HomeConWin:0, VisitorConWin:0
visitor:Golden State Warriors:106, home:Charlotte Hornets:101, HomeConWin:0, VisitorConWin:7
visitor:Phoenix Suns:97, home:Denver Nuggets:122, Ho

visitor:New Orleans Pelicans:107, home:Dallas Mavericks:112, HomeConWin:0, VisitorConWin:2
visitor:Miami Heat:82, home:Denver Nuggets:102, HomeConWin:0, VisitorConWin:1
visitor:Houston Rockets:93, home:Golden State Warriors:105, HomeConWin:13, VisitorConWin:4
visitor:Los Angeles Clippers:103, home:Indiana Pacers:96, HomeConWin:0, VisitorConWin:8
visitor:Portland Trail Blazers:82, home:Minnesota Timberwolves:90, HomeConWin:0, VisitorConWin:5
visitor:Washington Wizards:91, home:Orlando Magic:89, HomeConWin:2, VisitorConWin:1
visitor:New York Knicks:95, home:San Antonio Spurs:109, HomeConWin:0, VisitorConWin:0
visitor:Cleveland Cavaliers:94, home:Oklahoma City Thunder:103, HomeConWin:3, VisitorConWin:8
visitor:Houston Rockets:113, home:Sacramento Kings:109, HomeConWin:0, VisitorConWin:0
visitor:Orlando Magic:81, home:Atlanta Hawks:87, HomeConWin:8, VisitorConWin:0
visitor:New York Knicks:101, home:Boston Celtics:95, HomeConWin:0, VisitorConWin:0
visitor:Philadelphia 76ers:70, home:Brookly

visitor:Minnesota Timberwolves:104, home:Cleveland Cavaliers:125, HomeConWin:2, VisitorConWin:0
visitor:New Orleans Pelicans:84, home:Indiana Pacers:96, HomeConWin:1, VisitorConWin:1
visitor:Golden State Warriors:105, home:Los Angeles Lakers:115, HomeConWin:0, VisitorConWin:2
visitor:Philadelphia 76ers:91, home:Miami Heat:87, HomeConWin:1, VisitorConWin:1
visitor:Charlotte Hornets:108, home:Milwaukee Bucks:101, HomeConWin:0, VisitorConWin:3
visitor:Portland Trail Blazers:115, home:Oklahoma City Thunder:111, HomeConWin:0, VisitorConWin:0
visitor:Boston Celtics:95, home:Orlando Magic:100, HomeConWin:0, VisitorConWin:0
visitor:Dallas Mavericks:115, home:Phoenix Suns:124, HomeConWin:3, VisitorConWin:0
visitor:Chicago Bulls:99, home:Washington Wizards:91, HomeConWin:0, VisitorConWin:3
visitor:Los Angeles Lakers:93, home:Chicago Bulls:113, HomeConWin:4, VisitorConWin:1
visitor:Golden State Warriors:86, home:Los Angeles Clippers:100, HomeConWin:0, VisitorConWin:0
visitor:Cleveland Cavaliers:9

visitor:Los Angeles Lakers:94, home:Portland Trail Blazers:98, HomeConWin:0, VisitorConWin:1
visitor:Indiana Pacers:105, home:Utah Jazz:101, HomeConWin:1, VisitorConWin:0
visitor:Phoenix Suns:102, home:Milwaukee Bucks:96, HomeConWin:1, VisitorConWin:2
visitor:Detroit Pistons:105, home:San Antonio Spurs:104, HomeConWin:2, VisitorConWin:5
visitor:Memphis Grizzlies:86, home:Atlanta Hawks:96, HomeConWin:5, VisitorConWin:1
visitor:Boston Celtics:89, home:Brooklyn Nets:81, HomeConWin:0, VisitorConWin:0
visitor:Utah Jazz:97, home:Chicago Bulls:77, HomeConWin:3, VisitorConWin:0
visitor:New Orleans Pelicans:94, home:Charlotte Hornets:98, HomeConWin:2, VisitorConWin:0
visitor:Houston Rockets:105, home:Cleveland Cavaliers:93, HomeConWin:0, VisitorConWin:0
visitor:Detroit Pistons:108, home:Dallas Mavericks:95, HomeConWin:6, VisitorConWin:6
visitor:Orlando Magic:90, home:Denver Nuggets:93, HomeConWin:2, VisitorConWin:0
visitor:Indiana Pacers:102, home:Golden State Warriors:117, HomeConWin:4, Visito

visitor:New Orleans Pelicans:92, home:New York Knicks:99, HomeConWin:0, VisitorConWin:1
visitor:Los Angeles Lakers:100, home:Phoenix Suns:115, HomeConWin:2, VisitorConWin:0
visitor:Sacramento Kings:94, home:Portland Trail Blazers:98, HomeConWin:0, VisitorConWin:0
visitor:Philadelphia 76ers:76, home:Washington Wizards:111, HomeConWin:1, VisitorConWin:0
visitor:San Antonio Spurs:109, home:Denver Nuggets:99, HomeConWin:0, VisitorConWin:3
visitor:Oklahoma City Thunder:94, home:Miami Heat:86, HomeConWin:1, VisitorConWin:2
visitor:Indiana Pacers:91, home:Atlanta Hawks:110, HomeConWin:13, VisitorConWin:0
visitor:Miami Heat:76, home:Charlotte Hornets:78, HomeConWin:2, VisitorConWin:0
visitor:Utah Jazz:92, home:Cleveland Cavaliers:106, HomeConWin:3, VisitorConWin:0
visitor:Orlando Magic:118, home:Detroit Pistons:128, HomeConWin:0, VisitorConWin:0
visitor:Houston Rockets:113, home:Golden State Warriors:126, HomeConWin:2, VisitorConWin:1
visitor:Toronto Raptors:86, home:Memphis Grizzlies:92, Home

visitor:Memphis Grizzlies:102, home:Phoenix Suns:101, HomeConWin:0, VisitorConWin:6
visitor:Milwaukee Bucks:82, home:Toronto Raptors:75, HomeConWin:6, VisitorConWin:3
visitor:Charlotte Hornets:92, home:Washington Wizards:88, HomeConWin:0, VisitorConWin:1
visitor:Miami Heat:91, home:Detroit Pistons:108, HomeConWin:1, VisitorConWin:1
visitor:Boston Celtics:108, home:New York Knicks:97, HomeConWin:1, VisitorConWin:0
visitor:Denver Nuggets:98, home:Philadelphia 76ers:105, HomeConWin:0, VisitorConWin:0
visitor:Utah Jazz:102, home:Portland Trail Blazers:103, HomeConWin:0, VisitorConWin:1
visitor:Golden State Warriors:121, home:Sacramento Kings:96, HomeConWin:1, VisitorConWin:1
visitor:Washington Wizards:96, home:Atlanta Hawks:105, HomeConWin:0, VisitorConWin:0
visitor:Denver Nuggets:100, home:Boston Celtics:104, HomeConWin:1, VisitorConWin:0
visitor:Dallas Mavericks:114, home:Golden State Warriors:128, HomeConWin:2, VisitorConWin:3
visitor:Chicago Bulls:90, home:Houston Rockets:101, HomeConW

visitor:Charlotte Hornets:81, home:Dallas Mavericks:92, HomeConWin:1, VisitorConWin:0
visitor:Washington Wizards:89, home:Detroit Pistons:106, HomeConWin:1, VisitorConWin:0
visitor:Golden State Warriors:98, home:Indiana Pacers:104, HomeConWin:2, VisitorConWin:4
visitor:Boston Celtics:111, home:Los Angeles Lakers:118, HomeConWin:0, VisitorConWin:0
visitor:Atlanta Hawks:97, home:Milwaukee Bucks:86, HomeConWin:4, VisitorConWin:0
visitor:Cleveland Cavaliers:101, home:New York Knicks:83, HomeConWin:0, VisitorConWin:1
visitor:Denver Nuggets:94, home:Oklahoma City Thunder:119, HomeConWin:5, VisitorConWin:0
visitor:Philadelphia 76ers:98, home:Orlando Magic:103, HomeConWin:2, VisitorConWin:0
visitor:Memphis Grizzlies:98, home:Portland Trail Blazers:92, HomeConWin:0, VisitorConWin:0
visitor:Milwaukee Bucks:71, home:Chicago Bulls:87, HomeConWin:1, VisitorConWin:0
visitor:Brooklyn Nets:110, home:Denver Nuggets:82, HomeConWin:0, VisitorConWin:1
visitor:Minnesota Timberwolves:102, home:Houston Rocke

visitor:Houston Rockets:114, home:Denver Nuggets:100, HomeConWin:0, VisitorConWin:1
visitor:Sacramento Kings:109, home:Miami Heat:114, HomeConWin:0, VisitorConWin:0
visitor:Washington Wizards:85, home:Milwaukee Bucks:91, HomeConWin:0, VisitorConWin:1
visitor:Portland Trail Blazers:113, home:Minnesota Timberwolves:121, HomeConWin:0, VisitorConWin:5
visitor:Memphis Grizzlies:89, home:New Orleans Pelicans:95, HomeConWin:0, VisitorConWin:2
visitor:Indiana Pacers:92, home:New York Knicks:86, HomeConWin:0, VisitorConWin:4
visitor:Atlanta Hawks:84, home:Philadelphia 76ers:92, HomeConWin:0, VisitorConWin:6
visitor:Utah Jazz:95, home:Brooklyn Nets:88, HomeConWin:0, VisitorConWin:1
visitor:Charlotte Hornets:108, home:Detroit Pistons:101, HomeConWin:0, VisitorConWin:4
visitor:Los Angeles Clippers:98, home:Golden State Warriors:106, HomeConWin:2, VisitorConWin:0
visitor:Dallas Mavericks:100, home:Los Angeles Lakers:93, HomeConWin:0, VisitorConWin:0
visitor:Toronto Raptors:104, home:Oklahoma City T

visitor:Milwaukee Bucks:127, home:Brooklyn Nets:129, HomeConWin:0, VisitorConWin:0
visitor:Toronto Raptors:92, home:Chicago Bulls:108, HomeConWin:1, VisitorConWin:2
visitor:Indiana Pacers:92, home:Cleveland Cavaliers:95, HomeConWin:1, VisitorConWin:0
visitor:Memphis Grizzlies:112, home:Dallas Mavericks:101, HomeConWin:3, VisitorConWin:0
visitor:New Orleans Pelicans:96, home:Golden State Warriors:112, HomeConWin:3, VisitorConWin:0
visitor:Washington Wizards:99, home:Los Angeles Clippers:113, HomeConWin:2, VisitorConWin:5
visitor:Denver Nuggets:91, home:Miami Heat:108, HomeConWin:2, VisitorConWin:0
visitor:Atlanta Hawks:115, home:Oklahoma City Thunder:123, HomeConWin:1, VisitorConWin:0
visitor:Portland Trail Blazers:104, home:Orlando Magic:111, HomeConWin:0, VisitorConWin:0
visitor:New York Knicks:81, home:Philadelphia 76ers:97, HomeConWin:1, VisitorConWin:0
visitor:Charlotte Hornets:91, home:Sacramento Kings:101, HomeConWin:0, VisitorConWin:0
visitor:Boston Celtics:89, home:San Antonio 

visitor:Chicago Bulls:91, home:Milwaukee Bucks:95, HomeConWin:0, VisitorConWin:3
visitor:Toronto Raptors:113, home:Minnesota Timberwolves:99, HomeConWin:0, VisitorConWin:2
visitor:Brooklyn Nets:100, home:New York Knicks:98, HomeConWin:0, VisitorConWin:4
visitor:Dallas Mavericks:135, home:Oklahoma City Thunder:131, HomeConWin:1, VisitorConWin:0
visitor:San Antonio Spurs:103, home:Orlando Magic:91, HomeConWin:0, VisitorConWin:4
visitor:Los Angeles Clippers:126, home:Portland Trail Blazers:122, HomeConWin:4, VisitorConWin:0
visitor:Denver Nuggets:84, home:Utah Jazz:98, HomeConWin:2, VisitorConWin:0
visitor:Philadelphia 76ers:93, home:Washington Wizards:106, HomeConWin:0, VisitorConWin:0
visitor:Miami Heat:88, home:Cleveland Cavaliers:114, HomeConWin:1, VisitorConWin:0
visitor:Houston Rockets:108, home:Dallas Mavericks:101, HomeConWin:1, VisitorConWin:1
visitor:Phoenix Suns:106, home:Golden State Warriors:107, HomeConWin:10, VisitorConWin:0
visitor:Milwaukee Bucks:110, home:Boston Celtics:

visitor:Washington Wizards:108, home:Cleveland Cavaliers:113, HomeConWin:1, VisitorConWin:0
visitor:Portland Trail Blazers:98, home:Dallas Mavericks:114, HomeConWin:0, VisitorConWin:0
visitor:Denver Nuggets:126, home:Golden State Warriors:133, HomeConWin:3, VisitorConWin:0
visitor:Utah Jazz:91, home:Houston Rockets:117, HomeConWin:2, VisitorConWin:2
visitor:Sacramento Kings:122, home:Los Angeles Lakers:99, HomeConWin:0, VisitorConWin:1
visitor:Indiana Pacers:83, home:Memphis Grizzlies:95, HomeConWin:0, VisitorConWin:6
visitor:Boston Celtics:105, home:Milwaukee Bucks:100, HomeConWin:3, VisitorConWin:5
visitor:Oklahoma City Thunder:138, home:Minnesota Timberwolves:113, HomeConWin:0, VisitorConWin:1
visitor:San Antonio Spurs:103, home:New Orleans Pelicans:108, HomeConWin:1, VisitorConWin:11
visitor:Detroit Pistons:112, home:New York Knicks:90, HomeConWin:2, VisitorConWin:0
visitor:Miami Heat:105, home:Philadelphia 76ers:101, HomeConWin:0, VisitorConWin:1
visitor:Charlotte Hornets:87, home

array([False,  True,  True, ..., False,  True, False], dtype=bool)

In [24]:
season_result

Unnamed: 0,Date,StartTime,VisitorTeam,VisitorPts,HomeTeam,HomePts,ScoreType,Overtime,Notes,HomeWin,HomeConWin,VisitorConWin
0,Tue Oct 28 2014,10:30 pm,Houston Rockets,108,Los Angeles Lakers,90,Box Score,,,False,0,0
1,Tue Oct 28 2014,8:00 pm,Orlando Magic,84,New Orleans Pelicans,101,Box Score,,,True,0,0
2,Tue Oct 28 2014,8:00 pm,Dallas Mavericks,100,San Antonio Spurs,101,Box Score,,,True,0,0
3,Wed Oct 29 2014,7:30 pm,Brooklyn Nets,105,Boston Celtics,121,Box Score,,,True,0,0
4,Wed Oct 29 2014,7:00 pm,Milwaukee Bucks,106,Charlotte Hornets,108,Box Score,OT,,True,0,0
5,Wed Oct 29 2014,9:00 pm,Detroit Pistons,79,Denver Nuggets,89,Box Score,,,True,0,0
6,Wed Oct 29 2014,7:00 pm,Philadelphia 76ers,91,Indiana Pacers,103,Box Score,,,True,0,0
7,Wed Oct 29 2014,8:00 pm,Minnesota Timberwolves,101,Memphis Grizzlies,105,Box Score,,,True,0,0
8,Wed Oct 29 2014,7:30 pm,Washington Wizards,95,Miami Heat,107,Box Score,,,True,0,0
9,Wed Oct 29 2014,8:00 pm,Chicago Bulls,104,New York Knicks,80,Box Score,,,False,0,0


In [25]:
# 의사결정트리 - 예측에 적용
# 
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score


clf = DecisionTreeClassifier(random_state=7)
x_test = season_result[["HomeConWin", "VisitorConWin"]].values
scores = cross_val_score(clf, x_test, y_test, scoring='accuracy')

# 정확도 : 59%,  표준편차 : 0.01%로 승리 예측 
print("Accuracy: {0:.1f}% (+/- {1:.2f}%)".format(np.mean(scores) * 100, np.std(scores)))


Accuracy: 59.0% (+/- 0.01%)


In [26]:
# 팀별 보유 선수의 PER지수를 특성 값으로 결정 트리 학습
# 팀관련 정보를 저장한 standing_result에 신규 컬럼 'PER_Sum'을 생성.
# 팀별로 PER 지수 상위 10명의 PER 지수 합계를 저장하겠다.

standing_result["PER_Sum"] = 0   # 신규 컬럼 생성
for idx, row in standing_result.iterrows():
    print("idx ={0} row={1} ".format(idx, row))
    team = row["Team"]
    row["PER_Sum"] = np.sum(team_per[team][:10])
    standing_result.ix[idx] = row

idx =0 row=Rk                             1
Team       Golden State Warriors
Overall                    67-15
Home                        39-2
Road                       28-13
E                           25-5
W                          42-10
A                            9-1
C                            7-3
SE                           9-1
NW                          15-3
P                           13-3
SW                          14-4
Pre                         42-9
Post                        25-6
≤3                           5-3
≥10                         45-9
Oct                          1-0
Nov                         13-2
Dec                         11-3
Jan                         12-3
Feb                          8-3
Mar                         16-2
Apr                          6-2
PER_Sum                        0
Name: 0, dtype: object 
idx =1 row=Rk                     2
Team       Atlanta Hawks
Overall            60-22
Home                35-6
Road               25-16
E   

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.


idx =3 row=Rk                            4
Team       Los Angeles Clippers
Overall                   56-26
Home                      30-11
Road                      26-15
E                         19-11
W                         37-15
A                           7-3
C                           6-4
SE                          6-4
NW                         15-3
P                          12-4
SW                         10-8
Pre                       35-19
Post                       21-7
≤3                          3-5
≥10                        33-9
Oct                         2-0
Nov                         9-5
Dec                        11-6
Jan                        11-4
Feb                         5-6
Mar                        11-5
Apr                         7-0
PER_Sum                       0
Name: 3, dtype: object 
idx =4 row=Rk                         5
Team       Memphis Grizzlies
Overall                55-27
Home                   31-10
Road                   24-17
E        

idx =18 row=Rk                     19
Team       Indiana Pacers
Overall             38-44
Home                23-18
Road                15-26
E                   28-24
W                   10-20
A                     9-9
C                     8-8
SE                   11-7
NW                    4-6
P                     2-8
SW                    4-6
Pre                 21-33
Post                17-11
≤3                   4-12
≥10                 19-20
Oct                   1-1
Nov                   6-9
Dec                  5-11
Jan                  5-11
Feb                   7-2
Mar                   8-8
Apr                   6-2
PER_Sum                 0
Name: 18, dtype: object 
idx =19 row=Rk                20
Team       Utah Jazz
Overall        38-44
Home           21-20
Road           17-24
E              15-15
W              23-29
A                6-4
C                6-4
SE               3-7
NW               9-7
P               7-11
SW              7-11
Pre            19-34
Post   

In [None]:
# 지난 시즌 팀별 순위 -> PER_Sum은 추가된 PER 합계 지수 
standing_result

In [27]:
# standing_result[standing_result["Team"] == home]["PER_Sum"].values[0]
# home팀의 결과
standing_result[standing_result["Team"] == "Minnesota Timberwolves"]["PER_Sum"].values[0]

149.91999999999999

In [28]:
# 
season_result.ix[1:2]

Unnamed: 0,Date,StartTime,VisitorTeam,VisitorPts,HomeTeam,HomePts,ScoreType,Overtime,Notes,HomeWin,HomeConWin,VisitorConWin
1,Tue Oct 28 2014,8:00 pm,Orlando Magic,84,New Orleans Pelicans,101,Box Score,,,True,0,0
2,Tue Oct 28 2014,8:00 pm,Dallas Mavericks,100,San Antonio Spurs,101,Box Score,,,True,0,0


In [35]:
for idx, row in season_result.iterrows():
    print(idx)
    #print("++++++++++++++++++++++")
    #print(row)
    #print("++++++++++++++++++++++")
    row["HomePERHigh"] = True
    # print(row)
    print("++++++++++++++++++++++")
    # season_result.ix[idx] = row
    
season_result.ix[1] = row
season_result.ix[1]

0
++++++++++++++++++++++
1
++++++++++++++++++++++
2
++++++++++++++++++++++
3
++++++++++++++++++++++
4
++++++++++++++++++++++
5
++++++++++++++++++++++
6
++++++++++++++++++++++
7
++++++++++++++++++++++
8
++++++++++++++++++++++
9
++++++++++++++++++++++
10
++++++++++++++++++++++
11
++++++++++++++++++++++
12
++++++++++++++++++++++
13
++++++++++++++++++++++
14
++++++++++++++++++++++
15
++++++++++++++++++++++
16
++++++++++++++++++++++
17
++++++++++++++++++++++
18
++++++++++++++++++++++
19
++++++++++++++++++++++
20
++++++++++++++++++++++
21
++++++++++++++++++++++
22
++++++++++++++++++++++
23
++++++++++++++++++++++
24
++++++++++++++++++++++
25
++++++++++++++++++++++
26
++++++++++++++++++++++
27
++++++++++++++++++++++
28
++++++++++++++++++++++
29
++++++++++++++++++++++
30
++++++++++++++++++++++
31
++++++++++++++++++++++
32
++++++++++++++++++++++
33
++++++++++++++++++++++
34
++++++++++++++++++++++
35
++++++++++++++++++++++
36
++++++++++++++++++++++
37
++++++++++++++++++++++
38
+++++++++++++++++++

631
++++++++++++++++++++++
632
++++++++++++++++++++++
633
++++++++++++++++++++++
634
++++++++++++++++++++++
635
++++++++++++++++++++++
636
++++++++++++++++++++++
637
++++++++++++++++++++++
638
++++++++++++++++++++++
639
++++++++++++++++++++++
640
++++++++++++++++++++++
641
++++++++++++++++++++++
642
++++++++++++++++++++++
643
++++++++++++++++++++++
644
++++++++++++++++++++++
645
++++++++++++++++++++++
646
++++++++++++++++++++++
647
++++++++++++++++++++++
648
++++++++++++++++++++++
649
++++++++++++++++++++++
650
++++++++++++++++++++++
651
++++++++++++++++++++++
652
++++++++++++++++++++++
653
++++++++++++++++++++++
654
++++++++++++++++++++++
655
++++++++++++++++++++++
656
++++++++++++++++++++++
657
++++++++++++++++++++++
658
++++++++++++++++++++++
659
++++++++++++++++++++++
660
++++++++++++++++++++++
661
++++++++++++++++++++++
662
++++++++++++++++++++++
663
++++++++++++++++++++++
664
++++++++++++++++++++++
665
++++++++++++++++++++++
666
++++++++++++++++++++++
667
++++++++++++++++++++++
6

++++++++++++++++++++++
1040
++++++++++++++++++++++
1041
++++++++++++++++++++++
1042
++++++++++++++++++++++
1043
++++++++++++++++++++++
1044
++++++++++++++++++++++
1045
++++++++++++++++++++++
1046
++++++++++++++++++++++
1047
++++++++++++++++++++++
1048
++++++++++++++++++++++
1049
++++++++++++++++++++++
1050
++++++++++++++++++++++
1051
++++++++++++++++++++++
1052
++++++++++++++++++++++
1053
++++++++++++++++++++++
1054
++++++++++++++++++++++
1055
++++++++++++++++++++++
1056
++++++++++++++++++++++
1057
++++++++++++++++++++++
1058
++++++++++++++++++++++
1059
++++++++++++++++++++++
1060
++++++++++++++++++++++
1061
++++++++++++++++++++++
1062
++++++++++++++++++++++
1063
++++++++++++++++++++++
1064
++++++++++++++++++++++
1065
++++++++++++++++++++++
1066
++++++++++++++++++++++
1067
++++++++++++++++++++++
1068
++++++++++++++++++++++
1069
++++++++++++++++++++++
1070
++++++++++++++++++++++
1071
++++++++++++++++++++++
1072
++++++++++++++++++++++
1073
++++++++++++++++++++++
1074
++++++++++++++++++++

Date                   Tue Jun 16 2015
StartTime                      9:00 pm
VisitorTeam      Golden State Warriors
VisitorPts                         105
HomeTeam           Cleveland Cavaliers
HomePts                             97
ScoreType                    Box Score
Overtime                           NaN
Notes                              NaN
HomeWin                          False
HomeConWin                           0
VisitorConWin                        2
Name: 1, dtype: object

In [36]:
# 신규 컬럼 생성
# PER 지수를 고려한다.10명의 PER 지수 합계가 높은 경우를 1, 그렇지 않은 경우를 0으로 설정.
# 홈팀과 연전연승한 결과 컬럼 : HomeConWin, VisitorConWin
# .values[0] 뭔지 모르게 
season_result["HomePERHigh"]=0             # HomePERHigh 신규 컬럼 생성 
for idx, row in season_result.iterrows():  
    home = row["HomeTeam"]
    visitor=row["VisitorTeam"]
    
    # home_per : 홈팀의 per 지수, visitor_per : 방문팀의 per 합
    home_per = standing_result[standing_result["Team"] == home]["PER_Sum"].values[0]
    visitor_per = standing_result[standing_result["Team"] == visitor]["PER_Sum"].values[0]
    
    # 이를 비교해서 "HomePERHigh" 칼럼 추가히기 
    row["HomePERHigh"] = int(home_per > visitor_per)
    season_result.ix[idx] = row
season_result

Unnamed: 0,Date,StartTime,VisitorTeam,VisitorPts,HomeTeam,HomePts,ScoreType,Overtime,Notes,HomeWin,HomeConWin,VisitorConWin,HomePERHigh
0,Tue Oct 28 2014,10:30 pm,Houston Rockets,108,Los Angeles Lakers,90,Box Score,,,False,0,0,0
1,Tue Jun 16 2015,9:00 pm,Golden State Warriors,105,Cleveland Cavaliers,97,Box Score,,,False,0,2,0
2,Tue Oct 28 2014,8:00 pm,Dallas Mavericks,100,San Antonio Spurs,101,Box Score,,,True,0,0,1
3,Wed Oct 29 2014,7:30 pm,Brooklyn Nets,105,Boston Celtics,121,Box Score,,,True,0,0,1
4,Wed Oct 29 2014,7:00 pm,Milwaukee Bucks,106,Charlotte Hornets,108,Box Score,OT,,True,0,0,1
5,Wed Oct 29 2014,9:00 pm,Detroit Pistons,79,Denver Nuggets,89,Box Score,,,True,0,0,1
6,Wed Oct 29 2014,7:00 pm,Philadelphia 76ers,91,Indiana Pacers,103,Box Score,,,True,0,0,1
7,Wed Oct 29 2014,8:00 pm,Minnesota Timberwolves,101,Memphis Grizzlies,105,Box Score,,,True,0,0,1
8,Wed Oct 29 2014,7:30 pm,Washington Wizards,95,Miami Heat,107,Box Score,,,True,0,0,1
9,Wed Oct 29 2014,8:00 pm,Chicago Bulls,104,New York Knicks,80,Box Score,,,False,0,0,0


In [37]:
# 59%의 정확도로 팀별 연승을 고려한 것과 비슷한 결과이다.
x_test = season_result[["HomePERHigh"]].values
clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, x_test, y_test, scoring='accuracy')
print("Accuracy: {0:.1f}% (+/- {1:.2f}%".format(np.mean(scores) * 100, np.std(scores)))
                        

Accuracy: 59.0% (+/- 0.03%


In [38]:
# 팀의 연승 기록과 PER 지수를 함께 고려, 약 0.1% 상승한 정확도
x_test = season_result[["HomeConWin", "VisitorConWin", "HomePERHigh"]].values
clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, x_test, y_test, scoring='accuracy')
scores

array([ 0.62100457,  0.56292906,  0.60321101])

In [32]:
print("정확도 : {0:.1f}% (+/- {1:.2f}%)".format(np.mean(scores) * 100, np.std(scores)))

정확도 : 59.1% (+/- 0.02%)


## 매 경기마다 각 팀의 이름을 특성값으로 사용해 결정 트리 학습하기

In [40]:
# NBA는 총 30개의 팀이 있다. 각 팀에 0부터 29까지의 숫자를 부여할 수 있다.
# scikit-learn 라이브러리에서는 LabelEncoder를 이용해 다음과 같이 팀 이름을 문자열로 변환한다.
# 그리고 다시 팀이름을 숫자 값으로 변환.
from sklearn.preprocessing import LabelEncoder
name_encoding = LabelEncoder()
name_encoding.fit(season_result["HomeTeam"].values)

LabelEncoder()

In [44]:
# 아래 두 개의 팀은 챔피언 결정전이다. 
# 우리는 이를 숫자 9,5로 나타낼 수 있다.
print(name_encoding.transform(["Golden State Warriors", "Cleveland Cavaliers"])) #팀 이름 숫자 변경
print(name_encoding.inverse_transform([9,5])) # 팀 번호에서 팀으로 변경하기
print(name_encoding.transform(["Los Angeles Clippers"]))

[9 5]
['Golden State Warriors' 'Cleveland Cavaliers']
[12]


In [45]:
home_teams = name_encoding.transform(season_result["HomeTeam"].values)
visitor_teams=name_encoding.transform(season_result["VisitorTeam"].values)
home_teams

array([13,  5, 26, ...,  5,  9,  5], dtype=int64)

In [46]:
# 숫자를 이용해서 표현한 경기 일정을 
# team_match에 추가하겠다.
team_match=np.vstack([home_teams, visitor_teams]).T
team_match

array([[13, 10],
       [ 5,  9],
       [26,  6],
       ..., 
       [ 5,  9],
       [ 9,  5],
       [ 5,  9]], dtype=int64)

In [47]:
# DecisionTreeClassfier는 3과 4의 차이는 매우 작다고 판단,
# 1과 20의 차이는 매우 크다고 판단 이는 팀의 이름이기때문에
# 따라서 이는 크다 작다 판단하면 올바른 판단이 아니다.
# 이를 위해 OneHotEncoder를 이용해 숫자 쌍 정보를 카테고리 범주 형태로 변경 가능.
# 확인 결과 59.6%로 팀의 성적 향상
from sklearn.preprocessing import OneHotEncoder
onehot = OneHotEncoder()
x_test = onehot.fit_transform(team_match).todense()
print(x_test)
clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, x_test, y_test, scoring="accuracy")
print("Accuracy: {0:.1f}% (+/- {1:.2f}%)".format(np.mean(scores) * 100, np.std(scores)))

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
Accuracy: 59.4% (+/- 0.03%)


### 지난 시즌의 팀별 순위 고려 결정 트리 학습시키기

In [48]:
season_result["HomeRankHigh"] = 0   # 신규 컬럼 생성
for idx, row in season_result.iterrows():
    home = row["HomeTeam"]
    visitor = row["VisitorTeam"]
    
    home_rank=standing_result[standing_result["Team"] == home]["Rk"].values[0]
    visitor_rank=standing_result[standing_result["Team"] == visitor]["Rk"].values[0]
    row["HomeRankHigh"] = int(home_rank>visitor_rank)
    season_result.ix[idx] = row

In [49]:
# 지난 시즌의 순위 고려시, 결정 트리 모델 검증 결과 69.9%의 정확도 승리팀 맞춤.
x_test = season_result[["HomeRankHigh"]].values
clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, x_test, y_test, scoring="accuracy")
print("Accuracy: {0:.1f}% (+/- {1:.2f}%)".format(np.mean(scores) * 100, np.std(scores)))

Accuracy: 69.9% (+/- 0.03%)


In [46]:
# 마지막으로 지금까지 조사했던 팀별 순위, PER, 연승 기록을 모두 같이 고려시 66.4%의 정확도 승패 예측
x_test = season_result[["HomeRankHigh", "HomePERHigh", "HomeWinStreak", "VisitorWinStreak"]].values
clf = DecisionTreeClassifier(random_state=7)
scores = cross_val_score(clf, x_test, y_test, scoring="accuracy")
print("Accuracy: {0:.1f}% (+/- {1:.2f}%)".format(np.mean(scores) * 100, np.std(scores)))

Accuracy: 66.4% (+/- 0.03%)


In [None]:
# 이외에도 서포트 벡터 머신, 회귀분석, 신경망, 딥러닝, 등의 다양한