# 제 6 장 __지도학습의 이해__
___

## __사전설정__
---

(1) 저장소 데이터 가져오기

In [None]:
!rm -rf /content/BizDataAnalysis/

In [None]:
!git clone https://github.com/BizStat/BizDataAnalysis.git


(2) matplotlib 환경에서 한글 사용

In [None]:
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -f -v
!rm ~/.cache/matplotlib -rf

런타임 메뉴에서 '세션 다시 시작' 후 다음의 명령문 실행

In [None]:
from matplotlib import rc
# Select the Nanum font family so Korean text in titles and labels can be rendered
# (presumably provided by the fonts-nanum package installed above — TODO confirm the family name).
rc('font', family='NanumGothicCoding')
# Disable the Unicode minus glyph on axes so negative tick labels use a plain hyphen.
rc('axes', unicode_minus=False)

(3) 구글 드라이브 연결

In [None]:
from google.colab import drive
drive.mount('/content/gdrive/')

___

## 6.2 __예측모형의 이해와 활용__

### (1) Boston house-price 데이터

* 데이터 가져오기

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the whitespace-delimited Boston file. The separator is a regex, so it is written as a
# raw string: '\s' inside a normal string literal is an invalid escape sequence.
boston = pd.read_table('/content/BizDataAnalysis/DATA/Boston.txt', sep=r'\s+')

In [None]:
boston

* 데이터 살펴보기

In [None]:
boston.describe()

In [None]:
boston.hist(bins=20)

In [None]:
import seaborn as sns

In [None]:
sns.pairplot(data=boston)

* 데이터 분할 : 학습데이터와 평가데이터

In [None]:
boston.iloc[:,[-1]]

In [None]:
boston.iloc[:,-1]

In [None]:
boston.iloc[:,0:13]

In [None]:
# Predictors: the first 13 columns, selected by position.
X = boston.iloc[:,0:13]
# Response: the last column; the list indexer [-1] keeps it as a one-column DataFrame.
y = boston.iloc[:,[-1]]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [None]:
y_train.mean()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=3)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=3,train_size=0.7)

In [None]:
X_train

---
__선형회귀 알고리즘 설명__

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
boston['LSTAT']

In [None]:
import numpy as np

In [None]:
# Integers 0 through 9 as a 1-D array; the bare name echoes it as the cell output.
tmp = np.arange(0, 10, 1)
tmp

In [None]:
tmp.reshape(2,5)

In [None]:
tmp.reshape(10,1)

In [None]:
tmp.reshape(-1,1)

In [None]:
# Predictor as an (n, 1) column and response as a flat vector, both copied out of the DataFrame.
indVar = boston['LSTAT'].to_numpy(copy=True).reshape(-1, 1)
depVar = boston['MEDV'].to_numpy(copy=True)

In [None]:
# Scatter plot of the LSTAT predictor (x) against MEDV (y).
plt.scatter(indVar,depVar,color="darkgrey")
plt.title('지역별 하위계층비율(LSTAT)과 주택가격(MEDV)의 산점도')
plt.xlabel('LSTAT')  # label matches the plotted column name (was misspelled 'LSAT')
plt.ylabel('MEDV')
plt.show()

In [None]:
# Simple linear regression of depVar on indVar with an intercept term; keep the in-sample predictions.
SLR = LinearRegression(fit_intercept=True).fit(indVar, depVar)
fitVal = SLR.predict(indVar)

In [None]:
SLR.score(indVar,depVar)

In [None]:
a = np.array([[10]])
SLR.predict(a)

In [None]:
# Observed points plus the fitted simple-regression line stored in fitVal.
plt.scatter(indVar[:,0],depVar,color="darkgrey",label='실제값')
plt.plot(indVar[:,0],fitVal,color='black',label='예측값')
plt.title('지역별 하위계층비율(LSTAT)과 주택가격(MEDV)의 산점도와 단순선형회귀적합 결과')
plt.xlabel('LSTAT')  # label matches the plotted column name (was misspelled 'LSAT')
plt.ylabel('MEDV')
plt.legend()
plt.show()

In [None]:
# Same regression without an intercept term; note that this rebinds SLR to the new model.
SLR = LinearRegression(fit_intercept=False).fit(indVar, depVar)
fitVal1 = SLR.predict(indVar)

In [None]:
# Observed points with both fitted lines: fitVal (intercept included) and fitVal1 (no intercept).
plt.scatter(indVar[:,0],depVar,color="darkgrey",label='실제값')
plt.plot(indVar[:,0],fitVal,color='black',label='절편을 포함한 예측값')
plt.plot(indVar[:,0],fitVal1,color='darkorange',label='절편이 0인 경우의 예측값')
# plt.title('지역별 하위계층비율(LSTAT)과 주택가격(MEDV)의 산점도와 단순선형회귀적합 결과')
plt.xlabel('LSTAT')  # label matches the plotted column name (was misspelled 'LSAT')
plt.ylabel('MEDV')
plt.legend()
plt.show()

In [None]:
# NOTE(review): if the cells ran in order, SLR was last fitted with fit_intercept=False,
# so this shows that model's intercept (expected 0.0), not the intercept-included fit.
SLR.intercept_ # estimated intercept

In [None]:
SLR.coef_

* 선형회귀모형 학습 및 평가

In [None]:
LR = LinearRegression()
LR.fit(X_train, y_train)

In [None]:
LR.intercept_

In [None]:
LR.coef_

In [None]:
LR.rank_

In [None]:
LR.singular_

In [None]:
LR.feature_names_in_

In [None]:
len(LR.feature_names_in_)

In [None]:
# Materialise the (index, feature name) pairs so the cell output lists them
# instead of showing only the enumerate object's repr.
list(enumerate(LR.feature_names_in_))

In [None]:
# Print the intercept first, then one "name : coefficient" line per feature.
print(f'절편 : {LR.intercept_[0]}')
for name, coef in zip(LR.feature_names_in_, LR.coef_[0]):
    print(f'{name} : {coef}')

In [None]:
LR.predict(X_train)

In [None]:
LR.predict(X_test)

In [None]:
LR.score(X_train, y_train)

In [None]:
LR.score(X_test, y_test)

---
__kNN 회귀 알고리즘 설명__

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
SKR = KNeighborsRegressor(n_neighbors=5)

In [None]:
SKR = KNeighborsRegressor(n_neighbors=5,weights='uniform')

In [None]:
SKR = KNeighborsRegressor(n_neighbors=30,weights='uniform')

In [None]:
SKR = KNeighborsRegressor(n_neighbors=30,weights='distance')

In [None]:
SKR.fit(indVar,depVar)

In [None]:
indVar.min()

In [None]:
indVar.max()

In [None]:
term = np.linspace(indVar.min(), indVar.max(), 100)[:, np.newaxis]
term

In [None]:
fitVal = SKR.predict(term)
#fitVal = SKR.predict(indVar)

In [None]:
# Observed points with the kNN predictions evaluated on the evenly spaced grid `term`.
plt.scatter(indVar[:,0],depVar,color="darkgrey",label='실제값')
plt.plot(term,fitVal,color='black',label='예측값')
#plt.plot(indVar[:,0],fitVal,color='black',label='예측값')
# plt.title('지역별 하위계층비율(LSTAT)과 주택가격(MEDV)의 산점도와 kNN회귀 적합 결과')
plt.xlabel('LSTAT')  # label matches the plotted column name (was misspelled 'LSAT')
plt.ylabel('MEDV')
plt.legend()
plt.show()

In [None]:
# Report the in-sample score of a kNN regressor for every n_neighbors from 1 to 99.
for k in range(1, 100):
    SKR = KNeighborsRegressor(n_neighbors=k).fit(indVar, depVar)
    in_sample_score = SKR.score(indVar, depVar)
    print(f'Score for k={k}: {in_sample_score}')

In [None]:
optVal = np.zeros((100,2))

In [None]:
# Fill optVal row by row with (n_neighbors, in-sample score) for n_neighbors = 1..100.
for row in range(100):
    k = row + 1
    SKR = KNeighborsRegressor(n_neighbors=k).fit(indVar, depVar)
    optVal[row] = (k, SKR.score(indVar, depVar))

In [None]:
plt.plot(optVal[:,0],optVal[:,1],color='black')
# plt.title('n_neighbors 값의 변화에 따른 적합결과 변화')
plt.xlabel('n_neighbors')
plt.ylabel('R^2')
plt.show()

* kNN 회귀모형 학습 및 평가

In [None]:
KR = KNeighborsRegressor(n_neighbors=5)
KR.fit(X_train,y_train)

In [None]:
for k in range(1, 20):
    KR = KNeighborsRegressor(n_neighbors=k)
    KR.fit(X_train, y_train)
    print(f'Score for k={k}: {KR.score(X_train, y_train)}')

In [None]:
# Sweep n_neighbors over 1..niter; each optVal row holds
# (n_neighbors, score on the training split, score on the test split).
niter = 100
# wgt_nm = 'distance'
wgt_nm = 'uniform'
optVal = np.zeros((niter, 3))
for row in range(niter):
    k = row + 1
    KR = KNeighborsRegressor(n_neighbors=k, weights=wgt_nm).fit(X_train, y_train)
    optVal[row] = (k, KR.score(X_train, y_train), KR.score(X_test, y_test))

In [None]:
plt.plot(optVal[:,0],optVal[:,1],color='navy',label='학습데이터')
plt.plot(optVal[:,0],optVal[:,2],color='darkorange',label='평가데이터')
plt.title('n_neighbors 값의 변화에 따른 적합결과 변화')
plt.xlabel('n_neighbors')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# NOTE(review): if the cells ran in order, KR is the last model fitted by the sweep above
# (n_neighbors == niter), not the best-scoring one — confirm this is the model meant to be inspected.
KR.predict(X_train)

In [None]:
KR.score(X_train, y_train)

In [None]:
KR.score(X_test, y_test)

In [None]:
for k in range(1, 20):
    KR = KNeighborsRegressor(n_neighbors=k)
    KR.fit(X_train, y_train)
    print(f'Score for k={k}: {KR.score(X_test, y_test)}')

---
__회귀나무 알고리즘 설명__

In [None]:
# @title 회귀나무 분석에 필요한 기본 라이브러리
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree

In [None]:
# Depth-limited regression tree on the single predictor; the final expression shows its in-sample score.
RT = DecisionTreeRegressor(max_depth=5).fit(indVar, depVar)
fitVal = RT.predict(indVar)
RT.score(indVar, depVar)

In [None]:
term = np.linspace(indVar.min(), indVar.max(), 100)[:, np.newaxis]

In [None]:
fitVal = RT.predict(term)

In [None]:
# Observed points with the regression-tree predictions evaluated on the grid `term`.
plt.scatter(indVar[:,0],depVar,color="darkgrey",label='실제값')
plt.plot(term,fitVal,color='black',label='예측값')
# plt.title('지역별 하위계층비율(LSTAT)과 주택가격(MEDV)의 산점도와 회귀나무 적합 결과')
plt.xlabel('LSTAT')  # label matches the plotted column name (was misspelled 'LSAT')
plt.ylabel('MEDV')
plt.legend()
plt.show()

In [None]:
# RT = DecisionTreeRegressor()
# RT = DecisionTreeRegressor(max_depth=None,min_samples_split=0.1)
# RT = DecisionTreeRegressor(max_depth=None,min_samples_split=0.1)
RT = DecisionTreeRegressor(max_depth=None,min_samples_leaf=5)
# RT = DecisionTreeRegressor(max_depth=None,min_samples_split=0.1,min_samples_leaf=5)
RT.fit(indVar,depVar)
fitVal = RT.predict(term)
# RT.tree_.value

In [None]:
tree.plot_tree(RT)
plt.show()

In [None]:
plt.scatter(indVar[:,0],depVar, color="grey", label="실제값")
plt.plot(term,fitVal, color="red", label="예측값")
plt.show()

In [None]:
RT.score(indVar,depVar)

In [None]:
# 3x2 grid of panels, one per max_depth from 1 to 6, each showing the data and the tree's fit on `term`.
plt.figure(figsize=(8, 8))
for depth in range(1, 7):
    RT = DecisionTreeRegressor(max_depth=depth)
    RT.fit(indVar, depVar)
    fitVal = RT.predict(term)
    plt.subplot(3, 2, depth)
    plt.scatter(indVar[:, 0], depVar, color="grey", label="실제값")
    plt.plot(term, fitVal, color="red", label="예측값")
    plt.axis("tight")
    plt.legend()
    plt.title(f"max_depth = {depth}")

plt.tight_layout()
plt.show()

In [None]:
for k in range(1, 20):
    RT = DecisionTreeRegressor(max_depth=k)
    RT.fit(indVar,depVar)
    print(f'Score for k={k}: {RT.score(indVar,depVar)}')

In [None]:
# Same 3x2 panel figure as earlier in this section: one tree per max_depth in 1..6.
plt.figure(figsize=(8, 8))
for panel, depth in enumerate(range(1, 7), start=1):
    RT = DecisionTreeRegressor(max_depth=depth).fit(indVar, depVar)
    fitVal = RT.predict(term)
    plt.subplot(3, 2, panel)
    plt.scatter(indVar[:, 0], depVar, color="grey", label="실제값")
    plt.plot(term, fitVal, color="red", label="예측값")
    plt.axis("tight")
    plt.legend()
    plt.title("max_depth = " + str(depth))

plt.tight_layout()
plt.show()

In [None]:
for k in range(2, 100):
    RT = DecisionTreeRegressor(min_samples_split=k)
    RT.fit(indVar,depVar)
    print(f'Score for k={k}: {RT.score(indVar,depVar)}')

In [None]:
# One row per min_samples_split value in 2..100: (value, in-sample score).
optVal = np.zeros((99, 2))
for row, k in enumerate(range(2, 101)):
    RT = DecisionTreeRegressor(min_samples_split=k).fit(indVar, depVar)
    optVal[row] = (k, RT.score(indVar, depVar))

In [None]:
plt.plot(optVal[:,0],optVal[:,1],color='black')
# plt.title('min_samples_split 값의 변화에 따른 적합결과 변화')
plt.xlabel('min_samples_split')
plt.ylabel('R^2')
plt.show()

* 회귀나무 학습 및 평가

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# @title 기본값을 이용한 회귀나무의 적합 결과
RT = DecisionTreeRegressor(random_state=3)
RT.fit(X_train, y_train)

In [None]:
RT.score(X_train, y_train)

In [None]:
RT.score(X_test, y_test)

In [None]:
# @title 최적모형의 적합력
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

In [None]:
# One line per feature: its name and the importance the fitted tree assigns to it.
for col, importance in zip(RT.feature_names_in_, RT.feature_importances_):
    print(f'{col} 중요도 : {importance}')

In [None]:
# @title max_depth 변화에 따른 회귀나무의 성능 비교
# Fit one tree per max_depth in 2..niter+1 and store (max_depth, train score, test score) per row.
niter = 20
optVal = np.zeros((niter,3))
for k in range(2, niter+2):
    RT = DecisionTreeRegressor(max_depth=k,random_state=3)
    RT.fit(X_train, y_train)
    optVal[k-2,0] = k
    optVal[k-2,1] = RT.score(X_train, y_train)
    optVal[k-2,2] = RT.score(X_test, y_test)

# Train vs. test score as a function of max_depth.
plt.plot(optVal[:,0],optVal[:,1],color='navy',label='학습데이터')
plt.plot(optVal[:,0],optVal[:,2],color='darkorange',label='평가데이터')
# The title names the parameter actually varied here (it previously said min_samples_leaf).
plt.title('max_depth 값의 변화에 따른 적합결과 변화')
plt.xlabel('max_depth')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# @title 최적모형의 적합력
RT = DecisionTreeRegressor(max_depth=8,random_state=3)
RT.fit(X_train, y_train)
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

In [None]:
# @title min_samples_leaf 변화에 따른 회귀나무의 성능 비교
# Sweep min_samples_leaf over 2..niter+1; each optVal row is (value, train score, test score).
niter = 50
optVal = np.zeros((niter, 3))
for row, k in enumerate(range(2, niter + 2)):
    RT = DecisionTreeRegressor(min_samples_leaf=k, random_state=3).fit(X_train, y_train)
    optVal[row] = (k, RT.score(X_train, y_train), RT.score(X_test, y_test))

# Plot both score curves against the swept parameter.
leaf_values = optVal[:, 0]
plt.plot(leaf_values, optVal[:, 1], color='navy', label='학습데이터')
plt.plot(leaf_values, optVal[:, 2], color='darkorange', label='평가데이터')
plt.title('min_samples_leaf 값의 변화에 따른 적합결과 변화')
plt.xlabel('min_samples_leaf')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# @title 최적모형의 적합력
RT = DecisionTreeRegressor(min_samples_leaf=4,random_state=3)
RT.fit(X_train, y_train)
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

In [None]:
# @title min_samples_split 변화에 따른 회귀나무의 성능 비교
# Sweep min_samples_split over 2..niter+1; each optVal row is (value, train score, test score).
niter = 50
optVal = np.zeros((niter, 3))
for row, k in enumerate(range(2, niter + 2)):
    RT = DecisionTreeRegressor(min_samples_split=k, random_state=3).fit(X_train, y_train)
    optVal[row] = (k, RT.score(X_train, y_train), RT.score(X_test, y_test))

# Plot both score curves against the swept parameter.
split_values = optVal[:, 0]
plt.plot(split_values, optVal[:, 1], color='navy', label='학습데이터')
plt.plot(split_values, optVal[:, 2], color='darkorange', label='평가데이터')
plt.title('min_samples_split 값의 변화에 따른 적합결과 변화')
plt.xlabel('min_samples_split')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# @title 최적모형의 적합력
RT = DecisionTreeRegressor(min_samples_split=21,random_state=3)
RT.fit(X_train, y_train)
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

---
### (2) 신용카드 채무 데이터

* 새롭게 시작할 경우 사용해야 할 라이브러리 가져오기

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import tree

In [None]:
# @title 신용카드 채무 데이터 가져오기
balance = pd.read_csv('/content/BizDataAnalysis/DATA/Balance.csv')

* 데이터 탐색

In [None]:
balance.head()

In [None]:
balance.describe()

In [None]:
balance.groupby(['Gender','Student','Married','Ethnicity'])['Balance'].agg('mean')

In [None]:
balance.groupby(['Gender','Student','Married','Ethnicity'])['Balance'].agg(**{'Mean':'mean'})

In [None]:
balance.groupby(['Gender','Student','Married','Ethnicity'])['Balance'].agg(**{'N':'count','Mean':'mean','STD':'std'})

* 데이터의 분리

In [None]:
# Predictors: every column except the first and the last, selected by position.
X = balance.iloc[:,1:-1]
# Response: the last column; the list indexer [-1] keeps it as a one-column DataFrame.
y = balance.iloc[:,[-1]]

In [None]:
# @title 가변수 만들기
# Replace categorical columns with integer indicator columns; drop_first=True omits
# the first level of each so the indicators are not redundant with one another.
X = pd.get_dummies(X,drop_first=True,dtype=int)
# Alternative that keeps an indicator for every level:
# X = pd.get_dummies(X,dtype=int)

In [None]:
X.head()

In [None]:
# @markdown 난수초기값을 3으로 설정하고 전체의 70%를 무작위 추출하여 학습데이터 사용
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=3,train_size=0.7)

* 선형회귀모형

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
LR = LinearRegression()
LR.fit(X_train,y_train)

In [None]:
# @title 모형의 적합력
# print(f"- 학습데이터 : {LR.score(X_train, y_train)} \n- 평가데이터 : {LR.score(X_test, y_test)}")
print(f"- 학습데이터 : {np.round(LR.score(X_train, y_train),3)} \n- 평가데이터 : {np.round(LR.score(X_test, y_test),3)}")

In [None]:
# Print the intercept first, then one "name : coefficient" line per feature, rounded to 3 decimals.
print(f'절편 : {np.round(LR.intercept_[0],3)}')
for name, coef in zip(LR.feature_names_in_, LR.coef_[0]):
    print(f'{name} : {np.round(coef,3)}')

* kNN 회귀

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# @title kNN 회귀의 최적 모형 탐색
# Sweep n_neighbors over 1..niter; each optVal row is (n_neighbors, train score, test score).
niter = 100
wgt_nm = 'uniform'
optVal = np.zeros((niter, 3))
for row in range(niter):
    k = row + 1
    KR = KNeighborsRegressor(n_neighbors=k, weights=wgt_nm).fit(X_train, y_train)
    optVal[row] = (k, KR.score(X_train, y_train), KR.score(X_test, y_test))
# Plot both score curves against n_neighbors.
neighbor_values = optVal[:, 0]
plt.plot(neighbor_values, optVal[:, 1], color='navy', label='학습데이터')
plt.plot(neighbor_values, optVal[:, 2], color='darkorange', label='평가데이터')
plt.title('n_neighbors 값의 변화에 따른 적합결과 변화')
plt.xlabel('n_neighbors')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
# @title 최적모형
optVal[optVal[:,2] == max(optVal[:,2]),]

* 회귀나무

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# @title 기본값을 이용한 회귀나무의 적합 결과
RT = DecisionTreeRegressor(random_state=3)
RT.fit(X_train, y_train)

In [None]:
# @title 모형의 적합력
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
tree.plot_tree(RT)
plt.show()

In [None]:
# @title min_samples_split 변화에 따른 회귀나무의 성능 비교
# Sweep min_samples_split over 2..niter+1; each optVal row is (value, train score, test score).
niter = 50
optVal = np.zeros((niter, 3))
for row, k in enumerate(range(2, niter + 2)):
    RT = DecisionTreeRegressor(min_samples_split=k, random_state=3).fit(X_train, y_train)
    optVal[row] = (k, RT.score(X_train, y_train), RT.score(X_test, y_test))

# Plot both score curves against the swept parameter.
split_values = optVal[:, 0]
plt.plot(split_values, optVal[:, 1], color='navy', label='학습데이터')
plt.plot(split_values, optVal[:, 2], color='darkorange', label='평가데이터')
plt.title('min_samples_split 값의 변화에 따른 적합결과 변화')
plt.xlabel('min_samples_split')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
# @title 최적모형
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# @title 최적모형의 적합력
RT = DecisionTreeRegressor(min_samples_split=7,random_state=3)
RT.fit(X_train, y_train)
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.figure(figsize=(18,6))
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

In [None]:
# One line per feature: its name and the importance the fitted tree assigns to it.
for col, importance in zip(RT.feature_names_in_, RT.feature_importances_):
    print(f'{col} 중요도 : {importance}')

In [None]:
# @title min_samples_leaf 변화에 따른 회귀나무의 성능 비교
# Sweep min_samples_leaf over 2..niter+1; each optVal row is (value, train score, test score).
niter = 50
optVal = np.zeros((niter, 3))
for row, k in enumerate(range(2, niter + 2)):
    RT = DecisionTreeRegressor(min_samples_leaf=k, random_state=3).fit(X_train, y_train)
    optVal[row] = (k, RT.score(X_train, y_train), RT.score(X_test, y_test))

# Plot both score curves against the swept parameter.
leaf_values = optVal[:, 0]
plt.plot(leaf_values, optVal[:, 1], color='navy', label='학습데이터')
plt.plot(leaf_values, optVal[:, 2], color='darkorange', label='평가데이터')
plt.title('min_samples_leaf 값의 변화에 따른 적합결과 변화')
plt.xlabel('min_samples_leaf')
plt.ylabel('R^2')
plt.legend()
plt.show()

In [None]:
# @title 최적모형
optVal[optVal[:,2] == max(optVal[:,2]),]

In [None]:
# @title 최적모형의 적합력
# Refit with the min_samples_leaf value that scored highest on the test split in the sweep
# stored in optVal (column 0 = parameter value, column 2 = test score). This section tunes
# min_samples_leaf, so the refit must use that parameter rather than min_samples_split.
best_leaf = int(optVal[np.argmax(optVal[:, 2]), 0])
RT = DecisionTreeRegressor(min_samples_leaf=best_leaf,random_state=3)
RT.fit(X_train, y_train)
print(f"- 학습데이터 : {RT.score(X_train, y_train)} \n- 평가데이터 : {RT.score(X_test, y_test)}")

In [None]:
# @title 최적모형의 모습
tree.plot_tree(RT)
plt.show()

In [None]:
# @title 회귀나무에 사용된 독립변수의 중요도
plt.bar(RT.feature_names_in_,RT.feature_importances_)
plt.show()

In [None]:
# One line per feature: its name and the importance the fitted tree assigns to it.
for col, importance in zip(RT.feature_names_in_, RT.feature_importances_):
    print(f'{col} 중요도 : {importance}')

In [None]:
# @title 회귀나무 분리 규칙 출력
print(tree.export_text(RT,feature_names=RT.feature_names_in_.tolist()))

---
### __예측모형의 적합 결과 활용__

* 예측하고자 하는 데이터 생성 : 2차원 배열의 형태

In [None]:
# One new observation as a single-row DataFrame whose columns are the feature names RT was
# fitted with, so predict() receives named features in the training order instead of a bare list.
X_new = pd.DataFrame([[150,10000,850,6,60,16,1,0,1,1,0]], columns=RT.feature_names_in_)

In [None]:
RT.predict(X_new)

In [None]:
# Two new observations as a DataFrame whose columns are the feature names RT was fitted with,
# so predict() receives named features in the training order instead of a bare list.
X_new = pd.DataFrame(
    [[150,10000,850,6,60,16,1,0,1,1,0],[50,3000,450,2,60,16,1,0,1,1,0]],
    columns=RT.feature_names_in_,
)
RT.predict(X_new)