In [23]:
import numpy as np
import pandas as pd
import pyspark.pandas as ps
from pyspark.sql import SparkSession
# from pyspark.ml.linalg import Vectors

In [24]:
# Local mode: build (or reuse) a session named "sqldemo". No master URL is set
# here, so the master comes from the environment's Spark configuration.
# Requires `from pyspark.sql import SparkSession` to be in scope.
spark = (
    SparkSession.builder
    .appName("sqldemo")
    .getOrCreate()
)

In [None]:
# Standalone-cluster mode: connect to the Spark master at
# spark://master.example.org:7077, capping the application at one core in total
# and 1G of memory per executor.
# NOTE(review): getOrCreate() hands back an already-running session if one
# exists, so this cell is presumably meant to be run INSTEAD of the local-mode
# cell, not after it -- confirm.
spark = (
    SparkSession.builder
    .master("spark://master.example.org:7077")
    .config('spark.cores.max', '1')
    .config('spark.executor.memory', '1G')
    .appName("clusterdemo")
    .getOrCreate()
)

In [25]:
# Performance tuning: turn on the Arrow-based execution setting for PySpark.
spark.conf.set(
    "spark.sql.execution.arrow.pyspark.enabled",
    True,
)

***

In [26]:
'''
在python pd.DataFram.to_csv(index=False) 匯出.csv 格式，避免 inferScaeme 抓不到第一欄名稱報錯。
'''
# 讀取.csv 轉為 spark.sql.dataframe
print("=== spark.read.csv('allmusic3s_new.csv',sep=",",header=True, inferSchema=True) ===")
%time sdf_3s = spark.read.csv('allmusic3s_new.csv',sep=",",header=True, inferSchema=True)

# 創建資料庫
print("=================== sdf_3s.createOrReplaceTempView('\'dfTable'\') ====================")
%time sdf_3s.createOrReplaceTempView("dfTable")

# 刪除 NaN row
print("=================== sdf_3s.createOrReplaceTempView('\"dfTable'\') ====================")
%time sdf_3s = sdf_3s.dropna(how='any')

=== spark.read.csv('allmusic3s_new.csv',sep= ,header=True, inferSchema=True) ===


                                                                                

CPU times: user 6.37 ms, sys: 0 ns, total: 6.37 ms
Wall time: 1.01 s
CPU times: user 490 µs, sys: 0 ns, total: 490 µs
Wall time: 10.3 ms
CPU times: user 15.1 ms, sys: 0 ns, total: 15.1 ms
Wall time: 84.7 ms


In [6]:
# Drop the redundant leading column: keep every column from the second one on.
sdf_cols = sdf_3s.columns
select_sdf_3s = sdf_3s.select(*sdf_cols[1:])

# Collect the names of all columns whose dtype is not 'string'.
# (Presumably this excludes only the string-typed "label" column -- verify
# against the schema.)
continuous_features = [
    col_name
    for col_name, col_type in select_sdf_3s.dtypes
    if col_type != 'string'
]

***

In [14]:
from pyspark.ml.feature import StringIndexer, MinMaxScaler,VectorAssembler
from pyspark.ml import Pipeline

# label 編碼
# 選取需要的標籤 input="label", output="y"
indexers = [StringIndexer(inputCol="label", outputCol="y")] 

# 特徵轉向量
# input = for迴圈取出所有column 並[]轉成 list, output= "features" 一個標籤
assemblers = VectorAssembler(inputCols=[col for col in continuous_features], outputCol="features")

# MinMixScaler 標準化
# input= "features"(Vector column), output="mmfeartures"
mmScalers = MinMaxScaler(inputCol="features", outputCol="mmfeatures")

# Piplin整合一起執行
pipeline = Pipeline(stages= [assemblers, mmScalers] + indexers)
print("=========== pipeline.fit(select_sdf_3s) ============")
%time scalerModel = pipeline.fit(select_sdf_3s) 
print("======= scalerModel.transform(select_sdf_3s) =======")
%time scaledData = scalerModel.transform(select_sdf_3s)

#切分訓練、測試資料
data = scaledData.select("features","mmfeatures", "y")
trainingData, testData = data.randomSplit([0.8, 0.2])



[Stage 41:>                                                         (0 + 2) / 2]

CPU times: user 7.84 ms, sys: 24.2 ms, total: 32 ms
Wall time: 2.48 s


CPU times: user 31 ms, sys: 0 ns, total: 31 ms
Wall time: 128 ms


                                                                                

In [None]:
# Inspect the schema
#select_sdf_3s.printSchema()

In [None]:
# Check the number of training and test rows
# print("trainingData: {} count,testData: {} count".format(trainingData.count(), testData.count()))

***

## ML Part

In [16]:
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, GBTClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

def select_model(algo, train, test):
    """Fit ``algo`` on ``train``, score it on ``test`` and print the accuracy.

    Accuracy is computed from the model's predictions with
    ``MulticlassClassificationEvaluator`` instead of ``model.evaluate``,
    because not every classification model exposes an ``evaluate`` method
    (``GBTClassificationModel`` does not). This also scores the test set
    only once.

    Args:
        algo: An unfitted classifier (estimator) object; its label and
            prediction column names are read via ``getLabelCol()`` and
            ``getPredictionCol()``.
        train: Training DataFrame passed to ``algo.fit``.
        test: Test DataFrame passed to the fitted model's ``transform``.

    Returns:
        The accuracy on ``test`` as a float (also printed together with the
        estimator's class name).
    """
    model = algo.fit(train)
    predictions = model.transform(test)

    evaluator = MulticlassClassificationEvaluator(
        labelCol=algo.getLabelCol(),
        predictionCol=algo.getPredictionCol(),
        metricName="accuracy",
    )
    accuracy = evaluator.evaluate(predictions)

    # Print the estimator's class name and its accuracy.
    print('{} Accuracy: {:5.3f}'.format(type(algo).__name__, accuracy))
    return accuracy

In [17]:
# Create the unfitted estimators. Each one reads the min-max-scaled vector
# column 'mmfeatures' and the indexed label column 'y'.

# Logistic regression
logr = LogisticRegression(
    featuresCol='mmfeatures',
    labelCol='y',
)

# Random forest: 1000 trees, each limited to depth 10.
rf = RandomForestClassifier(
    featuresCol='mmfeatures',
    labelCol='y',
    numTrees=1000,
    maxDepth=10,
)

# Gradient-boosted trees
# NOTE(review): Spark's GBTClassifier is documented as binary-only -- confirm
# that 'y' has exactly two classes before training it.
gbt = GBTClassifier(
    featuresCol='mmfeatures',
    labelCol='y',
)

## Model training times

In [18]:
# Train and score the random forest; %time reports the cost (about 8 minutes
# of wall time in the recorded run below).
%time select_model(rf, trainingData, testData)

                                                                                

22/10/05 14:29:14 WARN DAGScheduler: Broadcasting large task binary with size 1844.7 KiB


                                                                                

22/10/05 14:29:29 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB




22/10/05 14:29:45 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:29:49 WARN DAGScheduler: Broadcasting large task binary with size 3.9 MiB




22/10/05 14:30:02 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:09 WARN DAGScheduler: Broadcasting large task binary with size 4.0 MiB




22/10/05 14:30:14 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:17 WARN DAGScheduler: Broadcasting large task binary with size 4.2 MiB




22/10/05 14:30:20 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:30:22 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB




22/10/05 14:30:24 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:27 WARN DAGScheduler: Broadcasting large task binary with size 5.3 MiB


[Stage 64:>                                                         (0 + 2) / 2]

22/10/05 14:30:28 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:30:31 WARN DAGScheduler: Broadcasting large task binary with size 6.2 MiB


[Stage 66:>                                                         (0 + 2) / 2]

22/10/05 14:30:32 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:35 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB


[Stage 68:>                                                         (0 + 2) / 2]

22/10/05 14:30:37 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:39 WARN DAGScheduler: Broadcasting large task binary with size 6.0 MiB




22/10/05 14:30:40 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:43 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


[Stage 72:>                                                         (0 + 2) / 2]

22/10/05 14:30:45 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:30:47 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


[Stage 74:>                                                         (0 + 2) / 2]

22/10/05 14:30:49 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:30:51 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB




22/10/05 14:30:53 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:30:55 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB


[Stage 78:>                                                         (0 + 2) / 2]

22/10/05 14:30:58 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:00 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB




22/10/05 14:31:02 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:31:05 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB


[Stage 82:>                                                         (0 + 2) / 2]

22/10/05 14:31:06 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:31:09 WARN DAGScheduler: Broadcasting large task binary with size 6.2 MiB


[Stage 84:>                                                         (0 + 2) / 2]

22/10/05 14:31:10 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:12 WARN DAGScheduler: Broadcasting large task binary with size 5.2 MiB




22/10/05 14:31:15 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:18 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB


[Stage 88:>                                                         (0 + 2) / 2]

22/10/05 14:31:19 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:22 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB




22/10/05 14:31:23 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:31:26 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


[Stage 92:>                                                         (0 + 2) / 2]

22/10/05 14:31:29 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:34 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


[Stage 94:>                                                         (0 + 2) / 2]

22/10/05 14:31:35 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:38 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


[Stage 96:>                                                         (0 + 2) / 2]

22/10/05 14:31:40 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:42 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB




22/10/05 14:31:44 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:49 WARN DAGScheduler: Broadcasting large task binary with size 5.9 MiB


[Stage 100:>                                                        (0 + 2) / 2]

22/10/05 14:31:50 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:53 WARN DAGScheduler: Broadcasting large task binary with size 5.9 MiB


[Stage 102:>                                                        (0 + 2) / 2]

22/10/05 14:31:54 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:31:57 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB




22/10/05 14:32:00 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:03 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB


[Stage 106:>                                                        (0 + 2) / 2]

22/10/05 14:32:05 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:07 WARN DAGScheduler: Broadcasting large task binary with size 6.1 MiB




22/10/05 14:32:11 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:13 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB




22/10/05 14:32:15 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:17 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


[Stage 112:>                                                        (0 + 2) / 2]

22/10/05 14:32:21 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:32:24 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB




22/10/05 14:32:26 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:32:28 WARN DAGScheduler: Broadcasting large task binary with size 6.6 MiB


[Stage 116:>                                                        (0 + 2) / 2]

22/10/05 14:32:30 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:32 WARN DAGScheduler: Broadcasting large task binary with size 5.3 MiB


[Stage 118:>                                                        (0 + 2) / 2]

22/10/05 14:32:34 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:36 WARN DAGScheduler: Broadcasting large task binary with size 5.2 MiB




22/10/05 14:32:38 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:41 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB


[Stage 122:>                                                        (0 + 2) / 2]

22/10/05 14:32:42 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:45 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB


[Stage 124:>                                                        (0 + 2) / 2]

22/10/05 14:32:46 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:32:49 WARN DAGScheduler: Broadcasting large task binary with size 4.2 MiB




22/10/05 14:32:56 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:32:58 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


[Stage 128:>                                                        (0 + 2) / 2]

22/10/05 14:33:01 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:04 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB




22/10/05 14:33:06 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:09 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB




22/10/05 14:33:10 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:13 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB


[Stage 134:>                                                        (0 + 2) / 2]

22/10/05 14:33:14 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:33:19 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB




22/10/05 14:33:22 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:24 WARN DAGScheduler: Broadcasting large task binary with size 5.3 MiB


[Stage 138:>                                                        (0 + 2) / 2]

22/10/05 14:33:26 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:33:29 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


[Stage 140:>                                                        (0 + 2) / 2]

22/10/05 14:33:30 WARN DAGScheduler: Broadcasting large task binary with size 1192.7 KiB


                                                                                

22/10/05 14:33:33 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB


[Stage 142:>                                                        (0 + 2) / 2]

22/10/05 14:33:34 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:37 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


[Stage 144:>                                                        (0 + 2) / 2]

22/10/05 14:33:39 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:41 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB




22/10/05 14:33:43 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:33:45 WARN DAGScheduler: Broadcasting large task binary with size 6.0 MiB


[Stage 148:>                                                        (0 + 2) / 2]

22/10/05 14:33:46 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:33:49 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB




22/10/05 14:33:53 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:33:55 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


[Stage 152:>                                                        (0 + 2) / 2]

22/10/05 14:33:58 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:00 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


[Stage 154:>                                                        (0 + 2) / 2]

22/10/05 14:34:02 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:34:04 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


[Stage 156:>                                                        (0 + 2) / 2]

22/10/05 14:34:06 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:08 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB


[Stage 158:>                                                        (0 + 2) / 2]

22/10/05 14:34:10 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:12 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB


[Stage 160:>                                                        (0 + 2) / 2]

22/10/05 14:34:14 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:34:17 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB


[Stage 162:>                                                        (0 + 2) / 2]

22/10/05 14:34:19 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:34:21 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


[Stage 164:>                                                        (0 + 2) / 2]

22/10/05 14:34:23 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:25 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB




22/10/05 14:34:27 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:29 WARN DAGScheduler: Broadcasting large task binary with size 6.2 MiB


[Stage 168:>                                                        (0 + 2) / 2]

22/10/05 14:34:30 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:35 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB




22/10/05 14:34:38 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:34:40 WARN DAGScheduler: Broadcasting large task binary with size 5.2 MiB




22/10/05 14:34:42 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:34:45 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB


[Stage 174:>                                                        (0 + 2) / 2]

22/10/05 14:34:46 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:34:49 WARN DAGScheduler: Broadcasting large task binary with size 5.9 MiB


[Stage 176:>                                                        (0 + 2) / 2]

22/10/05 14:34:51 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:34:53 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB




22/10/05 14:34:57 WARN DAGScheduler: Broadcasting large task binary with size 1193.2 KiB


                                                                                

22/10/05 14:34:59 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB


[Stage 180:>                                                        (0 + 2) / 2]

22/10/05 14:35:02 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:04 WARN DAGScheduler: Broadcasting large task binary with size 5.2 MiB


[Stage 182:>                                                        (0 + 2) / 2]

22/10/05 14:35:07 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:09 WARN DAGScheduler: Broadcasting large task binary with size 6.1 MiB




22/10/05 14:35:11 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:35:13 WARN DAGScheduler: Broadcasting large task binary with size 5.9 MiB


[Stage 186:>                                                        (0 + 2) / 2]

22/10/05 14:35:15 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:17 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


[Stage 188:>                                                        (0 + 2) / 2]

22/10/05 14:35:18 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:23 WARN DAGScheduler: Broadcasting large task binary with size 4.6 MiB


[Stage 190:>                                                        (0 + 2) / 2]

22/10/05 14:35:30 WARN DAGScheduler: Broadcasting large task binary with size 1138.4 KiB


                                                                                

22/10/05 14:35:32 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


[Stage 192:>                                                        (0 + 2) / 2]

22/10/05 14:35:34 WARN DAGScheduler: Broadcasting large task binary with size 1192.9 KiB


                                                                                

22/10/05 14:35:37 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


[Stage 194:>                                                        (0 + 2) / 2]

22/10/05 14:35:38 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:41 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB


[Stage 196:>                                                        (0 + 2) / 2]

22/10/05 14:35:43 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:46 WARN DAGScheduler: Broadcasting large task binary with size 5.9 MiB




22/10/05 14:35:48 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:50 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB




22/10/05 14:35:52 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:54 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB


[Stage 202:>                                                        (0 + 2) / 2]

22/10/05 14:35:56 WARN DAGScheduler: Broadcasting large task binary with size 1193.0 KiB


                                                                                

22/10/05 14:35:58 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB


[Stage 204:>                                                        (0 + 2) / 2]

22/10/05 14:36:00 WARN DAGScheduler: Broadcasting large task binary with size 1190.3 KiB


                                                                                

22/10/05 14:36:03 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


                                                                                

22/10/05 14:36:07 WARN DAGScheduler: Broadcasting large task binary with size 3.4 MiB


                                                                                

22/10/05 14:36:09 WARN DAGScheduler: Broadcasting large task binary with size 5.3 MiB


[Stage 210:>                                                        (0 + 2) / 2]

22/10/05 14:36:11 WARN DAGScheduler: Broadcasting large task binary with size 1148.4 KiB


                                                                                

22/10/05 14:36:13 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


                                                                                

22/10/05 14:36:16 WARN DAGScheduler: Broadcasting large task binary with size 4.0 MiB


[Stage 216:>                                                        (0 + 0) / 2]

22/10/05 14:36:26 WARN DAGScheduler: Broadcasting large task binary with size 160.4 MiB




RandomForestClassifier Accuracy: 0.721
CPU times: user 269 ms, sys: 265 ms, total: 534 ms
Wall time: 7min 58s


                                                                                

In [None]:
# Train and score the logistic-regression model, timing the whole call.
%time select_model(logr, trainingData, testData)

In [None]:
# Train and score the gradient-boosted-trees model, timing the whole call.
# NOTE(review): GBTClassifier is documented as binary-only -- confirm the
# label column has two classes before running this cell.
%time select_model(gbt, trainingData, testData)