# Build a Price Prediction Model on Real Estate Data

## Connect to PostgreSQL

In [12]:
from pyspark.sql import SparkSession

import os

# JDBC endpoint and source table read by the data-loading cell.
jdbc_url = "jdbc:postgresql://postgres:5432/postgres"
table_name = "gold.gold_analytics_data"

# Credentials come from the environment so that real secrets never have to be
# saved inside the notebook. The fallbacks are the values that used to be
# hard-coded here, so the notebook behaves as before when the variables are
# not set.
connection_properties = {
    "user": os.environ.get("POSTGRES_USER", "postgres"),
    "password": os.environ.get("POSTGRES_PASSWORD", "postgres"),
}

## Import ML libraries

In [30]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler, StandardScaler, StringIndexer
from pyspark.ml.regression import GBTRegressor
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml import Pipeline

## Load data

In [31]:
# Obtain the active Spark session, creating one if none exists yet, so this
# cell does not depend on a `spark` variable left over from an earlier run.
# NOTE(review): a newly created session needs the PostgreSQL JDBC driver on its
# classpath -- confirm how the driver jar is supplied in this environment.
spark = SparkSession.builder.getOrCreate()

# Read the source table over JDBC and print the resulting column types.
data = spark.read.jdbc(jdbc_url, table_name, properties=connection_properties)
data.printSchema()

root
 |-- sale_id: string (nullable = true)
 |-- account_id: integer (nullable = true)
 |-- ad_id: integer (nullable = true)
 |-- area: string (nullable = true)
 |-- area_name: string (nullable = true)
 |-- category: string (nullable = true)
 |-- category_name: string (nullable = true)
 |-- latitude: decimal(10,6) (nullable = true)
 |-- longitude: decimal(10,6) (nullable = true)
 |-- location: string (nullable = true)
 |-- date: string (nullable = true)
 |-- price: long (nullable = true)
 |-- region_name: string (nullable = true)
 |-- rooms: integer (nullable = true)
 |-- size: decimal(10,2) (nullable = true)
 |-- type: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- quarter: integer (nullable = true)
 |-- price_per_sqm: decimal(33,11) (nullable = true)



## Data Preprocessing Pipeline

In [45]:
# String-typed columns that are converted to numeric indices.
categorical_cols = ['area', 'area_name', 'category', 'category_name', 'location']
# Numeric columns that are passed to the assembler unchanged.
numeric_cols = ['latitude', 'longitude', 'rooms', 'size', 'year', 'month', 'quarter']
indexed_cols = [name + "_index" for name in categorical_cols]

# One fitted StringIndexer per categorical column, each writing "<column>_index".
indexers = [
    StringIndexer(inputCol=name, outputCol=name + "_index").fit(data)
    for name in categorical_cols
]

# Pack numeric and indexed columns into one vector, then standardise it
# (zero mean, unit standard deviation) into the "features" column.
assembler = VectorAssembler(inputCols=numeric_cols + indexed_cols, outputCol="raw_features")
scaler = StandardScaler(inputCol="raw_features", outputCol="features", withStd=True, withMean=True)
preprocessing_pipeline = Pipeline(stages=indexers + [assembler, scaler])

# Keep the fitted pipeline instead of discarding it: the same indexing and
# scaling must be applied to any data that is scored later.
# NOTE(review): the indexers and the scaler are fitted on every row of `data`,
# i.e. before any train/test split, so held-out rows influence the
# preprocessing statistics. Consider splitting first and fitting on the
# training portion only.
preprocessing_model = preprocessing_pipeline.fit(data)
data_processed = preprocessing_model.transform(data)

## Split data into training and testing sets

In [46]:
# Seeded random split with weights 0.8 (training) and 0.2 (testing).
data_splits = data_processed.randomSplit(weights=[0.8, 0.2], seed=123)
train_data, test_data = data_splits

## Initialize Gradient Boosted Trees Regressor

In [47]:
# Gradient-boosted trees regressor: learns "price" from the "features" vector.
gbt = GBTRegressor(
    labelCol="price",
    featuresCol="features",
)

## Define parameter grid for hyperparameter tuning

In [48]:
# Candidate hyperparameters: every combination of the two tree depths and the
# two iteration counts below (2 x 2 = 4 parameter maps).
param_grid = (
    ParamGridBuilder()
    .addGrid(gbt.maxDepth, [5, 10])
    .addGrid(gbt.maxIter, [50, 100])
    .build()
)

## Initialize CrossValidator

In [49]:
# RMSE between the "price" label and the model's "prediction" column; named so
# it can be reused for later evaluation.
rmse_evaluator = RegressionEvaluator(labelCol="price", predictionCol="prediction", metricName="rmse")

# 3-fold cross-validation over every parameter map in `param_grid`.
# The explicit seed pins the fold assignment so that repeated runs compare
# the candidates on the same folds; it uses the same value as the train/test
# split for consistency.
crossval = CrossValidator(
    estimator=gbt,
    estimatorParamMaps=param_grid,
    evaluator=rmse_evaluator,
    numFolds=3,
    seed=123,
)

## Train the model

In [50]:
# Run the cross-validated search on the training split and keep the fitted result.
model = crossval.fit(dataset=train_data)

                                                                                

24/05/06 04:21:01 WARN DAGScheduler: Broadcasting large task binary with size 1001.3 KiB
24/05/06 04:21:01 WARN DAGScheduler: Broadcasting large task binary with size 1001.8 KiB
24/05/06 04:21:01 WARN DAGScheduler: Broadcasting large task binary with size 1002.3 KiB
24/05/06 04:21:01 WARN DAGScheduler: Broadcasting large task binary with size 1003.5 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1005.7 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1008.4 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1008.9 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1009.4 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1010.6 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1012.8 KiB
24/05/06 04:21:02 WARN DAGScheduler: Broadcasting large task binary with size 1015.5 KiB
24/05/06 04:21:02 WAR

                                                                                

24/05/06 04:23:34 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:34 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:34 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:34 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:35 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:35 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
24/05/06 04:23:36 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
24/05/06 04:23:36 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
24/05/06 04:23:36 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
24/05/06 04:23:37 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
24/05/06 04:23:37 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
24/05/06 04:23:37 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:24:16 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:17 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB


                                                                                

24/05/06 04:24:17 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:17 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:18 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:18 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:18 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:19 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:19 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:19 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:19 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:20 WARN DAGScheduler: Broadcasting large task binary with size 5.1 MiB
24/05/06 04:24:20 WARN DAGScheduler: Broadcasting large task binary with size 5.2 MiB
24/05/06 04:24:20 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:24:40 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB
24/05/06 04:24:40 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:41 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:41 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:42 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:42 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:42 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:43 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:43 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:43 WARN DAGScheduler: Broadcasting large task binary with size 5.7 MiB
24/05/06 04:24:44 WARN DAGScheduler: Broadcasting large task binary with size 5.8 MiB
24/05/06 04:24:44 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:25:13 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:14 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:14 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:15 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:15 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:16 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:16 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:16 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:17 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:25:17 WARN DAGScheduler: Broadcasting large task binary with size 6.6 MiB
24/05/06 04:25:17 WARN DAGScheduler: Broadcasting large task binary with size 6.6 MiB
24/05/06 04:25:18 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:25:21 WARN DAGScheduler: Broadcasting large task binary with size 6.6 MiB
24/05/06 04:25:22 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:23 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB


                                                                                

24/05/06 04:25:23 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:24 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:24 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:25 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:25 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:26 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:26 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:26 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:27 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:27 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:28 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:25:28 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:25:30 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:30 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:30 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:31 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:31 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB


                                                                                

24/05/06 04:25:32 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:33 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:33 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:34 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:34 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:35 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:35 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:35 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:25:36 WARN DAGScheduler: Broadcasting large task binary with size 6.9 MiB
24/05/06 04:25:36 WARN DAGScheduler: Broadcasting large task binary with size 6.9 MiB
24/05/06 04:25:37 WARN DAGScheduler: Broadcasting large task binary with size 6.9 MiB
24/05/06 04:25:37 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:25:42 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB


                                                                                

24/05/06 04:25:42 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB


                                                                                

24/05/06 04:25:43 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB
24/05/06 04:25:44 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB


                                                                                

24/05/06 04:25:44 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB
24/05/06 04:25:45 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB
24/05/06 04:25:45 WARN DAGScheduler: Broadcasting large task binary with size 7.0 MiB
24/05/06 04:25:46 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:46 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB


                                                                                

24/05/06 04:25:47 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:48 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:48 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:49 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:49 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:50 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:50 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:51 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB
24/05/06 04:25:51 WARN DAGScheduler: Broadcasting large task binary with size 7.1 MiB


                                                                                

24/05/06 04:25:52 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:53 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB


                                                                                

24/05/06 04:25:53 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:54 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:55 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:55 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:56 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:56 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:56 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:57 WARN DAGScheduler: Broadcasting large task binary with size 7.2 MiB
24/05/06 04:25:57 WARN DAGScheduler: Broadcasting large task binary with size 7.3 MiB
24/05/06 04:25:58 WARN DAGScheduler: Broadcasting large task binary with size 7.3 MiB
24/05/06 04:25:58 WARN DAGScheduler: Broadcasting large task binary with size 7.3 MiB
24/05/06 04:25:59 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:26:07 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:07 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:08 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:08 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:08 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:09 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:09 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:10 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:26:10 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:26:11 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:26:11 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:26:12 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:26:16 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:17 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:17 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:18 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:18 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:18 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:19 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:19 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:19 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:20 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:21 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:26:21 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:26:35 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:36 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:36 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:37 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:37 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:37 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:38 WARN DAGScheduler: Broadcasting large task binary with size 8.0 MiB
24/05/06 04:26:38 WARN DAGScheduler: Broadcasting large task binary with size 8.1 MiB
24/05/06 04:26:39 WARN DAGScheduler: Broadcasting large task binary with size 8.1 MiB
24/05/06 04:26:39 WARN DAGScheduler: Broadcasting large task binary with size 8.1 MiB
24/05/06 04:26:40 WARN DAGScheduler: Broadcasting large task binary with size 8.1 MiB
24/05/06 04:26:40 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:27:00 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:00 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:01 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:01 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:02 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:03 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:03 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:03 WARN DAGScheduler: Broadcasting large task binary with size 8.5 MiB
24/05/06 04:27:04 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:27:05 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:27:05 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:27:06 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:27:09 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:10 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:10 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:11 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:11 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:12 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:13 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:13 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:13 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:14 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:27:15 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB


                                                                                

24/05/06 04:28:20 WARN DAGScheduler: Broadcasting large task binary with size 1001.2 KiB
24/05/06 04:28:20 WARN DAGScheduler: Broadcasting large task binary with size 1003.9 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1004.3 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1004.9 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1006.0 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1008.3 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1011.0 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1011.4 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1012.0 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1013.1 KiB
24/05/06 04:28:21 WARN DAGScheduler: Broadcasting large task binary with size 1015.4 KiB
24/05/06 04:28:21 WAR

                                                                                

24/05/06 04:31:44 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB
24/05/06 04:31:44 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB
24/05/06 04:31:44 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB
24/05/06 04:31:45 WARN DAGScheduler: Broadcasting large task binary with size 4.9 MiB
24/05/06 04:31:45 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:45 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:46 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:46 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:46 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:47 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:47 WARN DAGScheduler: Broadcasting large task binary with size 5.0 MiB
24/05/06 04:31:47 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:32:42 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB
24/05/06 04:32:42 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:43 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:43 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:43 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:44 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:44 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:44 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:45 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:45 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:45 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB
24/05/06 04:32:46 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:33:31 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:33:32 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:33:32 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:33:33 WARN DAGScheduler: Broadcasting large task binary with size 7.5 MiB
24/05/06 04:33:33 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:33 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:34 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:34 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:35 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:35 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:35 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:33:36 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:34:11 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:11 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:12 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:12 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:13 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:13 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:34:14 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:34:14 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:34:14 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:34:15 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:34:15 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:34:16 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:34:25 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:34:26 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:34:26 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB


                                                                                

24/05/06 04:34:27 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:27 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:28 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:28 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:29 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:29 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:30 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:30 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:30 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:31 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:34:31 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:32 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:34:33 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB


                                                                                

24/05/06 04:34:34 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB


                                                                                

24/05/06 04:34:35 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:35 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:36 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:36 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:37 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:34:37 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:37 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:38 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:39 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:39 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:40 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:34:40 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:34:50 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB


                                                                                

24/05/06 04:34:50 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:34:50 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:34:51 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:34:51 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:34:52 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:34:52 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:53 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:54 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:54 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:55 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:55 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:34:56 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:35:58 WARN DAGScheduler: Broadcasting large task binary with size 1002.3 KiB
24/05/06 04:35:58 WARN DAGScheduler: Broadcasting large task binary with size 1002.8 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1003.4 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1004.5 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1006.4 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1008.6 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1009.1 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1009.7 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1010.8 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1013.1 KiB
24/05/06 04:35:59 WARN DAGScheduler: Broadcasting large task binary with size 1015.7 KiB
24/05/06 04:36:00 WAR

                                                                                

24/05/06 04:40:23 WARN DAGScheduler: Broadcasting large task binary with size 6.7 MiB
24/05/06 04:40:23 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:23 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:24 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:24 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:25 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:25 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:25 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:26 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:26 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:26 WARN DAGScheduler: Broadcasting large task binary with size 6.8 MiB
24/05/06 04:40:27 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:40:57 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:57 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:58 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:58 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:59 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:59 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:40:59 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:41:00 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:41:00 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:01 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:41:01 WARN DAGScheduler: Broadcasting large task binary with size 7.6 MiB
24/05/06 04:41:02 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:41:03 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:04 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:04 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:05 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:05 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:06 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:06 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:07 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB


                                                                                

24/05/06 04:41:07 WARN DAGScheduler: Broadcasting large task binary with size 7.7 MiB
24/05/06 04:41:08 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB


                                                                                

24/05/06 04:41:08 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB


                                                                                

24/05/06 04:41:09 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:09 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:10 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB


                                                                                

24/05/06 04:41:11 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:11 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:12 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:12 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:13 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:13 WARN DAGScheduler: Broadcasting large task binary with size 7.8 MiB
24/05/06 04:41:13 WARN DAGScheduler: Broadcasting large task binary with size 7.9 MiB
24/05/06 04:41:14 WARN DAGScheduler: Broadcasting large task binary with size 7.9 MiB
24/05/06 04:41:14 WARN DAGScheduler: Broadcasting large task binary with size 7.9 MiB
24/05/06 04:41:14 WARN DAGScheduler: Broadcasting large task binary with size 7.9 MiB
24/05/06 04:41:15 WARN DAGScheduler: Broadcasting large task binary with size 7.9 MiB
24/05/06 04:41:16 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:41:32 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:32 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:33 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:33 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:34 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:34 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:34 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:35 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:35 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:36 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
24/05/06 04:41:36 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:37 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:41:38 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:38 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:39 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:39 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:40 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:40 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:40 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:41 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:41 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:42 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:42 WARN DAGScheduler: Broadcasting large task binary with size 8.4 MiB
24/05/06 04:41:42 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:41:50 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:41:50 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:41:51 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:41:51 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB


                                                                                

24/05/06 04:41:52 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB


                                                                                

24/05/06 04:41:53 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB


                                                                                

24/05/06 04:41:54 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB


                                                                                

24/05/06 04:41:55 WARN DAGScheduler: Broadcasting large task binary with size 8.6 MiB
24/05/06 04:41:55 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:56 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB


                                                                                

24/05/06 04:41:56 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:57 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:57 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:57 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:58 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:58 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:59 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:41:59 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:42:00 WARN DAGScheduler: Broadcasting large task binary with size 8.7 MiB
24/05/06 04:42:00 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:42:01 WARN DAGScheduler: Broadcasting large task binary with size 8.8 MiB
24/05/06 04:42:01 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:42:06 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:06 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:07 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:07 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:08 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:08 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:08 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:09 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:09 WARN DAGScheduler: Broadcasting large task binary with size 8.9 MiB
24/05/06 04:42:10 WARN DAGScheduler: Broadcasting large task binary with size 9.0 MiB
24/05/06 04:42:10 WARN DAGScheduler: Broadcasting large task binary with size 9.0 MiB
24/05/06 04:42:11 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:42:14 WARN DAGScheduler: Broadcasting large task binary with size 9.0 MiB
24/05/06 04:42:15 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:15 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:16 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:16 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:17 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:17 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:18 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:18 WARN DAGScheduler: Broadcasting large task binary with size 9.1 MiB
24/05/06 04:42:18 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:19 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:19 WARN DAGScheduler: Broadcasting larg

                                                                                

24/05/06 04:42:21 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:22 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:22 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:22 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:23 WARN DAGScheduler: Broadcasting large task binary with size 9.2 MiB
24/05/06 04:42:23 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:24 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:24 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:25 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:25 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:26 WARN DAGScheduler: Broadcasting large task binary with size 9.3 MiB
24/05/06 04:42:26 WARN DAGScheduler: Broadcasting larg

                                                                                

## Make predictions

In [53]:
# Run the model over the test set; the result is stored in `predictions`, which the
# evaluation cell reads via its "price" and "prediction" columns.
# NOTE(review): `model` and `test_data` come from earlier cells not shown here —
# presumably the fitted (cross-validated) pipeline model and the held-out split; confirm.
predictions = model.transform(test_data)

## Evaluate the model

In [54]:
# Score the predictions with root-mean-squared error, comparing the true "price"
# column against the model's "prediction" column. RMSE is expressed in the same
# units as the label, so its magnitude must be read relative to typical prices.
evaluator = RegressionEvaluator(
    metricName="rmse",
    predictionCol="prediction",
    labelCol="price",
)
rmse = evaluator.evaluate(predictions)
print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)

Root Mean Squared Error (RMSE) on test data = 4.59919e+10


## Conclusion: the model performs poorly (test RMSE ≈ 4.6e10), and the cause has not been identified yet