In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [4]:
# Snowpark
from snowflake.snowpark.session import Session
from snowflake.snowpark import functions as F
from snowflake.snowpark.types import *
from snowflake.snowpark.version import VERSION
from snowflake.snowpark.functions import pandas_udf


In [5]:
# Read credentials
with open('creds.json') as f:
    connection_parameters = json.load(f)    
session = Session.builder.configs(connection_parameters).create()

In [6]:
snowpark_version = VERSION
print('Database                    : {}'.format(session.get_current_database()))
print('Schema                      : {}'.format(session.get_current_schema()))
print('Warehouse                   : {}'.format(session.get_current_warehouse()))
print('Role                        : {}'.format(session.get_current_role()))
print('Snowpark for Python version : {}.{}.{}'.format(snowpark_version[0],snowpark_version[1],snowpark_version[2]))

Database                    : "HOUSING"
Schema                      : "PUBLIC"
Warehouse                   : "XSMALL_WH"
Role                        : "ACCOUNTADMIN"
Snowpark for Python version : 1.0.0


In [7]:
session.sql("SELECT count(*) FROM HOUSING.PUBLIC.HOUSINGPRICE").collect()

[Row(COUNT(*)=30144)]

In [8]:
housepricingdf = session.table("HOUSING.PUBLIC.HOUSINGPRICE")

In [9]:
housepricingdf=housepricingdf.to_pandas()
type(housepricingdf)

pandas.core.frame.DataFrame

In [10]:
housepricingdf.columns = map(lambda x: str(x).upper(), housepricingdf.columns)

In [11]:
housepricingdf.head()

Unnamed: 0,DATE,SUBURB,ADDRESS,ROOMS,TYPE,PRICE,METHOD,SELLERG,DATE.1,DISTANCE,...,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,YEARBUILT,COUNCILAREA,LATTITUDE,LONGTITUDE,REGIONNAME,PROPERTYCOUNT
0,2016-01-28,Surrey Hills,999A Riversdale Rd,3,h,1205000.0,S,Fletchers,2016-01-28,11.2,...,1.0,2.0,490.0,,,Boroondara City Council,-37.8361,145.1006,Southern Metropolitan,5457.0
1,2016-01-28,Surrey Hills,1/10 Florence Rd,2,u,813000.0,S,Fletchers,2016-01-28,11.2,...,1.0,2.0,108.0,,,Boroondara City Council,-37.8276,145.1023,Southern Metropolitan,5457.0
2,2016-01-28,Canterbury,140 Canterbury Rd,4,h,,SP,Fletchers,2016-01-28,9.0,...,2.0,0.0,808.0,198.0,1910.0,Boroondara City Council,-37.8235,145.0751,Southern Metropolitan,3265.0
3,2016-03-09,Williamstown,54 Twyford St,3,h,1535000.0,S,Greg,2016-03-09,8.0,...,1.0,1.0,507.0,186.0,1910.0,Hobsons Bay City Council,-37.8681,144.8994,Western Metropolitan,6380.0
4,2016-03-09,Newport,1/26 Thorpe St,3,h,826000.0,S,Village,2016-03-09,8.4,...,1.0,2.0,281.0,100.0,1957.0,Hobsons Bay City Council,-37.8457,144.8651,Western Metropolitan,5498.0


In [12]:
housepricingdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30144 entries, 0 to 30143
Data columns (total 22 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   DATE           30144 non-null  object 
 1   SUBURB         30144 non-null  object 
 2   ADDRESS        30144 non-null  object 
 3   ROOMS          30144 non-null  int8   
 4   TYPE           30144 non-null  object 
 5   PRICE          23573 non-null  float64
 6   METHOD         30144 non-null  object 
 7   SELLERG        30144 non-null  object 
 8   DATE.1         30144 non-null  object 
 9   DISTANCE       30144 non-null  float64
 10  POSTCODE       30144 non-null  float64
 11  BEDROOM2       23327 non-null  float64
 12  BATHROOM       23321 non-null  float64
 13  CAR            22982 non-null  float64
 14  LANDSIZE       20078 non-null  float64
 15  BUILDINGAREA   12147 non-null  float64
 16  YEARBUILT      13827 non-null  float64
 17  COUNCILAREA    30142 non-null  object 
 18  LATTIT

In [13]:
housepricingdf.isnull().sum()

DATE                 0
SUBURB               0
ADDRESS              0
ROOMS                0
TYPE                 0
PRICE             6571
METHOD               0
SELLERG              0
DATE.1               0
DISTANCE             0
POSTCODE             0
BEDROOM2          6817
BATHROOM          6823
CAR               7162
LANDSIZE         10066
BUILDINGAREA     17997
YEARBUILT        16317
COUNCILAREA          2
LATTITUDE         6600
LONGTITUDE        6600
REGIONNAME           2
PROPERTYCOUNT        2
dtype: int64

In [14]:
housepricingdf.describe()

Unnamed: 0,ROOMS,PRICE,DISTANCE,POSTCODE,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,YEARBUILT,LATTITUDE,LONGTITUDE,PROPERTYCOUNT
count,30144.0,23573.0,30144.0,30144.0,23327.0,23321.0,22982.0,20078.0,12147.0,13827.0,23544.0,23544.0,30142.0
mean,3.015725,1055272.0,10.894533,3114.059216,3.039311,1.606063,1.693195,584.022263,158.001108,1964.95017,-37.812025,145.001666,7544.185754
std,0.974964,646449.2,6.5589,104.132112,0.993397,0.723174,1.001103,3557.012648,422.167797,37.401358,0.086615,0.115427,4423.509742
min,1.0,85000.0,0.0,3000.0,0.0,0.0,0.0,0.0,0.0,1196.0,-38.1856,144.42379,83.0
25%,2.0,636000.0,6.3,3049.0,2.0,1.0,1.0,203.0,100.0,1940.0,-37.861803,144.936245,4380.0
50%,3.0,880000.0,10.1,3103.0,3.0,1.0,2.0,493.0,133.0,1970.0,-37.80878,145.0068,6763.0
75%,4.0,1300000.0,13.8,3153.0,4.0,2.0,2.0,666.75,184.0,2000.0,-37.757,145.069163,10331.0
max,16.0,11200000.0,48.1,3978.0,30.0,12.0,26.0,433014.0,44515.0,2106.0,-37.3951,145.52635,21650.0


In [15]:
housepricingdf.nunique()

DATE                68
SUBURB             346
ADDRESS          29486
ROOMS               12
TYPE                 3
PRICE             2722
METHOD               9
SELLERG            363
DATE.1              68
DISTANCE           213
POSTCODE           208
BEDROOM2            15
BATHROOM            11
CAR                 14
LANDSIZE          1631
BUILDINGAREA       721
YEARBUILT          156
COUNCILAREA         33
LATTITUDE        11161
LONGTITUDE       12203
REGIONNAME           8
PROPERTYCOUNT      339
dtype: int64

In [16]:
cols=['SUBURB','ROOMS','TYPE','METHOD','SELLERG','REGIONNAME','PROPERTYCOUNT','DISTANCE','COUNCILAREA','BEDROOM2','BATHROOM'
           ,'CAR','LANDSIZE','BUILDINGAREA','PRICE']
housepricingdf=housepricingdf[cols]
housepricingdf

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PRICE
0,Surrey Hills,3,h,S,Fletchers,Southern Metropolitan,5457.0,11.2,Boroondara City Council,3.0,1.0,2.0,490.0,,1205000.0
1,Surrey Hills,2,u,S,Fletchers,Southern Metropolitan,5457.0,11.2,Boroondara City Council,2.0,1.0,2.0,108.0,,813000.0
2,Canterbury,4,h,SP,Fletchers,Southern Metropolitan,3265.0,9.0,Boroondara City Council,4.0,2.0,0.0,808.0,198.0,
3,Williamstown,3,h,S,Greg,Western Metropolitan,6380.0,8.0,Hobsons Bay City Council,3.0,1.0,1.0,507.0,186.0,1535000.0
4,Newport,3,h,S,Village,Western Metropolitan,5498.0,8.4,Hobsons Bay City Council,2.0,1.0,2.0,281.0,100.0,826000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30139,Highett,3,h,S,Greg,Southern Metropolitan,4794.0,16.0,Bayside City Council,3.0,1.0,2.0,700.0,,1242000.0
30140,Highett,3,t,PI,Purplebricks,Southern Metropolitan,4794.0,16.0,Bayside City Council,,,,,,1395000.0
30141,Hoppers Crossing,4,h,S,Barry,Western Metropolitan,13830.0,18.4,Wyndham City Council,4.0,2.0,2.0,650.0,,540000.0
30142,South Melbourne,4,h,PI,Marshall,Southern Metropolitan,5943.0,1.9,Port Phillip City Council,4.0,2.0,0.0,178.0,174.0,


In [17]:
housepricingdf.isnull().sum()

SUBURB               0
ROOMS                0
TYPE                 0
METHOD               0
SELLERG              0
REGIONNAME           2
PROPERTYCOUNT        2
DISTANCE             0
COUNCILAREA          2
BEDROOM2          6817
BATHROOM          6823
CAR               7162
LANDSIZE         10066
BUILDINGAREA     17997
PRICE             6571
dtype: int64

In [18]:
cols_zero = ['PROPERTYCOUNT','DISTANCE','BEDROOM2','BATHROOM','CAR']
housepricingdf[cols_zero]=housepricingdf[cols_zero].fillna(0)


In [19]:
housepricingdf.LANDSIZE=housepricingdf.LANDSIZE.fillna(housepricingdf.LANDSIZE.mean())
housepricingdf.BUILDINGAREA=housepricingdf.BUILDINGAREA.fillna(housepricingdf.BUILDINGAREA.mean())


In [20]:
housepricingdf.dropna(inplace=True)
housepricingdf.isnull().sum()

SUBURB           0
ROOMS            0
TYPE             0
METHOD           0
SELLERG          0
REGIONNAME       0
PROPERTYCOUNT    0
DISTANCE         0
COUNCILAREA      0
BEDROOM2         0
BATHROOM         0
CAR              0
LANDSIZE         0
BUILDINGAREA     0
PRICE            0
dtype: int64

In [21]:
housepricingdf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23571 entries, 0 to 30141
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SUBURB         23571 non-null  object 
 1   ROOMS          23571 non-null  int8   
 2   TYPE           23571 non-null  object 
 3   METHOD         23571 non-null  object 
 4   SELLERG        23571 non-null  object 
 5   REGIONNAME     23571 non-null  object 
 6   PROPERTYCOUNT  23571 non-null  float64
 7   DISTANCE       23571 non-null  float64
 8   COUNCILAREA    23571 non-null  object 
 9   BEDROOM2       23571 non-null  float64
 10  BATHROOM       23571 non-null  float64
 11  CAR            23571 non-null  float64
 12  LANDSIZE       23571 non-null  float64
 13  BUILDINGAREA   23571 non-null  float64
 14  PRICE          23571 non-null  float64
dtypes: float64(8), int8(1), object(6)
memory usage: 2.7+ MB


In [22]:
housepricingdf.head()

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PRICE
0,Surrey Hills,3,h,S,Fletchers,Southern Metropolitan,5457.0,11.2,Boroondara City Council,3.0,1.0,2.0,490.0,158.001108,1205000.0
1,Surrey Hills,2,u,S,Fletchers,Southern Metropolitan,5457.0,11.2,Boroondara City Council,2.0,1.0,2.0,108.0,158.001108,813000.0
3,Williamstown,3,h,S,Greg,Western Metropolitan,6380.0,8.0,Hobsons Bay City Council,3.0,1.0,1.0,507.0,186.0,1535000.0
4,Newport,3,h,S,Village,Western Metropolitan,5498.0,8.4,Hobsons Bay City Council,2.0,1.0,2.0,281.0,100.0,826000.0
5,Newport,3,h,SP,Sweeney,Western Metropolitan,5498.0,8.4,Hobsons Bay City Council,3.0,2.0,2.0,301.0,195.0,1025000.0


In [23]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

In [24]:
# le.fit(housepricingdf) 
le1=LabelEncoder()
le2=LabelEncoder()
le3=LabelEncoder()
le4=LabelEncoder()
le5=LabelEncoder()
le6=LabelEncoder()
housepricingdf.SUBURB=le1.fit_transform(housepricingdf.SUBURB)
housepricingdf.TYPE = le2.fit_transform(housepricingdf.TYPE)
housepricingdf.METHOD = le3.fit_transform(housepricingdf.METHOD)
housepricingdf.SELLERG=le4.fit_transform(housepricingdf.SELLERG)
housepricingdf.REGIONNAME=le5.fit_transform(housepricingdf.REGIONNAME)
housepricingdf.COUNCILAREA = le6.fit_transform(housepricingdf.COUNCILAREA)
housepricingdf

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PRICE
0,292,3,0,1,95,5,5457.0,11.2,2,3.0,1.0,2.0,490.000000,158.001108,1205000.0
1,292,2,2,1,95,5,5457.0,11.2,2,2.0,1.0,2.0,108.000000,158.001108,813000.0
3,329,3,0,1,110,6,6380.0,8.0,10,3.0,1.0,1.0,507.000000,186.000000,1535000.0
4,228,3,0,1,296,6,5498.0,8.4,10,2.0,1.0,2.0,281.000000,100.000000,826000.0
5,228,3,0,3,275,6,5498.0,8.4,10,3.0,2.0,2.0,301.000000,195.000000,1025000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30137,166,4,0,3,314,6,5556.0,18.0,3,4.0,2.0,2.0,600.000000,174.000000,580000.0
30138,165,2,2,1,121,5,4794.0,16.0,1,2.0,1.0,1.0,130.000000,158.001108,677500.0
30139,165,3,0,1,110,5,4794.0,16.0,1,3.0,1.0,2.0,700.000000,158.001108,1242000.0
30140,165,3,1,0,235,5,4794.0,16.0,1,0.0,0.0,0.0,584.022263,158.001108,1395000.0


In [25]:
housepricingdf.head()

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PRICE
0,292,3,0,1,95,5,5457.0,11.2,2,3.0,1.0,2.0,490.0,158.001108,1205000.0
1,292,2,2,1,95,5,5457.0,11.2,2,2.0,1.0,2.0,108.0,158.001108,813000.0
3,329,3,0,1,110,6,6380.0,8.0,10,3.0,1.0,1.0,507.0,186.0,1535000.0
4,228,3,0,1,296,6,5498.0,8.4,10,2.0,1.0,2.0,281.0,100.0,826000.0
5,228,3,0,3,275,6,5498.0,8.4,10,3.0,2.0,2.0,301.0,195.0,1025000.0


In [26]:
housepricingdf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23571 entries, 0 to 30141
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SUBURB         23571 non-null  int64  
 1   ROOMS          23571 non-null  int8   
 2   TYPE           23571 non-null  int64  
 3   METHOD         23571 non-null  int64  
 4   SELLERG        23571 non-null  int64  
 5   REGIONNAME     23571 non-null  int64  
 6   PROPERTYCOUNT  23571 non-null  float64
 7   DISTANCE       23571 non-null  float64
 8   COUNCILAREA    23571 non-null  int64  
 9   BEDROOM2       23571 non-null  float64
 10  BATHROOM       23571 non-null  float64
 11  CAR            23571 non-null  float64
 12  LANDSIZE       23571 non-null  float64
 13  BUILDINGAREA   23571 non-null  float64
 14  PRICE          23571 non-null  float64
dtypes: float64(8), int64(6), int8(1)
memory usage: 2.7 MB


In [27]:
X = housepricingdf.drop(['PRICE'],axis=1)
y=housepricingdf.PRICE

In [28]:
X.shape,y.shape

((23571, 14), (23571,))

In [29]:
housepricingdf.shape

(23571, 15)

In [30]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
X_train.shape,X_test.shape

((16499, 14), (7072, 14))

In [31]:
from sklearn.linear_model import LinearRegression
lr=LinearRegression()
lr.fit(X_train,y_train)

In [32]:
from sklearn import metrics
pred = lr.predict(X_test)
score = metrics.r2_score(pred,y_test)
score

-0.35594039989051396

In [33]:
df= pd.DataFrame({'Actual_Price': np.round(y_test), 
                   'Predicted_Price': np.round(pred),
                 })
df['difference']=df.apply(lambda x: x.Predicted_Price-x.Actual_Price,axis=1)
df.head(10)

Unnamed: 0,Actual_Price,Predicted_Price,difference
24685,860000.0,114674.0,-745326.0
1164,568500.0,1089824.0,521324.0
5481,1950000.0,1844661.0,-105339.0
2963,875000.0,900313.0,25313.0
27038,1094000.0,1095174.0,1174.0
645,1715000.0,1591761.0,-123239.0
3562,2040000.0,1271457.0,-768543.0
1058,1405000.0,1257373.0,-147627.0
2188,981000.0,1037770.0,56770.0
8327,570500.0,1375430.0,804930.0


In [35]:
# Score It
print('MAE:',metrics.mean_absolute_error(y_test,pred))
print('MSE:',metrics.mean_squared_error(y_test,pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test,pred)))

MAE: 332479.4735138926
MSE: 245028578812.31766
RMSE: 495003.6149487372


In [36]:
# Calculated R Squared
print('R^2 =',metrics.explained_variance_score(y_test,pred))

R^2 = 0.4213412340993177


In [37]:
lr.fit(X_train,y_train).score(X_test, y_test)

0.4213374327840367

In [38]:
errors = abs(pred - y_test)
MSE=round(np.mean(errors),2)
MAPE=100*(errors/y_test)
accuracy=round (100 - np.mean(MAPE),2)
print(accuracy)

64.73


In [39]:
# Creating a User Defined Function within Snowflake to do the scoring there
def predict_pandas_udf(df: pd.DataFrame) -> pd.Series:
    return pd.Series(lr.predict(df))  



In [40]:
housepricingdf.columns = map(lambda x: str(x).upper(), housepricingdf.columns)
snowdf_details = session.createDataFrame(housepricingdf)
snowdf_details.show()
snowdf_details.write.mode("overwrite").saveAsTable("HOUSING.PUBLIC.FULL_HOUSINGPRICE_encoded") 

session.table("HOUSING.PUBLIC.FULL_HOUSINGPRICE_encoded").show(5)

create_temp_table is deprecated. We still respect this parameter when it is True but please consider using `table_type="temporary"` instead.


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SUBURB"  |"ROOMS"  |"TYPE"  |"METHOD"  |"SELLERG"  |"REGIONNAME"  |"PROPERTYCOUNT"  |"DISTANCE"  |"COUNCILAREA"  |"BEDROOM2"  |"BATHROOM"  |"CAR"  |"LANDSIZE"  |"BUILDINGAREA"      |"PRICE"    |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|292       |3        |0       |1         |95         |5             |5457.0           |11.2        |2              |3.0         |1.0         |2.0    |490.0       |158.00110757388654  |1205000.0  |
|292       |2        |2       |1         |95         |5             |5457.0           |11.2        |2              |2.0         |1.0         |2.0    |108.0       |158.00110757388654  |813000.0   |
|329       |3  

In [41]:
linear_model_vec = pandas_udf(func=predict_pandas_udf,
                                return_type=FloatType(),
                                input_types=[IntegerType(),IntegerType(),IntegerType(),IntegerType(),IntegerType(),IntegerType(),FloatType(),FloatType(),FloatType(),IntegerType(),FloatType(),FloatType(),FloatType(),FloatType()],
                                session=session,
                                packages = ("pandas","scikit-learn"), max_batch_size=200)

In [42]:

# Calling the UDF to do the scoring (pushing down to Snowflake)
output = session.table('HOUSING.PUBLIC.FULL_HOUSINGPRICE_encoded').select(*list(X.columns),
                    linear_model_vec(list(X.columns)).alias('PREDICTED_PRICE'),
                    (F.col('Price')).alias('ACTUAL_PRICE')                                              
                    )

output.show(5)


Passing arguments to a UDF with a list or tuple is deprecated. We still respect this invocation but please consider passing variable-length arguments without a list or tuple.


------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SUBURB"  |"ROOMS"  |"TYPE"  |"METHOD"  |"SELLERG"  |"REGIONNAME"  |"PROPERTYCOUNT"  |"DISTANCE"  |"COUNCILAREA"  |"BEDROOM2"  |"BATHROOM"  |"CAR"  |"LANDSIZE"  |"BUILDINGAREA"      |"PREDICTED_PRICE"   |"ACTUAL_PRICE"  |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|292       |3        |0       |1         |95         |5             |5457.0           |11.2        |2              |3.0         |1.0         |2.0    |490.0       |158.00110757388654  |1049874.6853844114  |1205000.0       |
|292       |2        |2       |1         |95         |5             |5457.0           |11.2        |2       

In [43]:
output=output.to_pandas()

In [44]:
output.head()

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PREDICTED_PRICE,ACTUAL_PRICE
0,219,3,0,1,46,5,2555.0,14.6,12,3.0,1.0,3.0,771.0,158.001108,940103.6,1020000.0
1,11,2,2,1,136,5,4836.0,6.3,27,2.0,1.0,1.0,0.0,76.0,702111.2,599000.0
2,329,4,0,1,317,6,6380.0,8.0,10,3.0,2.0,1.0,452.0,158.001108,1588412.0,1325000.0
3,239,2,0,1,46,4,3692.0,14.7,12,0.0,0.0,0.0,584.022263,158.001108,686318.3,1242000.0
4,233,3,0,2,197,2,11364.0,5.5,6,3.0,2.0,2.0,286.0,188.0,1445616.0,1382500.0


In [45]:
output.SUBURB=le1.inverse_transform(output.SUBURB)
output.TYPE = le2.inverse_transform(output.TYPE)
output.METHOD = le3.inverse_transform(output.METHOD)
output.SELLERG=le4.inverse_transform(output.SELLERG)
output.REGIONNAME=le5.inverse_transform(output.REGIONNAME)
output.COUNCILAREA = le6.inverse_transform(output.COUNCILAREA)


In [46]:
output.head()

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PREDICTED_PRICE,ACTUAL_PRICE
0,Moorabbin,3,h,S,Buxton,Southern Metropolitan,2555.0,14.6,Kingston City Council,3.0,1.0,3.0,771.0,158.001108,940103.6,1020000.0
1,Armadale,2,u,S,Jellis,Southern Metropolitan,4836.0,6.3,Stonnington City Council,2.0,1.0,1.0,0.0,76.0,702111.2,599000.0
2,Williamstown,4,h,S,hockingstuart,Western Metropolitan,6380.0,8.0,Hobsons Bay City Council,3.0,2.0,1.0,452.0,158.001108,1588412.0,1325000.0
3,Oakleigh South,2,h,S,Buxton,South-Eastern Metropolitan,3692.0,14.7,Kingston City Council,0.0,0.0,0.0,584.022263,158.001108,686318.3,1242000.0
4,Northcote,3,h,SA,Nelson,Northern Metropolitan,11364.0,5.5,Darebin City Council,3.0,2.0,2.0,286.0,188.0,1445616.0,1382500.0


In [48]:
snowdf_details = session.createDataFrame(output)
snowdf_details.show()
snowdf_details.write.mode("overwrite").saveAsTable("HOUSING.PUBLIC.FULL_HOUSINGPRICE_PREDICTED_LR") 




--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SUBURB"        |"ROOMS"  |"TYPE"  |"METHOD"  |"SELLERG"      |"REGIONNAME"                |"PROPERTYCOUNT"  |"DISTANCE"  |"COUNCILAREA"               |"BEDROOM2"  |"BATHROOM"  |"CAR"  |"LANDSIZE"         |"BUILDINGAREA"      |"PREDICTED_PRICE"   |"ACTUAL_PRICE"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Moorabbin       |3        |h       |S         |Buxton         |Southern Metropolitan       |2555.0           |14.6        |Kingston City Council       |3.0         |1.0         |3.0    |771.0       

In [51]:
# Also get a local dataframe to review the results
snowdf_details=snowdf_details.toPandas()


AttributeError: 'DataFrame' object has no attribute 'toPandas'

In [52]:
snowdf_details

Unnamed: 0,SUBURB,ROOMS,TYPE,METHOD,SELLERG,REGIONNAME,PROPERTYCOUNT,DISTANCE,COUNCILAREA,BEDROOM2,BATHROOM,CAR,LANDSIZE,BUILDINGAREA,PREDICTED_PRICE,ACTUAL_PRICE
0,Moorabbin,3,h,S,Buxton,Southern Metropolitan,2555.0,14.6,Kingston City Council,3.0,1.0,3.0,771.000000,158.001108,9.401036e+05,1020000.0
1,Armadale,2,u,S,Jellis,Southern Metropolitan,4836.0,6.3,Stonnington City Council,2.0,1.0,1.0,0.000000,76.000000,7.021112e+05,599000.0
2,Williamstown,4,h,S,hockingstuart,Western Metropolitan,6380.0,8.0,Hobsons Bay City Council,3.0,2.0,1.0,452.000000,158.001108,1.588412e+06,1325000.0
3,Oakleigh South,2,h,S,Buxton,South-Eastern Metropolitan,3692.0,14.7,Kingston City Council,0.0,0.0,0.0,584.022263,158.001108,6.863183e+05,1242000.0
4,Northcote,3,h,SA,Nelson,Northern Metropolitan,11364.0,5.5,Darebin City Council,3.0,2.0,2.0,286.000000,188.000000,1.445616e+06,1382500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23566,Lower Plenty,2,u,VB,Fletchers,Eastern Metropolitan,1624.0,14.6,Banyule City Council,2.0,1.0,1.0,119.000000,158.001108,3.113103e+05,420000.0
23567,Keilor,3,u,SP,Brad,Western Metropolitan,2339.0,14.5,Brimbank City Council,0.0,0.0,0.0,584.022263,158.001108,7.211543e+05,650000.0
23568,Heathmont,3,h,S,Ray,Eastern Metropolitan,3794.0,21.3,Maroondah City Council,3.0,2.0,2.0,471.000000,144.000000,7.910787e+05,912000.0
23569,Heidelberg Heights,3,h,S,Ray,Eastern Metropolitan,2947.0,8.8,Banyule City Council,3.0,1.0,3.0,619.000000,158.001108,1.150500e+06,810000.0
