# Real Estate Rental ROI Prediction - Loading Dataset using Snowpark Python


## Loading Real Estate Ads into Snowflake

### Import the dependencies and connect to Snowflake

In [None]:
# Snowpark: session entry point, column functions and data types.
# importlib.metadata.version is used below to report the installed package version.
from importlib.metadata import version

from snowflake.snowpark import Session
from snowflake.snowpark.functions import *
from snowflake.snowpark.types import *

# Show the version of Snowpark we are using (last expression => cell output)
version("snowflake_snowpark_python")

'0.12.0'

In [None]:
# Other
import json

**Before connecting make sure you have updated creds.json with information for your Snowflake account**

In [None]:
# Read the Snowflake connection settings from the local creds.json file.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default (locale-dependent) encoding.
with open("creds.json", encoding="utf-8") as f:
    connection_parameters = json.load(f)

In [None]:
# Open a Snowpark session configured from the parameters loaded from creds.json
session = (
    Session.builder
    .configs(connection_parameters)
    .create()
)

The **get_** functions can be used to get information about the current database, schema, role, etc.

In [None]:
# Report the schema, role and warehouse the session is currently using.
# The message is split across adjacent f-string literals purely for line length;
# the printed text is unchanged.
print(
    f"Current schema: {session.get_fully_qualified_current_schema()}, "
    f"current role: {session.get_current_role()}, "
    f"current warehouse:  {session.get_current_warehouse()}"
)

Current schema: "SNOWPARKDEMO_DB"."DATASCIENCEV1", current role: "SNOWPARKDEMO_ROLE", current warehouse:  "SNOWPARKDEMO_WH"


### Define Staging Area and the Schema for the transaction table

Using SQL we can create an internal stage and then use the **put** function to upload the **realestatesads.csv.gz** file to it.

In [None]:
stage_name = "REAL_ESTATE_DATA"

# (Re)create the internal stage that will receive the source file
create_stage_sql = f"CREATE or replace STAGE {stage_name}"
session.sql(create_stage_sql).collect()

# Upload the source file to the stage; auto_compress is disabled for this
# upload (the file name indicates it is already a .gz archive)
putResult = session.file.put(
    "data/realestatesads.csv.gz",
    f"@{stage_name}",
    auto_compress=False,
)

# Display the upload status returned by put
putResult

[PutResult(source='realestatesads.csv.gz', target='realestatesads.csv.gz', source_size=50042169, target_size=50042176, source_compression='GZIP', target_compression='GZIP', status='UPLOADED', message='')]

Define the schema for our **ADS** table

In [None]:
# Define the schema for the ADS table
df_ADS_Schema = StructType(
    [
        StructField("ADS_ID", IntegerType()),
        StructField("ADS_CATEGORY_NAME", StringType()),
        StructField("ADS_FIRST_PUBLICATION_DATE", DateType()),
        StructField("ADS_SUBJECT", StringType()),
        StructField("ADS_PRICE", IntegerType()),
        StructField("ADS_OPT_URGENT", BooleanType()),
        StructField("ADS_OWNER_TYPE", StringType()),
        StructField("ADS_ATTR_REAL_ESTATE_TYPE", StringType()),
        StructField("ADS_ATTR_ROOMS", IntegerType()),
        StructField("ADS_ATTR_SQUARE", IntegerType()),
        StructField("ADS_ATTR_GES", StringType()),
        StructField("ADS_ATTR_ENERGY_RATE", StringType()),
        StructField("ADS_ATTR_FURNISHED", StringType()),
        StructField("ADS_GEO_LAT", FloatType()),
        StructField("ADS_GEO_LNG", FloatType()),
        StructField("ADS_GEO_CITY_LAT", FloatType()),
        StructField("ADS_GEO_CITY_LNG", FloatType()),
        StructField("ADS_GEO_CITY", StringType()),
        StructField("ADS_GEO_ZIPCODE", StringType()),
        StructField("ADS_GEO_REGION", StringType()),
        StructField("ADS_GEO_DEPARTEMENT", StringType()),
        StructField("ADS_GEO_ARRONDISSEMENT", StringType()),
        StructField("ADS_GEO_ARRONDISSEMENT_LAT", FloatType()),
        StructField("ADS_GEO_ARRONDISSEMENT_LNG", FloatType()),
    ]
)

Load **realestatesads.csv.gz** with a DataFrame reader and save it into a table

In [None]:
# Create a reader bound to the ADS schema
df_Reader = session.read.schema(df_ADS_Schema)

# Location of the uploaded file inside the stage
ads_stage_path = f"@{stage_name}/realestatesads.csv.gz"

# Get the staged CSV data into the data frame
df_ADS = df_Reader.csv(ads_stage_path)

In [None]:
# Persist the data frame as the ANNONCES_IMMOBILIERES table, replacing any
# existing table of that name (overwrite mode)
ret = df_ADS.write.save_as_table("ANNONCES_IMMOBILIERES", mode="overwrite")
ret

Check the result

In [None]:
import pandas as pd

# Keep only the ads whose category is "Locations"
locations_only = col("ADS_CATEGORY_NAME") == "Locations"
ADS_df_Raw = session.table("ANNONCES_IMMOBILIERES").filter(locations_only)

# Fetch at most 1000 rows to the client and show them as a pandas DataFrame
pd.DataFrame(ADS_df_Raw.limit(1000).collect())

Unnamed: 0,ADS_ID,ADS_CATEGORY_NAME,ADS_FIRST_PUBLICATION_DATE,ADS_SUBJECT,ADS_PRICE,ADS_OPT_URGENT,ADS_OWNER_TYPE,ADS_ATTR_REAL_ESTATE_TYPE,ADS_ATTR_ROOMS,ADS_ATTR_SQUARE,...,ADS_GEO_LNG,ADS_GEO_CITY_LAT,ADS_GEO_CITY_LNG,ADS_GEO_CITY,ADS_GEO_ZIPCODE,ADS_GEO_REGION,ADS_GEO_DEPARTEMENT,ADS_GEO_ARRONDISSEMENT,ADS_GEO_ARRONDISSEMENT_LAT,ADS_GEO_ARRONDISSEMENT_LNG
0,2231477249,Locations,2022-10-03,T3- Croix Blanche - 64 m²,457,False,pro,Appartement,3,64,...,5.227580,46.204512,5.248452,Bourg-en-Bresse (01),01000,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
1,2231978322,Locations,2022-10-04,Appartement 3 pièces 66 m²,505,False,pro,Appartement,3,66,...,5.227580,46.204512,5.248452,Bourg-en-Bresse (01),01000,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
2,2231978329,Locations,2022-10-04,Appartement 3 pièces 67 m²,688,False,pro,Appartement,3,67,...,5.227580,46.204512,5.248452,Bourg-en-Bresse (01),01000,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
3,2231978327,Locations,2022-10-04,Appartement 1 pièce 30 m²,330,False,pro,Appartement,1,30,...,5.227580,46.204512,5.248452,Bourg-en-Bresse (01),01000,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
4,2231814469,Locations,2022-10-04,Appartement 3 pièces 67 m²,498,False,pro,Appartement,3,67,...,5.227580,46.204512,5.248452,Bourg-en-Bresse (01),01000,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2217050757,Locations,2022-09-06,Duplex 3 pièces 76 m²,700,False,pro,Appartement,3,76,...,5.071280,46.062918,5.091758,Marlieux (01),01240,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
996,2210159893,Locations,2022-08-24,Appartement 2 pièces 44 m²,525,False,pro,Appartement,2,44,...,5.071280,46.062918,5.091758,Marlieux (01),01240,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
997,2187831199,Locations,2022-07-07,Appartement 3 pièces 54 m²,452,False,pro,Appartement,3,54,...,5.194470,46.123091,5.212342,Lent (01),01240,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
998,2193216210,Locations,2022-07-19,Appartement 2 pièces 61 m²,565,False,pro,Appartement,2,61,...,5.194470,46.123091,5.212342,Lent (01),01240,Auvergne-Rhône-Alpes,Ain (01),Bourg-en-Bresse,46.209915,5.092495
