# Spectrum Spatial Addressing for Databricks - Installation
This sample installation file explains how to install and configure everything required for the OAS SDK.

# Configuration

In [None]:
# Configuration cell: defines the user-editable settings, derives the install
# locations from them, and writes everything to a shell file that the %sh cells
# source.

import datetime
import shlex

# Update the following with the API and Secret keys for your DataExperience account; these can be generated by visiting https://data.precisely.com/autodownload.
PB_API_KEY = "YOUR_API_KEY"
PB_SECRET = "YOUR_API_SECRET"

# SDK Download URL.
# Alter SDK_URL to point to the location where you placed the spectrum-bigdata-addressing-VERSION.zip.
# See the README for links to the documentation covering how to create presigned URLs for AWS and Azure.
SDK_URL = "YOUR_PRESIGNED_AWS_S3_URL_OR_DBFS_ZIP_PATH"

# Everything is installed under the following directory. You can change it to suit your environment,
# but you will need to use the same value in the Geocoding Demo Workspace.
AddressingRootDBFS = "/addressing"

# Version of the data vintage to download. The required format is "YEAR.MONTH".
DATA_VINTAGE = "2022.3"

# Release date of the data vintage in "YEAR-MONTH-DAY" format, for example 2019-12-01.
# NOTE(review): the original note said this defaults to the first day of the month, but the
# value below is the 20th - confirm the expected date against your data catalog.
DATA_RELEASE_DATE = "2022-03-20"

# Configure the datasets to be downloaded from data.precisely.com.
# Each entry is a '#'-separated descriptor that embeds the vintage and release date set above.
SDM_GEOCODING_SPDS = [
    f"Geocoding MLD US#United States#All USA#Spectrum Platform Data#1.0.0#{DATA_VINTAGE}#{DATA_RELEASE_DATE}",
    f"Geocoding TT Street US#United States#All USA#Spectrum Platform Data#1.0.0#{DATA_VINTAGE}#{DATA_RELEASE_DATE}",
]

# Timestamp captured when this cell runs; it is not referenced again in this cell.
today = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')


# Local Environment Setup - The remaining lines should not need to be modified
DBFS_BASE_LOCATION = f"{AddressingRootDBFS}"
DBFS_SDK_EXTRACT_LOCATION = f"{DBFS_BASE_LOCATION}/sdk"
# The trailing '*' is a shell glob; it is expanded by the %sh cells when the variable is used unquoted.
DBFS_SDK_LOCATION = f"{DBFS_SDK_EXTRACT_LOCATION}/spectrum-bigdata-addressing*"
DBFS_DATA_LOCATION = f"{DBFS_BASE_LOCATION}/data"

LOCAL_DATA_TMP = f"{DBFS_BASE_LOCATION}/tmp/data"
LOCAL_DATA_ZIPPED = f"{LOCAL_DATA_TMP}/zip"
LOCAL_DATA_UNZIPPED = f"{LOCAL_DATA_TMP}/unzipped"

# URL of the PDX SDK jar on GitHub; a later %sh cell downloads it to DBFS_PDX_SDK_JAR.
PDX_SDK_URL = "https://raw.githubusercontent.com/PreciselyData/big-data/dev/databricks-geocoding/lib/precisely-bigdata-pdx-sdk3.0.1-full.jar"

PDX_CLASSNAME = "com.precisely.pdx.sdkexample.SampleDemoApp"
DBFS_PDX_SDK_JAR = f"{DBFS_SDK_EXTRACT_LOCATION}/pdx-sdk.jar"

# Shell variables to export, in the order they are written to the environment file.
# DBFS paths get a "/dbfs" prefix here; the LOCAL_* paths are written as-is.
_ENV_EXPORTS = [
    ("SDK_URL", SDK_URL),
    ("PDX_API_KEY", PB_API_KEY),
    ("PDX_SECRET", PB_SECRET),
    ("DATA_VINTAGE", DATA_VINTAGE),
    ("DATA_RELEASE_DATE", DATA_RELEASE_DATE),
    ("DBFS_SDK_EXTRACT_LOCATION", f"/dbfs{DBFS_SDK_EXTRACT_LOCATION}"),
    ("DBFS_SDK_LOCATION", f"/dbfs{DBFS_SDK_LOCATION}"),
    ("DBFS_DATA_LOCATION", f"/dbfs{DBFS_DATA_LOCATION}"),
    ("LOCAL_DATA_TMP", LOCAL_DATA_TMP),
    ("LOCAL_DATA_ZIPPED", LOCAL_DATA_ZIPPED),
    ("LOCAL_DATA_UNZIPPED", LOCAL_DATA_UNZIPPED),
    ("PDX_SDK_URL", PDX_SDK_URL),
    ("PDX_CLASSNAME", PDX_CLASSNAME),
    ("DBFS_PDX_SDK_JAR", f"/dbfs{DBFS_PDX_SDK_JAR}"),
]

# Every value is passed through shlex.quote so that keys, secrets and presigned URLs containing
# shell metacharacters (&, $, ;, spaces, ...) are stored literally instead of being interpreted
# when the file is sourced.
_env_lines = ["#!/bin/bash", ""]
_env_lines += [f"export {name}={shlex.quote(str(value))}" for name, value in _ENV_EXPORTS]

# GEOCODING_SPDS is written as a bash array with one quoted element per dataset descriptor.
_spds_literal = " ".join(shlex.quote(str(spd)) for spd in SDM_GEOCODING_SPDS)
_env_lines.append(f"export GEOCODING_SPDS=({_spds_literal})")
_env_lines += ["", ""]

# Each %sh cell sources this file so that the variables are available in its environment.
# NOTE(review): the API key and secret are written in plain text to this file - confirm that is
# acceptable for your cluster.
# The final argument (True) overwrites any existing file.
dbutils.fs.put("file:///dbricks_env.sh", "\n".join(_env_lines), True)

In [None]:
%sh . /dbricks_env.sh

# Download the addressing SDK zip from SDK_URL and extract it into a freshly
# created DBFS_SDK_EXTRACT_LOCATION.

# ${VAR:?} aborts the cell if the environment file did not define the variable,
# so rm -rf never runs with an empty path. Quoting prevents word splitting.
rm -rf "${DBFS_SDK_EXTRACT_LOCATION:?DBFS_SDK_EXTRACT_LOCATION is not set; run the configuration cell first}"
mkdir -p "$DBFS_SDK_EXTRACT_LOCATION"

if [ -n "$SDK_URL" ]
then
  echo "Installing addressing SDK..."
  # --fail makes curl exit non-zero on an HTTP error (for example an expired
  # presigned URL) instead of saving the error page as addressing-sdk.zip.
  if ! curl --fail -o addressing-sdk.zip "$SDK_URL"
  then
    echo "Failed to download the addressing SDK from SDK_URL" >&2
    exit 1
  fi
  unzip -d "$DBFS_SDK_EXTRACT_LOCATION" addressing-sdk.zip
else
  echo "Not installing addressing SDK"
fi

In [None]:
%sh . /dbricks_env.sh

# Download the PDX SDK jar from PDX_SDK_URL to DBFS_PDX_SDK_JAR.
if [ -n "$PDX_SDK_URL" ]
then
  echo "Installing PDX SDK..."
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error response body as the jar.
  if ! curl --fail -o "$DBFS_PDX_SDK_JAR" "$PDX_SDK_URL"
  then
    echo "Failed to download the PDX SDK jar from PDX_SDK_URL" >&2
    exit 1
  fi
else
  echo "Not installing PDX SDK"
fi

## Install Data

In [None]:
%sh . /dbricks_env.sh
# Recreate the data directory, then run the PDX SDK sample application once per
# dataset descriptor in GEOCODING_SPDS.

# ${VAR:?} aborts the cell if the environment file did not define the variable,
# so rm -rf never runs with an empty path. Quoting prevents word splitting.
rm -rf "${DBFS_DATA_LOCATION:?DBFS_DATA_LOCATION is not set; run the configuration cell first}"
mkdir -p "$DBFS_DATA_LOCATION"

# printf emits one descriptor per line; xargs substitutes each line for {spd}
# and runs up to 4 java processes in parallel (-P 4).
# The escaped quotes around {spd} are passed to java as literal characters.
# NOTE(review): -a/-s/-d/-dd presumably mean API key, secret, destination
# directory and dataset descriptor - confirm against the PDX SDK documentation.
printf '%s\n' "${GEOCODING_SPDS[@]}" | xargs -P 4 -I {spd} \
  java -cp "$DBFS_PDX_SDK_JAR" "$PDX_CLASSNAME" \
    -a "$PDX_API_KEY" -s "$PDX_SECRET" -d "$DBFS_DATA_LOCATION" -dd \"{spd}\"

In [None]:
%sh . /dbricks_env.sh

# Print the follow-up instructions for creating the cluster library.
cat <<'EOF'
If the SDK has been installed or updated, you should now create the Databricks library and attach it to your cluster.

Choose the jar below to be uploaded in the cluster:
EOF

# List the Spark SDK jar(s) under the extracted SDK. $DBFS_SDK_LOCATION is left
# unquoted on purpose so that the '*' it contains is expanded by the shell.
# sed rewrites the first "/dbfs" on each line to "dbfs:".
ls $DBFS_SDK_LOCATION/pyspark/sdk/lib/spectrum-bigdata-addressing-sdk-spark*.jar | sed 's|/dbfs|dbfs:|'