### Define Data Path

In [0]:
import urllib
import urllib.parse

from pyspark.sql import SparkSession
from pyspark.sql import functions as f

In [0]:
# Obtain the SparkSession for this notebook, registered under the app name "Salary".
spark = (
    SparkSession.builder
    .appName("Salary")
    .getOrCreate()
)

In [0]:
# Reader options for the CSV file: source format, header flag and column separator.
# The header flag is kept as the string "true" because it is passed straight to
# the reader's .option("header", ...) call.
file_type, first_row_is_header, delimiter = "csv", "true", ","

In [0]:
# Load the AWS credentials CSV into a Spark DataFrame, using the reader
# options (format, header flag, separator) defined above.
credentials_reader = spark.read.format(file_type)
credentials_reader = credentials_reader.option("header", first_row_is_header)
credentials_reader = credentials_reader.option("sep", delimiter)
aws_keys_df = credentials_reader.load("/FileStore/tables/S3Admin_user_credentials.csv")

In [0]:
# Get the AWS access key and secret key from the Spark DataFrame.
# The "S3Admin" row is fetched once and both keys are read from it, instead of
# filtering and collecting the DataFrame separately for each key.
# NOTE(review): the credentials come from a CSV file under /FileStore; consider
# whether a secret store would be more appropriate for this workspace.
s3_admin_row = aws_keys_df.where(f.col("User name") == "S3Admin").first()
if s3_admin_row is None:
    # first() returns None for an empty result; fail with a clear message
    # rather than an opaque IndexError from collect()[0].
    raise ValueError(
        'No row with "User name" == "S3Admin" found in the AWS credentials file'
    )

ACCESS_KEY = s3_admin_row["Access key ID"]
SECRET_KEY = s3_admin_row["Secret access key"]
# Percent-encode the secret key so it can be embedded in a URL; safe="" means
# that "/" is escaped as well.
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")



In [0]:
# AWS S3 constants: region and bucket name, followed by the bucket-relative
# prefixes for the raw, processed and gold folders.
S3_REGION, S3_BUCKET = "eu-west-3", "so-salary-survey"
S3_RAW_PATH, S3_PROCESS_PATH, S3_GOLD_PATH = (
    f"{S3_BUCKET}/raw/",
    f"{S3_BUCKET}/processed/",
    f"{S3_BUCKET}/gold/",
)
# Example S3 URI of a raw file: "so-salary-survey/raw/2021-survey_results_public.csv"



In [0]:
# Path under which the bucket is mounted (passed to dbutils.fs.mount below).
MOUNT_NAME = "/mnt/so-salary-survey"
# s3a source URL built from the access key, the URL-encoded secret key and the
# bucket name, in that order.
SOURCE_URL = "s3a://{0}:{1}@{2}".format(
    *(ACCESS_KEY, ENCODED_SECRET_KEY, S3_BUCKET)
)



In [0]:
# Get the SparkSession and point the Hadoop S3A connector at the bucket's
# region, authenticating with the access/secret key pair read earlier.
# NOTE(review): a session is already created near the top of the notebook, so
# getOrCreate() presumably returns that same session here — confirm.
spark = (
    SparkSession.builder
    .appName("salary")
    .getOrCreate()
)

hadoop_conf = spark._jsc.hadoopConfiguration()
s3a_settings = {
    "fs.s3a.access.key": ACCESS_KEY,
    "fs.s3a.secret.key": SECRET_KEY,
    "fs.s3a.endpoint": "s3." + S3_REGION + ".amazonaws.com",
}
# Apply the settings in the order listed above (dicts keep insertion order).
for setting_name, setting_value in s3a_settings.items():
    hadoop_conf.set(setting_name, setting_value)



In [0]:
# NOTE(review): this cell repeats the MOUNT_NAME / SOURCE_URL definitions from
# the earlier mount-configuration cell with identical values; it looks like a
# copy-paste leftover.
# Path under which the bucket is mounted.
MOUNT_NAME = "/mnt/so-salary-survey"
# s3a source URL: access key, URL-encoded secret key, then bucket name.
SOURCE_URL = str.format(
    "s3a://{0}:{1}@{2}", ACCESS_KEY, ENCODED_SECRET_KEY, S3_BUCKET
)



In [0]:
# Mount the S3 bucket at MOUNT_NAME unless that mount point is already registered.
existing_mount_points = [mount.mountPoint for mount in dbutils.fs.mounts()]
if MOUNT_NAME not in existing_mount_points:
    dbutils.fs.mount(SOURCE_URL, MOUNT_NAME)

