### Acessar o Azure Data Lake utilizando uma chave de acesso

1. Definir as configurações do Spark fs.azure.account.key
2. Listar os arquivos presentes na camada "bronze"
3. Ler os dados do arquivo "circuits.csv"

In [None]:
# 1. Set the Spark configuration entry fs.azure.account.key for the storage account.
# NOTE(review): both placeholders must be replaced before running; consider
# loading the key from a secret scope rather than pasting it into the notebook.
account_key_setting = "fs.azure.account.key.<storage-account>.dfs.core.windows.net"
spark.conf.set(account_key_setting, "<key-access>")

In [None]:
# 2. List the files present in the bronze layer.
# The listing call stays as the last expression so the notebook displays it.
bronze_root = 'abfss://bronze@<storage-account>.dfs.core.windows.net/'
dbutils.fs.ls(bronze_root)

In [None]:
# 3. Read the data from the "circuits.csv" file.
# The path targets the file itself; pointing the reader at the container root
# would make Spark parse every file stored there, not just circuits.csv.
circuits_path = 'abfss://bronze@<storage-account>.dfs.core.windows.net/circuits.csv'
circuits_df = spark.read.csv(circuits_path, header=True)

# 3.1 Show only the column names.
# A bare expression in the middle of a cell is evaluated and discarded, so the
# names are printed explicitly.
print(circuits_df.columns)

# 3.2 Show the first 10 rows in tabular form.
circuits_df.show(n=10)

+---------+--------------+--------------------+------------+---------+--------+--------+---+--------------------+
|circuitId|    circuitRef|                name|    location|  country|     lat|     lng|alt|                 url|
+---------+--------------+--------------------+------------+---------+--------+--------+---+--------------------+
|        1|   albert_park|Albert Park Grand...|   Melbourne|Australia|-37.8497| 144.968| 10|http://en.wikiped...|
|        2|        sepang|Sepang Internatio...|Kuala Lumpur| Malaysia| 2.76083| 101.738| 18|http://en.wikiped...|
|        3|       bahrain|Bahrain Internati...|      Sakhir|  Bahrain| 26.0325| 50.5106|  7|http://en.wikiped...|
|        4|     catalunya|Circuit de Barcel...|    Montmeló|    Spain|   41.57| 2.26111|109|http://en.wikiped...|
|        5|      istanbul|       Istanbul Park|    Istanbul|   Turkey| 40.9517|  29.405|130|http://en.wikiped...|
|        6|        monaco|   Circuit de Monaco| Monte-Carlo|   Monaco| 43.7347| 7.42056|

In [None]:
# Read the file again, this time supplying an explicit schema.
from pyspark.sql.types import StructType, IntegerType, StringType, DoubleType

# Each add(name, type, nullable) call appends one column to the schema;
# every column is declared nullable.
schema = (
    StructType()
    .add('circuitId', IntegerType(), True)
    .add('circuitRef', StringType(), True)
    .add('name', StringType(), True)
    .add('location', StringType(), True)
    .add('country', StringType(), True)
    .add('lat', DoubleType(), True)
    .add('lng', DoubleType(), True)
    .add('alt', IntegerType(), True)
    .add('url', StringType(), True)
)

# Load circuits.csv itself: loading the container root would apply this schema
# to every file stored in the container, not only to the circuits data.
df = (
    spark.read.format("csv")
    .schema(schema)
    .option("header", "true")
    .load('abfss://bronze@<storage-account>.dfs.core.windows.net/circuits.csv')
)

df.show()

+---------+--------------+--------------------+------------+---------+--------+---------+---+--------------------+
|circuitId|    circuitRef|                name|    location|  country|     lat|      lng|alt|                 url|
+---------+--------------+--------------------+------------+---------+--------+---------+---+--------------------+
|        1|   albert_park|Albert Park Grand...|   Melbourne|Australia|-37.8497|  144.968| 10|http://en.wikiped...|
|        2|        sepang|Sepang Internatio...|Kuala Lumpur| Malaysia| 2.76083|  101.738| 18|http://en.wikiped...|
|        3|       bahrain|Bahrain Internati...|      Sakhir|  Bahrain| 26.0325|  50.5106|  7|http://en.wikiped...|
|        4|     catalunya|Circuit de Barcel...|    Montmeló|    Spain|   41.57|  2.26111|109|http://en.wikiped...|
|        5|      istanbul|       Istanbul Park|    Istanbul|   Turkey| 40.9517|   29.405|130|http://en.wikiped...|
|        6|        monaco|   Circuit de Monaco| Monte-Carlo|   Monaco| 43.7347| 