# Extract data from the customers JSON files
1. Query single file
2. Query list of files using wildcard characters
3. Query all the files in a folder
4. Select file metadata
5. Register files in Unity Catalog as a Delta table
6. Create temporary view
7. Create global temporary view

## 1. Query single file

In [0]:
# List the files available in the customers landing folder.
display(dbutils.fs.ls("/Volumes/gizmobox/landing/operational_data/customers"))

In [0]:
# Load one specific customers JSON file and render it.
df = spark.read.json(
    "/Volumes/gizmobox/landing/operational_data/customers/customers_2024_10.json"
)
display(df)

## 2. Query multiple json files

In [0]:
# The * wildcard in the path matches every customers_2024_<suffix>.json file.
df = spark.read.json(
    "/Volumes/gizmobox/landing/operational_data/customers/customers_2024_*.json"
)
display(df)

## 3. Query all the files in a folder

In [0]:
# Pointing the reader at the folder itself loads all the files it contains.
df = spark.read.json('/Volumes/gizmobox/landing/operational_data/customers/')
display(df)

## 4. Select file metadata

In [0]:
# Read the whole customers folder and place the source file path of each
# record (taken from the _metadata column) in front of the data columns.
df = (
    spark.read
    .json('/Volumes/gizmobox/landing/operational_data/customers/')
    .select('_metadata.file_path', '*')
)
display(df)

## 5. Register files in Unity Catalog as a Delta table

In [0]:
# DataFrameWriter V1 API: save the DataFrame as a Delta table,
# using 'overwrite' mode so existing table contents are replaced.
(
    df.write
    .mode('overwrite')
    .format('delta')
    .saveAsTable('gizmobox.bronze.py_customers')
)

In [0]:
# DataFrameWriter V2 API: create the table, or replace it if it already exists.
(
    df.writeTo('gizmobox.bronze.py_customers')
    .createOrReplace()
)

In [0]:
%sql
-- Preview the contents of the py_customers table in the bronze schema.
SELECT *
FROM gizmobox.bronze.py_customers;

In [0]:
# using V2 API
# TODO(review): this cell contains no code; the V2 API (writeTo) example
# already exists in an earlier cell. Remove this cell or add the intended code.

## 6. Create temporary view

In [0]:
%sql
-- Temporary view over every JSON file in the customers folder.
-- The view name is unqualified: no catalog or schema prefix is needed.
CREATE OR REPLACE TEMPORARY VIEW tv_customers AS
SELECT *
FROM json.`/Volumes/gizmobox/landing/operational_data/customers/`;

In [0]:
%sql
-- Query the temporary view by its unqualified name.
SELECT *
FROM tv_customers;

## 7. Create global temporary view

In [0]:
%sql
-- Global temporary view over every JSON file in the customers folder.
-- The name is written without a qualifier here, but the view is read back
-- through the global_temp schema (see the query that follows).
-- NOTE(review): this reuses the name of the session temporary view tv_customers;
-- a distinct name (e.g. gtv_customers) would be less confusing.
CREATE OR REPLACE GLOBAL TEMPORARY VIEW tv_customers AS
SELECT *
FROM json.`/Volumes/gizmobox/landing/operational_data/customers/`;

In [0]:
%sql
-- Global temporary views must be referenced through the global_temp schema.
SELECT *
FROM global_temp.tv_customers;