In [0]:
# Storage account key used for ABFS access. It is blank in the committed notebook;
# supply the real value at run time rather than saving it in the notebook source.
account_key = ""

# Register the key in the Spark session config for the dfs endpoint of the
# `arulrajgopalshare` storage account.
spark.conf.set(
    "fs.azure.account.key.arulrajgopalshare.dfs.core.windows.net",
    account_key,
)

# Read file with metadata

In [0]:
%sql
-- Create the database that the sample table and views in this notebook are
-- stored under. IF NOT EXISTS keeps the statement safe to re-run.
CREATE DATABASE IF NOT EXISTS my_database;

In [0]:
%scala
// Databricks notebook source
// Three (id, name, age) sample rows used to seed the demo table.
val data = Seq((1, "Alice", 25), (2, "Bob", 30), (3, "Charlie", 35))

// Name the tuple positions so the resulting DataFrame has id / name / age columns.
val df = data.toDF("id", "name", "age")

// Persist as a Delta table, replacing any previous contents of the table.
df.write
  .format("delta")
  .mode("overwrite")
  .saveAsTable("my_database.my_sample_table")

In [0]:
%sql
-- Temporary view exposing only the id and name columns of the sample table.
CREATE OR REPLACE TEMPORARY VIEW my_temp_view AS
SELECT
  id,
  name
FROM my_database.my_sample_table

In [0]:
%sql
-- Preview every row and column exposed by my_temp_view.
SELECT *
FROM my_temp_view

In [0]:
%sql
-- Second temporary view with the same projection (id, name) of the sample table,
-- registered under the name temp_table_name.
CREATE OR REPLACE TEMPORARY VIEW temp_table_name AS
SELECT
  id,
  name
FROM my_database.my_sample_table


In [0]:
%sql
-- Materialize the (id, name) projection of the sample table.
-- IF NOT EXISTS makes the cell idempotent: without it, re-running the notebook
-- fails here because the view created by the previous run already exists.
CREATE MATERIALIZED VIEW IF NOT EXISTS my_database.my_materialized_view AS
SELECT id, name FROM my_database.my_sample_table;

In [0]:
%sql
-- Re-define the materialized view in place; OR REPLACE swaps out any existing
-- definition registered under the same name.
CREATE OR REPLACE MATERIALIZED VIEW my_database.my_materialized_view AS
SELECT
  id,
  name
FROM my_database.my_sample_table;

In [0]:
from pyspark.errors import PySparkAssertionError
from pyspark.testing import assertDataFrameEqual

# Reference rows the comparison is made against.
df_expected = spark.createDataFrame(
    data=[("Alfred", 1500), ("Alfred", 2500), ("Anna", 500), ("Anna", 3000)],
    schema=["name", "amount"],
)

# Rows under test; the first amount (1200) differs from the expected 1500.
df_actual = spark.createDataFrame(
    data=[("Alfred", 1200), ("Alfred", 2500), ("Anna", 500), ("Anna", 3000)],
    schema=["name", "amount"],
)

# The two frames differ, so assertDataFrameEqual raises. Catch the error and print
# its message so the mismatch is shown as cell output instead of aborting a
# "Run All" of the notebook.
try:
    assertDataFrameEqual(df_actual, df_expected)
except PySparkAssertionError as e:
    print(e)
else:
    print("df_actual matches df_expected.")

In [0]:
from pyspark.errors import PySparkAssertionError
from pyspark.testing import assertDataFrameEqual

# Reference rows the comparison is made against.
df_expected = spark.createDataFrame(
    data=[
        ("Alfred", 1500),
        ("Alfred", 2500),
        ("Anna", 500),
        ("Anna", 3000),
    ],
    schema=["name", "amount"],
)

# Rows under test; both "Alfred" amounts differ from the expected ones.
df_actual = spark.createDataFrame(
    data=[
        ("Alfred", 1200),
        ("Alfred", 300),
        ("Anna", 500),
        ("Anna", 3000),
    ],
    schema=["name", "amount"],
)

try:
    # includeDiffRows=True attaches the differing rows to the raised error as `e.data`.
    assertDataFrameEqual(df_actual, df_expected, includeDiffRows=True)
except PySparkAssertionError as e:
    if not e.data:
        # No row-level diff is attached, so there is nothing to tabulate;
        # surface the original error instead of failing inside createDataFrame.
        raise
    # `e.data` is a list of (actual_row, expected_row) pairs, e.g.:
    # [(Row(name='Alfred', amount=1200), Row(name='Alfred', amount=1500))]
    # The variable name (including its spelling) is kept as-is in case other
    # cells refer to it.
    errored_recrods = spark.createDataFrame(e.data, schema=["Actual", "Expected"])
    # Display only when a diff was actually produced: previously this ran
    # unconditionally and hit a NameError (or showed a stale result from an
    # earlier run) whenever the frames matched.
    display(errored_recrods)
else:
    print("df_actual matches df_expected; there are no differing rows to display.")

In [0]:
from pyspark.errors import PySparkAssertionError
from pyspark.testing import assertSchemaEqual

# Explicit schema (as a DDL string) applied to the "actual" frame only.
schema_actual = "name STRING, amount DOUBLE"

# Same logical rows; the expected amounts are ints, the actual amounts are floats.
data_expected = [["Alfred", 1500], ["Alfred", 2500], ["Anna", 500], ["Anna", 3000]]
data_actual = [["Alfred", 1500.0], ["Alfred", 2500.0], ["Anna", 500.0], ["Anna", 3000.0]]

# No schema is passed for the expected frame, so Spark infers its column names
# and types from the data.
df_expected = spark.createDataFrame(data=data_expected)
df_actual = spark.createDataFrame(data=data_actual, schema=schema_actual)

# The inferred schema does not match schema_actual, so assertSchemaEqual raises.
# Catch the error and print its message so the schema diff is shown as cell output
# instead of aborting a "Run All" of the notebook.
try:
    assertSchemaEqual(df_actual.schema, df_expected.schema)
except PySparkAssertionError as e:
    print(e)
else:
    print("df_actual.schema matches df_expected.schema.")

# Sort array

In [0]:
import pyspark.sql.functions as sf

# Single-row frame whose `data` column holds an integer array containing a null.
df = spark.createDataFrame(data=[([2, 1, None, 3],)], schema=["data"])

# sort_array sorts ascending by default; per the PySpark docs, null elements are
# placed at the beginning of the result in ascending order.
df.select(sf.sort_array(df["data"])).show()

In [0]:
import pyspark.sql.functions as sf

# Same single-row frame as the ascending example: an integer array with one null.
df = spark.createDataFrame(data=[([2, 1, None, 3],)], schema=["data"])

# asc=False requests descending order; per the PySpark docs, null elements are
# placed at the end of the result in descending order.
df.select(sf.sort_array(df["data"], asc=False)).show()

In [0]:
import pyspark.sql.functions as sf

# Edge case: the array holds exactly one element.
df = spark.createDataFrame(data=[([1],)], schema=["data"])

# Sort the one-element array with the default (ascending) direction and print it.
df.select(sf.sort_array(df["data"])).show()


In [0]:

from pyspark.sql import functions as sf
from pyspark.sql.types import ArrayType, StringType, StructField, StructType

# Edge case: an empty array. The element type cannot be inferred from an empty
# list, so the schema is spelled out explicitly (nullable array of strings).
schema = StructType(
    [
        StructField("data", ArrayType(StringType()), True),
    ]
)
df = spark.createDataFrame(data=[([],)], schema=schema)

# Sort the empty array with the default (ascending) direction and print it.
df.select(sf.sort_array(df["data"])).show()


In [0]:
from pyspark.sql import functions as sf
from pyspark.sql.types import ArrayType, IntegerType, StructType, StructField

# Edge case: every element is null. The schema is given explicitly (nullable
# array of integers) because a list of only None values carries no type.
schema = StructType(
    [
        StructField("data", ArrayType(IntegerType()), True),
    ]
)
df = spark.createDataFrame(data=[([None, None, None],)], schema=schema)

# Sort the all-null array with the default (ascending) direction and print it.
df.select(sf.sort_array(df["data"])).show()


In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, ArrayType
from pyspark.sql.functions import sort_array

# Innermost struct: an integer field followed by a string field.
level2_struct_type = StructType([
    StructField("level2_field1", IntegerType(), True),
    StructField("level2_field2", StringType(), True),
])

# Element type of the array: a string field plus the struct defined above.
nested_element_type = StructType([
    StructField("level1_field", StringType(), True),
    StructField("level2_struct", level2_struct_type, True),
])

# Top-level row: an integer id and a nullable array of the nested elements.
schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("nested_array", ArrayType(nested_element_type), True),
])


def _element(level1_field, level2_field1, level2_field2):
    """Build one `nested_array` element as a plain dict keyed by the schema's field names."""
    return {
        "level1_field": level1_field,
        "level2_struct": {
            "level2_field1": level2_field1,
            "level2_field2": level2_field2,
        },
    }


# (id, nested_array) rows. Within id 1 the level1_field values are not in
# alphabetical order (a, b, z, o, e).
data = [
    (1, [
        _element("a", 10, "x"),
        _element("b", 20, "y"),
        _element("z", 30, "z"),
        _element("o", 40, "w"),
        _element("e", 50, "v"),
    ]),
    (2, [
        _element("f", 60, "u"),
        _element("g", 70, "t"),
    ]),
    (3, [
        _element("h", 80, "s"),
        _element("i", 90, "r"),
        _element("j", 100, "q"),
    ]),
]

df = spark.createDataFrame(data, schema=schema)

In [0]:
# Render the nested DataFrame `df` with the notebook's display helper.
display(df)

In [0]:
# Show each id next to its nested_array sorted in sort_array's default (ascending)
# direction. NOTE(review): for struct elements the ordering presumably compares
# fields in declaration order (level1_field first) -- confirm against the output.
display(
    df.select(
        "id",
        sort_array(sf.col("nested_array")),
    )
)