## **Data Reading**

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import * 

# Load the raw orders Parquet data from the bronze path into `df`.
df = spark.read.parquet(
    "abfss://bronze@storagetosty.dfs.core.windows.net/orders"
)

display(df)

In [0]:
# Print the schema of the loaded DataFrame so column names and types can be checked.
df.printSchema()

## **Column Transformation**

In [0]:
# Rename `_rescued_data` to `rescued_data` (drops the leading underscore).
df = df.withColumnRenamed(existing="_rescued_data", new="rescued_data")
display(df)

In [0]:
# Remove the `rescued_data` column from the DataFrame.
df = df.drop("rescued_data")
display(df)

In [0]:
# Convert `order_date` to a timestamp, replacing the column under the same name.
# No format string is passed, so `to_timestamp` falls back to its default parsing.
df = df.withColumn("order_date", to_timestamp(col("order_date")))

display(df)

In [0]:
# Derive calendar columns from `order_date`:
#   year    -> year(order_date)
#   month   -> month(order_date)
#   Quarter -> the literal "Q" concatenated with quarter(order_date)
df = (
    df.withColumn("year", year(col("order_date")))
    .withColumn("month", month(col("order_date")))
    .withColumn("Quarter", concat(lit("Q"), quarter(col("order_date"))))
)

display(df)

## **Window Functions**

In [0]:
# One window spec shared by all three rankings: rows are grouped by `year`
# and ordered by `total_amount`, largest first.
amount_desc_within_year = Window.partitionBy("year").orderBy(desc("total_amount"))

# Add the three ranking columns side by side so their results can be compared.
# NOTE(review): the ordering uses only `total_amount`, so the relative order of
# rows with equal amounts is not pinned down by this spec — confirm that is
# acceptable for `row_flag`.
df_windows = (
    df.withColumn("dense_flag", dense_rank().over(amount_desc_within_year))
    .withColumn("rank_flag", rank().over(amount_desc_within_year))
    .withColumn("row_flag", row_number().over(amount_desc_within_year))
)
display(df_windows)

## **Classes - OOP**

In [0]:
class window_functions:
    """Helpers that append a ranking column computed over a descending window.

    Every public method takes a DataFrame plus a partition column and an order
    column, and returns the result of ``df.withColumn(...)``. None of the
    methods read instance state.

    The parameter name ``parititon_column`` is misspelled; the spelling is kept
    so that callers passing it by keyword keep working.
    """

    @staticmethod
    def _ranked(df, flag_name, ranking, partition_by, order_by):
        """Return ``df`` with ``flag_name`` set to ``ranking()`` over the window.

        The window is partitioned by ``partition_by`` and ordered by
        ``order_by`` descending.
        """
        spec = Window.partitionBy(partition_by).orderBy(desc(order_by))
        return df.withColumn(flag_name, ranking().over(spec))

    def dense_flag(self, df, parititon_column, order_column):
        """Add a ``dense_flag`` column holding ``dense_rank()`` over the window.

        Args:
            df: DataFrame to extend.
            parititon_column: Column passed to ``Window.partitionBy``.
            order_column: Column the window is ordered by, descending.

        Returns:
            The DataFrame returned by ``withColumn`` with ``dense_flag`` added.
        """
        return window_functions._ranked(
            df, "dense_flag", dense_rank, parititon_column, order_column
        )

    def rank_flag(self, df, parititon_column, order_column):
        """Add a ``rank_flag`` column holding ``rank()`` over the window.

        Args:
            df: DataFrame to extend.
            parititon_column: Column passed to ``Window.partitionBy``.
            order_column: Column the window is ordered by, descending.

        Returns:
            The DataFrame returned by ``withColumn`` with ``rank_flag`` added.
        """
        return window_functions._ranked(
            df, "rank_flag", rank, parititon_column, order_column
        )

    def row_flag(self, df, parititon_column, order_column):
        """Add a ``row_flag`` column holding ``row_number()`` over the window.

        Args:
            df: DataFrame to extend.
            parititon_column: Column passed to ``Window.partitionBy``.
            order_column: Column the window is ordered by, descending.

        Returns:
            The DataFrame returned by ``withColumn`` with ``row_flag`` added.
        """
        return window_functions._ranked(
            df, "row_flag", row_number, parititon_column, order_column
        )

In [0]:
# Start the class-based variant from the same transformed DataFrame (`df_class`
# is a second name for `df`, not a copy).
df_class = df
display(df_class)

In [0]:
# Apply the three ranking helpers one after another, each called with the
# partition column "year" and the order column "total_amount".
class_object = window_functions()
for add_flag in (class_object.dense_flag, class_object.rank_flag, class_object.row_flag):
    df_class = add_flag(df_class, "year", "total_amount")
display(df_class)

## **Data Writing**

In [0]:
# Write the ranked orders in Delta format to the silver path, using
# "overwrite" mode so existing data at that path is replaced.
(
    df_class.write
    .format("delta")
    .mode("overwrite")
    .save("abfss://silver@storagetosty.dfs.core.windows.net/orders")
)

In [0]:
# Read the Delta data back from the silver path and display it for a visual check.
df_validation = (
    spark.read
    .format("delta")
    .load("abfss://silver@storagetosty.dfs.core.windows.net/orders")
)
display(df_validation)