In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import date, timedelta, datetime

In [0]:
# List the storage mount points visible to this workspace (mount point -> backing source).
# NOTE(review): the listing captured below is cut off and shows neither /mnt/stock-agg-silver
# nor /mnt/stock-agg-gold, which later cells read from and write to — confirm both mounts exist.
dbutils.fs.mounts()

[MountInfo(mountPoint='/databricks-datasets', source='databricks-datasets', encryptionType=''),
 MountInfo(mountPoint='/mnt/gold', source='wasbs://gold@capitaldev.blob.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/shiba-inshightl-ayer', source='wasbs://shiba-inshightl-ayer@capitaldev.blob.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/gold-edwh', source='abfss://raw-edwh@capitaldev.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/Volumes', source='UnityCatalogVolumes', encryptionType=''),
 MountInfo(mountPoint='/mnt/silver', source='wasbs://silver@capitaldev.blob.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/silver-edwh', source='abfss://raw-edwh@capitaldev.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/mnt/bronze1', source='wasbs://bronze@capitaldev.blob.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-tracking', source='databricks/mlflow-tracking',

In [0]:
# Run date: drives both the input folder that is read and the output folder that is written.
today = date.today()
print("Today is: ", today)

# The same date rendered as a "YYYY/MM/DD" path fragment for the input location.
dt_string = today.strftime("%Y/%m/%d")

Today is:  2023-10-24


In [0]:
# Input location for the run date: /mnt/stock-agg-silver/<folder>/<YYYY/MM/DD>.
folder = 'Banglore'

# The f-string already interpolates both values. The previous trailing
# .format(folder, dt_string) was redundant and would have re-interpreted any
# literal braces in the interpolated text, so it has been removed.
input_path = f"/mnt/stock-agg-silver/{folder}/{dt_string}"

# `a` is the name the CSV-loading cell reads; keep it pointing at the same path.
a = input_path

In [0]:
# Explicit schema for the daily stock CSV; every column is declared nullable.
schema_def = StructType([StructField('id', IntegerType(), True),
                         StructField('StockDate', DateType(), True),
                         StructField('WarehouseID', StringType(), True),
                         StructField('ItemName', StringType(), True),
                         StructField('OpeningStock', IntegerType(), True),
                         StructField('Receipts', IntegerType(), True),
                         StructField('Issues', IntegerType(), True),
                         StructField('UnitValue', FloatType(), True)])

# Load the day's file(s) from the path in `a`, treating the first line as a header.
# The schema is supplied explicitly, so the former inferSchema=True option was
# contradictory and has been dropped: column types come from schema_def.
Banglore_df = (
    spark.read
    .format('csv')
    .schema(schema_def)
    .option("header", True)
    .load(a)
)

In [0]:
# Register the loaded frame as the temp view GLOBAL_STOCK so it can be queried with SQL.
Banglore_df.createOrReplaceTempView("GLOBAL_STOCK")

print("Total Records available : ")
spark.sql("SELECT count(*) FROM GLOBAL_STOCK").show()

# Per-row closing stock: opening quantity plus receipts minus issues.
closing_stock = col("OpeningStock") + col("Receipts") - col("Issues")

# Aggregates produced for each (StockDate, ItemName) group.
# count/sum/round here are the pyspark.sql.functions versions brought in by the
# wildcard import at the top of the notebook, not the Python builtins.
summary_columns = [
    count("*").alias("TOTAL_REC"),
    sum("OpeningStock").alias("OPENING_STOCK"),
    sum("Receipts").alias("RECEIPTS"),
    sum("Issues").alias("ISSUES"),
    sum(closing_stock).alias("CLOSING_STOCK"),
    sum(closing_stock * col("UnitValue")).alias("CLOSING_VALUE"),
]
Banglore_stockSummary = Banglore_df.groupBy("StockDate", "ItemName").agg(*summary_columns)

print("Global Stock Summary: ")
# Round the monetary total to two decimal places before displaying it.
rounded_closing_value = round(col('CLOSING_VALUE'), 2)
daily_Banglore_agg = Banglore_stockSummary.withColumn('CLOSING_VALUE', rounded_closing_value)

daily_Banglore_agg.show()

Total Records available : 
+--------+
|count(1)|
+--------+
|      15|
+--------+

Global Stock Summary: 
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
| StockDate|        ItemName|TOTAL_REC|OPENING_STOCK|RECEIPTS|ISSUES|CLOSING_STOCK|CLOSING_VALUE|
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
|2023-10-24|Labeling Machine|        2|           52|       7|     5|           54|       507.31|
|2023-10-24|      Calculator|        2|           80|      70|    97|           53|       1444.9|
|2023-10-24|        Scissors|        2|           15|      29|    83|          -39|      -841.68|
|2023-10-24|        Notebook|        1|           48|      20|    77|           -9|       -63.27|
|2023-10-24|    Sticky Notes|        1|            1|      32|    25|            8|        62.56|
|2023-10-24|  Tape Dispenser|        2|           37|      73|   116|           -6|      -393.84|
|2023-10-24|

In [0]:
# Write the daily aggregate to the gold layer.
# Target layout: /mnt/stock-agg-gold/Banglore/<YYYY-MM>/<DD>/Banglore_data_.csv

year_month = today.strftime("%Y-%m")
day = today.strftime("%d")
print(year_month)
print(day)

output_path = '/mnt/stock-agg-gold/Banglore/{0}/{1}/Banglore_data_.csv'.format(year_month, day)
print(output_path)

# Overwrite whatever already exists at the target so the cell can be re-run for the same day.
# NOTE(review): no header option is set on this write, and Spark normally creates a
# directory of part files at this path rather than a single .csv file — confirm that
# downstream readers expect both.
daily_Banglore_agg.write.csv(output_path, mode='overwrite')

2023-10
24
/mnt/stock-agg-gold/Banglore/2023-10/24/Banglore_data_.csv
