In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import date, timedelta, datetime

In [0]:
# Run date for this notebook: the current local calendar date.
# To reprocess the previous day instead, subtract timedelta(days=1) from it.
today = date.today()
print("Today is: ", today)

# Slash-separated form (YYYY/MM/DD), used later to build the dated input folder path.
dt_string = f"{today:%Y/%m/%d}"
print(dt_string)

Today is:  2023-10-24
2023/10/24


In [0]:
# List the folders available under the silver mount (last expression is displayed).
silver_root = '/mnt/stock-agg-silver'
dbutils.fs.ls(silver_root)

[FileInfo(path='dbfs:/mnt/stock-agg-silver/Banglore/', name='Banglore/', size=0, modificationTime=1698123082000),
 FileInfo(path='dbfs:/mnt/stock-agg-silver/Hyderabad/', name='Hyderabad/', size=0, modificationTime=1698123078000),
 FileInfo(path='dbfs:/mnt/stock-agg-silver/Mumbai/', name='Mumbai/', size=0, modificationTime=1698123082000),
 FileInfo(path='dbfs:/mnt/stock-agg-silver/london/', name='london/', size=0, modificationTime=1698123079000)]

In [0]:
# Location folder to process; it is interpolated into the input path below so that
# switching location only needs this one assignment changed.
folder = 'Mumbai'

# Dated silver-layer input path: <mount>/<folder>/<YYYY/MM/DD>.
# The f-string already interpolates its fields, so no extra .format() call is needed.
a = f"dbfs:/mnt/stock-agg-silver/{folder}/{dt_string}"

In [0]:
# Display the resolved input path.
# NOTE(review): the saved output below shows 2023/10/23 although the date cell printed
# 2023/10/24 — looks like stale output from an earlier run; re-run to confirm.
a

'dbfs:/mnt/stock-agg-silver/Mumbai/2023/10/23'

In [0]:
# Explicit schema for the daily stock CSV files; every column is nullable.
# NOTE(review): UnitValue is a single-precision FloatType — consider DoubleType or
# DecimalType if the value totals need exact cents; confirm with downstream consumers.
schema_def = StructType([
    StructField('id', IntegerType(), True),
    StructField('StockDate', DateType(), True),
    StructField('WarehouseID', StringType(), True),
    StructField('ItemName', StringType(), True),
    StructField('OpeningStock', IntegerType(), True),
    StructField('Receipts', IntegerType(), True),
    StructField('Issues', IntegerType(), True),
    StructField('UnitValue', FloatType(), True),
])

# Read the day's files with the schema above. The previous 'inferSchema' option was
# dropped: it contradicts the user-supplied schema, which is the one that is applied.
mumbai_df = (
    spark.read
    .schema(schema_def)
    .option("header", True)
    .format('csv')
    .load(a)
)

In [0]:
# Expose the loaded rows to Spark SQL under a temporary view name.
mumbai_df.createOrReplaceTempView("GLOBAL_STOCK")

print("Total Records available : ")
record_count_df = spark.sql("SELECT count(*) FROM GLOBAL_STOCK")
record_count_df.show()

# Per-row closing quantity: opening stock plus receipts minus issues.
closing_qty = col("OpeningStock") + col("Receipts") - col("Issues")

# Aggregates computed per (StockDate, ItemName) group.
# `count`, `sum` and `round` here are the pyspark.sql.functions versions brought in by
# the wildcard import, not the Python builtins.
summary_metrics = [
    count("*").alias("TOTAL_REC"),
    sum("OpeningStock").alias("OPENING_STOCK"),
    sum("Receipts").alias("RECEIPTS"),
    sum("Issues").alias("ISSUES"),
    sum(closing_qty).alias("CLOSING_STOCK"),
    sum(closing_qty * col("UnitValue")).alias("CLOSING_VALUE"),
]
mumbai_stockSummary = mumbai_df.groupBy("StockDate", "ItemName").agg(*summary_metrics)

print("Global Stock Summary: ")
# Round the monetary total to two decimal places before display and export.
rounded_closing_value = round(col('CLOSING_VALUE'), 2)
daily_mumbai_agg = mumbai_stockSummary.withColumn('CLOSING_VALUE', rounded_closing_value)

daily_mumbai_agg.show()

Total Records available : 
+--------+
|count(1)|
+--------+
|      16|
+--------+

Global Stock Summary: 
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
| StockDate|        ItemName|TOTAL_REC|OPENING_STOCK|RECEIPTS|ISSUES|CLOSING_STOCK|CLOSING_VALUE|
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
|2023-10-24|Labeling Machine|        2|          166|      24|    94|           96|      1160.13|
|2023-10-24|      Calculator|        2|          124|      65|    10|          179|      1903.96|
|2023-10-24|        Scissors|        2|           32|      47|   131|          -52|      -697.16|
|2023-10-24|        Notebook|        1|           50|      35|    73|           12|        40.08|
|2023-10-24|    Sticky Notes|        2|          140|      51|     3|          188|       1549.9|
|2023-10-24|  Tape Dispenser|        2|          122|      81|   103|          100|       1543.0|
|2023-10-24|

In [0]:
# import os

# # Define folder and date
# folder = 'Banglore'
# dt_string = '2023/10/23'

# # Build the path
# path = f"/dbfs/mnt/stock-agg-silver/{folder}/{dt_string}"

# # Check if the path exists
# if os.path.exists(path):
#     print(f"Path exists: {path}")
# else:
#     print(f"Path does not exist: {path}")

Path exists: /dbfs/mnt/stock-agg-silver/Banglore/2023/10/23


In [0]:
# Gold-layer destination is laid out as <YYYY-MM>/<DD> beneath the Mumbai folder.
year_month = f"{today:%Y-%m}"
print(year_month)
day = f"{today:%d}"
print(day)
output_path = f"/mnt/stock-agg-gold/Mumbai/{year_month}/{day}/Mumbai_data_.csv"
print(output_path)

# Persist the daily aggregate as CSV, replacing any previous output at this path.
# NOTE(review): Spark normally writes a directory of part files at this path rather than
# a single file named Mumbai_data_.csv — confirm consumers expect that.
# NOTE(review): no 'header' option is set, so column names are presumably not written —
# confirm whether downstream readers need them.
gold_writer = daily_mumbai_agg.write.format('csv').mode('overwrite')
gold_writer.save(output_path)

2023-10
24
/mnt/stock-agg-gold/Mumbai/2023-10/24/Mumbai_data_.csv
