In [0]:
# NOTE: import order is kept as-is on purpose; with wildcard imports a later
# import wins any name clash, so reordering could change which object a name
# refers to.
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import date, timedelta, datetime
# Qualified alias so Spark column functions can be referenced unambiguously
# (F.sum, F.round, ...) instead of relying on the wildcard import shadowing
# Python builtins.
from pyspark.sql import functions as F

In [0]:
# Anchor the whole run on the current calendar date.
today = date.today()
print("Today is: ", today)

# The previous calendar day.
one_day = timedelta(days=1)
yesterday = today - one_day
print("Yesterday was: ", yesterday)

# Today's date rendered as YYYY/MM/DD (slash-separated).
dt_string = f"{today:%Y/%m/%d}"
print(dt_string)

Today is:  2023-10-24
Yesterday was:  2023-10-23
2023/10/24


In [0]:
# Input location for this warehouse and run date:
#   /mnt/stock-agg-silver/<folder>/<YYYY/MM/DD>
folder = 'Hyderabad'
# The f-string already interpolates both values. The former trailing
# .format(folder, dt_string) was a no-op at best, and would raise or
# mis-substitute if either value ever contained '{' or '}'.
a = f"/mnt/stock-agg-silver/{folder}/{dt_string}"

In [0]:
# Explicit schema for the daily stock CSV. Every column is declared nullable.
schema_def = StructType([
    StructField('id', IntegerType(), True),
    StructField('StockDate', DateType(), True),
    StructField('WarehouseID', StringType(), True),
    StructField('ItemName', StringType(), True),
    StructField('OpeningStock', IntegerType(), True),
    StructField('Receipts', IntegerType(), True),
    StructField('Issues', IntegerType(), True),
    StructField('UnitValue', FloatType(), True),
])

# Read the CSV files under `a`, treating the first line of each file as a
# header. The schema is supplied explicitly, so the previous
# option('inferSchema', True) was dropped: it contradicts an explicit schema
# and only obscures which one is in effect.
Hyderabad_df = (
    spark.read
    .format('csv')
    .schema(schema_def)
    .option("header", True)
    .load(a)
)

In [0]:
# Expose the loaded rows to Spark SQL under the view name GLOBAL_STOCK.
Hyderabad_df.createOrReplaceTempView("GLOBAL_STOCK")

print("Total Records available : ")
spark.sql("SELECT count(*) FROM GLOBAL_STOCK").show()

# Per-row closing quantity (opening + receipts - issues) and its value.
# Defined once and reused so the two aggregates below cannot drift apart.
closing_stock = F.col("OpeningStock") + F.col("Receipts") - F.col("Issues")
closing_value = closing_stock * F.col("UnitValue")

# Aggregate per stock date and item using the DataFrame API.
# Spark functions are referenced through the F namespace: the bare names
# sum / round / count only worked because the wildcard import shadows the
# Python builtins, which silently breaks if those names are ever rebound.
Hyderabad_stockSummary = Hyderabad_df.groupBy("StockDate", "ItemName").agg(
    F.count("*").alias("TOTAL_REC"),
    F.sum("OpeningStock").alias("OPENING_STOCK"),
    F.sum("Receipts").alias("RECEIPTS"),
    F.sum("Issues").alias("ISSUES"),
    F.sum(closing_stock).alias("CLOSING_STOCK"),
    F.sum(closing_value).alias("CLOSING_VALUE"),
)

print("Global Stock Summary: ")
# Round the summed value to two decimal places.
daily_Hyderabad_agg = Hyderabad_stockSummary.withColumn(
    'CLOSING_VALUE', F.round(F.col('CLOSING_VALUE'), 2)
)

daily_Hyderabad_agg.show()

Total Records available : 
+--------+
|count(1)|
+--------+
|      17|
+--------+

Global Stock Summary: 
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
| StockDate|        ItemName|TOTAL_REC|OPENING_STOCK|RECEIPTS|ISSUES|CLOSING_STOCK|CLOSING_VALUE|
+----------+----------------+---------+-------------+--------+------+-------------+-------------+
|2023-10-24|Labeling Machine|        2|          164|      56|   149|           71|       632.85|
|2023-10-24|      Calculator|        2|          134|       7|   163|          -22|       -258.4|
|2023-10-24|        Scissors|        2|           71|      71|    65|           77|      1090.21|
|2023-10-24|        Notebook|        2|           67|      21|   127|          -39|       -182.0|
|2023-10-24|    Sticky Notes|        2|           12|      41|    11|           42|       333.16|
|2023-10-24|  Tape Dispenser|        2|          114|      30|    68|           76|       814.43|
|2023-10-24|

In [0]:
# Write the daily aggregate to the gold layer.
# Target layout: /mnt/stock-agg-gold/Hyderabad/<YYYY-MM>/<DD>/Hyderabad_data_.csv

year_month = f"{today:%Y-%m}"
print(year_month)
day = f"{today:%d}"
print(day)
output_path = f'/mnt/stock-agg-gold/Hyderabad/{year_month}/{day}/Hyderabad_data_.csv'
print(output_path)

# NOTE(review): DataFrameWriter.save() writes a directory of part files at
# this path, so "Hyderabad_data_.csv" ends up as a folder name rather than a
# single file. No header option is set, so the part files carry no column
# names. Confirm downstream readers expect both.
gold_writer = daily_Hyderabad_agg.write.format('csv').mode('overwrite')
gold_writer.save(output_path)

2023-10
24
/mnt/stock-agg-gold/Hyderabad/2023-10/24/Hyderabad_data_.csv
