In [0]:
# bnf list: https://www.nhsbsa.nhs.uk/prescription-data/understanding-our-data/bnf-snomed-mapping
import pyspark.sql.functions as sfn
from delta.tables import *

In [0]:
# append fact table and rebuild dim tables 
def saveIntoLandingTable(tableName, inputDf):
  """Recreate the Delta table `tableName` in database `nhsgp` from `inputDf`.

  The table registration is dropped if present, anything stored under
  /user/hive/warehouse/<tableName> is removed, `inputDf` is written to that
  path in Delta format, and the table is registered again at that location.

  Args:
    tableName: name of the table; also used as the warehouse folder name.
    inputDf: DataFrame whose rows become the content of the new table.
  """
  print("saving table {}...".format(tableName))
  warehouseLocation = "/user/hive/warehouse/{}".format(tableName)

  spark.sql("use nhsgp")
  spark.sql("DROP TABLE IF EXISTS {}".format(tableName))

  # Remove leftover files before writing: no save mode is set on the write
  # below. checkPathExist is defined outside this block -- presumably it
  # reports whether the path exists; verify against its definition.
  pathAlreadyThere = checkPathExist(warehouseLocation)
  if pathAlreadyThere:
    dbutils.fs.rm(warehouseLocation, True)

  deltaWriter = inputDf.write.format("delta")
  deltaWriter.save(warehouseLocation)

  createStatement = "CREATE TABLE {} USING DELTA LOCATION '{}'".format(tableName, warehouseLocation)
  spark.sql(createStatement)

  
#append fact table   
def appenFactTable(tableName, newDf):
  """Insert every row of `newDf` into the existing Delta table `tableName`.

  The target is opened by path (/user/hive/warehouse/<tableName>/) and merged
  with `newDf` on the never-true condition "1 = 2": no target row can match,
  so each source row goes through the not-matched insert clause.

  Args:
    tableName: name of the table; used as the warehouse folder name.
    newDf: DataFrame supplying the columns SHA, PCT, PRACTICE, BNF_CODE,
      BNF_NAME, ITEMS, NIC, ACT_COST, QUANTITY and PERIOD.
  """
  print("saving table {}...".format(tableName))
  spark.sql("use nhsgp")

  # Target column -> source expression; every column is copied from the
  # "new" alias under the same name.
  factColumns = [
    "SHA", "PCT", "PRACTICE", "BNF_CODE", "BNF_NAME",
    "ITEMS", "NIC", "ACT_COST", "QUANTITY", "PERIOD",
  ]
  insertValues = {}
  for columnName in factColumns:
    insertValues[columnName] = "new." + columnName

  factTable = DeltaTable.forPath(spark, "/user/hive/warehouse/{}/".format(tableName))

  mergeBuilder = factTable.alias("old").merge(newDf.alias("new"), "1 = 2")
  mergeBuilder = mergeBuilder.whenNotMatchedInsert(values=insertValues)
  mergeBuilder.execute()

In [0]:
# https://openprescribing.net/bnf/
spark.sql("use nhsgp ")

# Report the row count of each landing table. Every table appears exactly once
# so that no table is scanned and printed twice.
landingTables = [
  "landing_fact_predescription",
  "landing_dim_practices",
  "landing_dim_chem",
  "landing_column_mappings",
  "landing_dim_bnfsnomedmapping",
]
for landingTable in landingTables:
  # The result column is named <table>_TotalRow so each output is self-describing.
  spark.sql("select count(1) as {0}_TotalRow from {0}".format(landingTable)).show()

# List the table names reported by `show tables`, without truncating them.
spark.sql("show tables").select(["tableName"]).show(truncate=False)
# spark.sql("show views").select(["viewName"]).show(truncate=False)

In [0]:
# Presentation layer: views in nhsgp built on top of the landing tables.
# The landing tables are referenced without a database prefix, so they are
# resolved against whichever database is current when this cell runs.

# 1. Fact rows left-joined with the practice (GP) details.
factDescriptionGpDdl = (
  'CREATE or replace VIEW nhsgp.Presentation_Fact_Description_Gp AS '
  'select pre.BNF_CODE, pre.BNF_NAME, pre.ITEMS, pre.NIC, pre.ACT_COST, pre.QUANTITY, pre.PERIOD, '
  'pra.practice, pra.name, pra.address1, pra.address2, pra.city, pra.county, pra.postcode '
  'from landing_fact_predescription pre '
  'left join landing_dim_practices as pra on pre.PRACTICE =pra.practice ;'
)
spark.sql(factDescriptionGpDdl)

# 2. GP practice list.
dimGpDdl = (
  'CREATE or replace VIEW nhsgp.Presentation_Dim_Gp AS '
  'select practice, name,address1, address2, city, county, postcode '
  'from landing_dim_practices;'
)
spark.sql(dimGpDdl)

# 3. BNF code/name per practice, taken from the column-mappings table.
dimGpBnfDdl = (
  'CREATE or replace VIEW nhsgp.Presentation_Dim_Gp_Bnf AS '
  'select bnf_code,    bnf_name, practice '
  'from landing_column_mappings;'
)
spark.sql(dimGpBnfDdl)

# 4. BNF to SNOMED code mapping.
dimBnfSnomedDdl = (
  'CREATE or replace VIEW nhsgp.Presentation_Dim_Bnf_snomed AS '
  'select BNF_Code,BNF_Name, SNOMED_Code '
  'from landing_dim_bnfsnomedmapping;'
)
spark.sql(dimBnfSnomedDdl)

# 5. Chemical substance list.
dimChemDdl = (
  'CREATE or replace VIEW nhsgp.Presentation_Dim_Chem AS '
  'select ChemSub, NAME '
  'from landing_dim_chem;'
)
spark.sql(dimChemDdl)



In [0]:
# Print the full `show tables` result for the current database, untruncated.
tablesListing = spark.sql("show tables")
tablesListing.show(truncate=False)

In [0]:

# tablePath = "/user/hive/warehouse/{}".format("landing_column_mappings")
# mapDf = spark.read.format('delta').load(tablePath)

# tablePath = "/user/hive/warehouse/{}".format("landing_dim_practices")
# practiceDf = spark.read.format('delta').load(tablePath)

# tablePath = "/user/hive/warehouse/{}".format("landing_fact_predescription")
# predDf = spark.read.format('delta').load(tablePath)

# tablePath = "/user/hive/warehouse/{}".format("landing_dim_chem")
# chemDf = spark.read.format('delta').load(tablePath) 

# tablePath = "/user/hive/warehouse/{}".format("landing_dim_bnfsnomedmapping")
# bnfsnomedDf = spark.read.format('delta').load(tablePath)

# mapDf.show(1)
# practiceDf.show(1)
# predDf.show(1)
# chemDf.show(1)
# spark.sql('select * from landing_dim_bnfsnomedmapping').show(1)

In [0]:
# chemDf = chemDf.drop(col('201912'))
# chemDf.show(1, truncate=False)
# chemDf = chemDf.withColumnRenamed('ChemSub','ChemCode')

# chemDf.sort(sfn.desc('NAME')).show()

# spark.sql('select count(1) from DescriptionWithGpInfoView').show()
# spark.sql('drop view nhsgp.presentataion_description_gp')


In [0]:
# Show different versions of the fact table by reading it at fixed Delta versions.

tablePath = "/user/hive/warehouse/{}".format("landing_fact_predescription")
preTable = DeltaTable.forPath(spark, tablePath)

# Print the history recorded for the Delta table at tablePath.
historyDf = preTable.history()
historyDf.show()


def _readFactVersion(versionNumber):
  """Load the Delta table at `tablePath` as of version `versionNumber`."""
  versionReader = spark.read.format('delta').option("versionAsOf", versionNumber)
  return versionReader.load(tablePath)


# Versions 0, 1 and 2 are hard-coded here, so each of them must be readable
# for this cell to run through.
predDfv0 = _readFactVersion(0)
rowsV0 = predDfv0.count()
print("Version 0 row number: {}".format(rowsV0))

predDfv1 = _readFactVersion(1)
rowsV1 = predDfv1.count()
print("Version 1 row number: {}".format(rowsV1))

predDfv2 = _readFactVersion(2)
rowsV2 = predDfv2.count()
print("Version 2 row number: {}".format(rowsV2))
 