In [1]:
import pyspark.sql.functions as F

StatementMeta(, , -1, SessionStarting, , SessionStarting)

In [None]:
# Load the customers dataset from the bronze layer.
CustomerSilver = spark.read.format("delta").load("Files/bronze/customers")

# Score assigned when `region` is NULL or is not listed in any tier below.
# Without a fallback the CASE expression evaluates to NULL, and that NULL
# propagates through the weighted sum so the customer gets no risk score at all.
# NOTE(review): 1.0 (treat an unknown region as highest risk) is a conservative
# choice -- confirm the desired value with the risk owners.
UNMAPPED_REGION_RISK = 1.0

# Compute one partial risk column per customer factor.
# NOTE(review): risk_kyc is 1 when is_kyc_identified == 1, whereas risk_employed
# is 1 when is_employed == 0 -- confirm the polarity of the KYC flag is intended.
CustomerSilverPartialRisk = (
    CustomerSilver
    .withColumn("risk_kyc", F.when(F.col("is_kyc_identified") == 1, 1).otherwise(0))
    .withColumn("risk_pep", F.when(F.col("is_customer_pep") == 1, 1).otherwise(0))
    .withColumn(
        "risk_region",
        # Region tiers, from lowest to highest risk.
        F.when(F.col("region").isin("ESP", "NOR", "USA", "PRT", "FRA", "DEU"), 0)
        .when(F.col("region").isin("BTN", "ISR", "IMN", "KWT", "LUX"), 0.2)
        .when(F.col("region").isin("ARG", "AZE", "BRB", "ECU"), 0.5)
        .when(F.col("region").isin("AFG", "BLR", "CUB", "SSD", "VEN"), 0.7)
        .when(F.col("region").isin("IRN", "PRK", "SYR"), 1)
        .otherwise(UNMAPPED_REGION_RISK),
    )
    .withColumn("risk_employed", F.when(F.col("is_employed") == 0, 1).otherwise(0))
    .withColumn(
        "risk_naic",
        F.when(F.col("naic_code").isin("441110", "441210", "522390", "713210"), 1).otherwise(0),
    )
    # Column names below are spelled exactly as in the source table.
    .withColumn("risk_ns", F.when(F.col("name_screaning_hit") == 1, 1).otherwise(0))
    .withColumn("risk_ams", F.when(F.col("adverse_media_screaning_hit") == 1, 1).otherwise(0))
    .withColumn("risk_minor", F.when(F.col("is_minor") == 1, 1).otherwise(0))
)

# Final customer risk: each partial risk contributes with the same 0.111 weight,
# added left to right in the order listed here.
# NOTE(review): eight factors at 0.111 each sum to 0.888, so the score cannot
# reach 1 -- confirm whether a ninth factor is missing or the weight should change.
customer_risk_score = None
for factor in (
    "risk_kyc",
    "risk_pep",
    "risk_region",
    "risk_employed",
    "risk_naic",
    "risk_ns",
    "risk_ams",
    "risk_minor",
):
    weighted = F.col(factor) * 0.111
    customer_risk_score = weighted if customer_risk_score is None else customer_risk_score + weighted

CustomerSilverRisk = CustomerSilverPartialRisk.withColumn("risk_customer", customer_risk_score)

# Persist the scored customers to the silver layer as a Delta table;
# "overwrite" mode replaces whatever was previously stored at that path.
customers_writer = CustomerSilverRisk.write.format("delta").mode("overwrite")
customers_writer.save("Files/silver/customers")

StatementMeta(, , , Waiting, , Waiting)

In [None]:
# Output location for the silver transactions and the streaming checkpoint directory.
# NOTE(review): the checkpoint directory lives inside the output table directory --
# confirm that table maintenance (e.g. VACUUM) cannot remove it. Do not move it
# without planning for reprocessing: a new checkpoint restarts the stream from scratch.
silver_transactions_dir = "Files/silver/transactions"
checkpoint_dir = "Files/silver/transactions/checkpoint"


# Open the bronze transactions Delta table as a streaming source.
# NOTE(review): "inferSchema" is presumably ignored by the Delta source, which takes
# its schema from the table itself -- confirm and drop the option if so.
bronze_transactions_reader = (
    spark.readStream
    .format("delta")
    .option("inferSchema", "true")
)
TransactionSilver = bronze_transactions_reader.load("Files/bronze/transactions")

# Compute the partial risk columns for every transaction.

# Score used when no rule matches: unknown transfer type, missing or unlisted
# country, or missing quantity. Without a fallback those rows evaluate to NULL and
# the final weighted sum becomes NULL as well. 1.0 mirrors the `.otherwise(1)`
# already applied by risk_quantity.
# NOTE(review): confirm that "unknown means highest risk" is the desired policy.
UNMAPPED_TRANSACTION_RISK = 1.0

# Country tiers, ordered from lowest to highest risk.
COUNTRY_RISK_TIERS = [
    ["ESP", "NOR", "USA", "PRT", "FRA", "DEU"],
    ["BTN", "ISR", "IMN", "KWT", "LUX"],
    ["ARG", "AZE", "BRB", "ECU"],
    ["AFG", "BLR", "CUB", "SSD", "VEN"],
    ["IRN", "PRK", "SYR"],
]

# Per transfer type: the score for each country tier, in the tier order above.
# Transfer-type literals are matched against the data exactly as written
# (including the "recieved" spelling).
COUNTRY_RISK_BY_TRANSFER_TYPE = {
    "cash top up": [0, 0.3, 0.8, 1, 1],
    "cash withdraw": [0, 0.2, 0.6, 0.8, 1],
    "transfer sent": [0, 0.3, 0.6, 1, 1],
    "transfer recieved": [0, 0.3, 0.6, 1, 1],
}

# Per transfer type: ([(exclusive upper bound, score), ...], score at or above the
# last bound). Bands are evaluated in order and the first match wins.
QUANTITY_RISK_BY_TRANSFER_TYPE = {
    "cash top up": ([(1000, 0.5)], 1),
    "cash withdraw": ([(1000, 0.2), (1500, 0.4)], 1),
    "transfer sent": ([(1000, 0.2), (2000, 0.4), (10000, 0.7)], 1),
    "transfer recieved": ([(1000, 0.2), (2000, 0.4), (10000, 0.7)], 1),
}


def _case_when(branches, default):
    """Fold (condition, value) pairs into one CASE WHEN column; the first match wins.

    Args:
        branches: non-empty iterable of (boolean Column, value) pairs.
        default: value returned when no condition matches.

    Returns:
        A Column holding the chained when(...).otherwise(default) expression.
    """
    case = None
    for condition, value in branches:
        case = F.when(condition, value) if case is None else case.when(condition, value)
    return case.otherwise(default)


def _quantity_transfer_risk():
    """Build the risk column derived from `quantity`, banded per `transfer_type`."""
    transfer_type = F.col("transfer_type")
    quantity = F.col("quantity")
    branches = []
    for kind, (bands, top_score) in QUANTITY_RISK_BY_TRANSFER_TYPE.items():
        for upper_bound, score in bands:
            branches.append(((transfer_type == kind) & (quantity < upper_bound), score))
        # `>=` (not `>`) so a quantity exactly equal to the last bound is scored
        # instead of falling between two bands.
        branches.append(((transfer_type == kind) & (quantity >= bands[-1][0]), top_score))
    return _case_when(branches, UNMAPPED_TRANSACTION_RISK)


def _country_risk(country_column):
    """Build the risk column for `country_column`, scored per `transfer_type` and country tier.

    Args:
        country_column: name of the column holding the country code to score.
    """
    transfer_type = F.col("transfer_type")
    country = F.col(country_column)
    branches = [
        ((transfer_type == kind) & country.isin(*tier), score)
        for kind, scores in COUNTRY_RISK_BY_TRANSFER_TYPE.items()
        for tier, score in zip(COUNTRY_RISK_TIERS, scores)
    ]
    return _case_when(branches, UNMAPPED_TRANSACTION_RISK)


TransactionSilverPartialRisk = (
    TransactionSilver
    .withColumn(
        "risk_quantity",
        # Risk from the raw amount alone, regardless of transfer type.
        F.when(F.col("quantity") < 1000, 0)
        .when(F.col("quantity") < 5000, 0.2)
        .when(F.col("quantity") < 10000, 0.5)
        .otherwise(1),
    )
    .withColumn("risk_quantity_transfer", _quantity_transfer_risk())
    .withColumn("risk_transfer_origin", _country_risk("country_of_origin"))
    .withColumn("risk_transfer_destiny", _country_risk("country_of_destiny"))
)
# Final transactional risk: equally weighted (0.25 each) sum of the four partial risks.
# A NULL in any partial risk makes the whole sum NULL.
TransactionSilverRisk = TransactionSilverPartialRisk.withColumn(
    "risk_transactional",
    F.col("risk_quantity") * 0.25
    + F.col("risk_quantity_transfer") * 0.25
    + F.col("risk_transfer_origin") * 0.25
    + F.col("risk_transfer_destiny") * 0.25,
)

# Persist the scored transactions to the silver layer as an append-only Delta stream.
# The availableNow trigger processes the data available at start and then stops.
silver_transactions_writer = (
    TransactionSilverRisk.writeStream
    .outputMode("append")
    .format("delta")
    .option("checkpointLocation", checkpoint_dir)
    .option("path", silver_transactions_dir)
    .trigger(availableNow=True)
)
query = silver_transactions_writer.start()

# Block the cell until the streaming query terminates.
query.awaitTermination()

StatementMeta(, , , Waiting, , Waiting)