In [1]:
# Encontrar Spark
import findspark

findspark.init()
findspark.find()

'E:\\spark-3.1.3\\python\\pyspark'

In [2]:
from IPython.core.display import HTML

display(HTML("<style>pre { white-space: pre !important; }</style>"))

In [3]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.appName('spark_Pipeline').getOrCreate()

# Exploración Rapida de los Datos

In [4]:
df_csv = (spark.read.format("csv")
      .option("header", "true") 
      .option("mode", "FAILFAST") 
      .option("inferSchema", "true")
      .load("DataSet/HIS Niños 2019/HB/Niños AREQUIPA.csv")
     )

In [5]:
df_csv.printSchema()

root
 |-- Diresa: string (nullable = true)
 |-- Red: string (nullable = true)
 |-- Microred: string (nullable = true)
 |-- EESS: string (nullable = true)
 |-- Renipress: integer (nullable = true)
 |-- FechaAtencion: string (nullable = true)
 |-- Sexo: string (nullable = true)
 |-- FechaNacimiento: string (nullable = true)
 |-- EdadMeses: integer (nullable = true)
 |-- UbigeoPN: double (nullable = true)
 |-- DepartamentoPN: string (nullable = true)
 |-- ProvinciaPN: string (nullable = true)
 |-- DistritoPN: string (nullable = true)
 |-- CentroPobladoPN: string (nullable = true)
 |-- Juntos: integer (nullable = true)
 |-- SIS: integer (nullable = true)
 |-- Pin: integer (nullable = true)
 |-- Qaliwarma: integer (nullable = true)
 |-- Peso: string (nullable = true)
 |-- Talla: string (nullable = true)
 |-- Hemoglobina: double (nullable = true)
 |-- FechaHemoglobina: string (nullable = true)
 |-- Cred: integer (nullable = true)
 |-- Suplementacion: integer (nullable = true)
 |-- Consejeria

In [6]:
df_csv.show(n=5)

+--------+--------------------+--------+--------------------+---------+-------------+----+---------------+---------+--------+--------------+-----------+----------+----------------+------+---+---+---------+----+-----+-----------+----------------+----+--------------+----------+------+------------+---------+--------+-------+--------------+-----------+
|  Diresa|                 Red|Microred|                EESS|Renipress|FechaAtencion|Sexo|FechaNacimiento|EdadMeses|UbigeoPN|DepartamentoPN|ProvinciaPN|DistritoPN| CentroPobladoPN|Juntos|SIS|Pin|Qaliwarma|Peso|Talla|Hemoglobina|FechaHemoglobina|Cred|Suplementacion|Consejeria|Sesion|DEPARTAMENTO|PROVINCIA|DISTRITO|ALTITUD|           HBC|  Dx_Anemia|
+--------+--------------------+--------+--------------------+---------+-------------+----+---------------+---------+--------+--------------+-----------+----------+----------------+------+---+---+---------+----+-----+-----------+----------------+----+--------------+----------+------+------------+--

In [7]:
df_csv.select("Dx_Anemia").distinct().show()

+---------------+
|      Dx_Anemia|
+---------------+
|  Anemia Severa|
|Anemia Moderada|
|    Anemia Leve|
|         Normal|
+---------------+



In [8]:
df_csv.select("Suplementacion").distinct().show()

+--------------+
|Suplementacion|
+--------------+
|             1|
|             0|
+--------------+



In [9]:
df_csv.select("EESS").distinct().show()

+--------------------+
|                EESS|
+--------------------+
|I-2   - 00001263 ...|
|I-4   - 00001444 ...|
|I-2   - 00001244 ...|
|I-2   - 00001402 ...|
|I-3   - 00001440 ...|
|I-2   - 00001319 ...|
|I-2   - 00001436 ...|
|I-1   - 00001399 ...|
|I-1   - 00001413 ...|
|I-2   - 00001457 ...|
|I-3   - 00001314 ...|
|I-2   - 00001398 ...|
|I-3   - 00001385 ...|
|I-2   - 00001394 ...|
|I-3   - 00001298 ...|
|I-3   - 00001387 ...|
|I-3   - 00001323 ...|
|I-1   - 00001441 ...|
|I-2   - 00001391 ...|
|I-3   - 00001261 ...|
+--------------------+
only showing top 20 rows



In [10]:
del(df_csv)

# Creación del diccionario de datos propio

| Nombre del campo | Ejemplo de entrada       | Descrión del campo                                                                                                                                                                                                                                                            | Links para enteder mejor el campo                                                                                                 |
|------------------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------|
| Diresa           | AMAZONAS                 | Codigo de la Dirección regional de salud (DIRESA)                                                                                                                                                                                                                             |                                                                                                                                   |
| Red              | CHACHAPOYAS              | Indica la organización de salud acerca de la redes de servicio de Salud                                                                                                                                                                                                       |                                                                                                                                   |
| Microred         | JUMBILLA                 | Indica la organización de salud acerca de la micro redes de salud                                                                                                                                                                                                             | https://www.geogpsperu.com/2017/09/establecimientos-de-salud-eess.html                                                            |
| EESS             | I-1   - 00004926 - RECTA | Codigo para la identificación de establecimiento de salud                                                                                                                                                                                                                     | https://www.gob.pe/10202-obtener-informacion-de-las-instituciones-prestadoras-de-servicios-de-salud-renipress                     |
| Renipress        | 4926                     | Codigo de Registro Nacional de Instituciones Prestadoras de Servicios de Salud (Renipress)                                                                                                                                                                                    |                                                                                                                                   |
| FechaAtencion    | 11/13/2019               | Fecha de atención y por ende analisis del niño acerca de la anemia                                                                                                                                                                                                            |                                                                                                                                   |
| Sexo             | M                        | Indica el sexo del niño con los valores de M y F donde M se refiere a masculino y F a femenino                                                                                                                                                                                |                                                                                                                                   |
| FechaNacimiento  | 11/13/2017               | Fecha de cuando nació el infante en el formato MM/DD/YYYY                                                                                                                                                                                                                     |                                                                                                                                   |
| EdadMeses        | 24                       | Edad del infante en cantidad de meses                                                                                                                                                                                                                                         |                                                                                                                                   |
| UbigeoPN         | 10308                    | Indica el codigo de ubicaión geografico (Ubigeo) del lugar donde se hizo el analisis al infante                                                                                                                                                                               |                                                                                                                                   |
| DepartamentoPN   | AMAZONAS                 | Indica el departamento                                                                                                                                                                                                                                                        |                                                                                                                                   |
| ProvinciaPN      | BONGARA                  | Indica la provincia                                                                                                                                                                                                                                                           |                                                                                                                                   |
| DistritoPN       | RECTA                    | Indica el distrito                                                                                                                                                                                                                                                            |                                                                                                                                   |
| CentroPobladoPN  | CHUELTA, ANEXO           | Indica el centro poblado que se encuentra en decir la subdivisión que se hacen en la provincia                                                                                                                                                                                |                                                                                                                                   |
| Juntos           | 1                        | Indica un estado de si vive con ambos padres                                                                                                                                                                                                                                  |                                                                                                                                   |
| SIS              | 1                        | Indica el estado si esta afiliado a un seguro integral de salud (SIS)                                                                                                                                                                                                         |                                                                                                                                   |
| Pin              | 0                        | Indeterminado                                                                                                                                                                                                                                                                 |                                                                                                                                   |
| Qaliwarma        | 0                        | Indica el estado acerca de que en su colegio publico esta afiliado al programa nacional de alimetación escolar Qali Warma                                                                                                                                                     |                                                                                                                                   |
| Peso             | 11.36                    | Indica el peso del infante                                                                                                                                                                                                                                                    |                                                                                                                                   |
| Talla            | 89.3                     | Indica la talla del infante                                                                                                                                                                                                                                                   |                                                                                                                                   |
| Hemoglobina      | 12.2                     | Indica la porción de hemoglbina en la sangre, en el formato gramos por decilitro                                                                                                                                                                                              |                                                                                                                                   |
| FechaHemoglobina | 11/13/2019               | Fecha de cuando al infante se le hizo el analisis de hemoglobina en la sangre en el formato MM/DD/YYYY                                                                                                                                                                        |                                                                                                                                   |
| Cred             | 1                        | Indica el estado del niño esta afiliado al control de crecimiento y desarrollo CRED que se refiere al control de salud periodico por el que todo los bebe y niños menores de edad deben pasarhttps://www.facebook.com/minsaperu/videos/2388417114509871/                      |                                                                                                                                   |
| Suplementacion   | 0                        | Indica el estado si el niño esta tomando suplementos                                                                                                                                                                                                                          |                                                                                                                                   |
| Consejeria       | 0                        | Indica el estado acerca de que el niño esta siendo tratado psicologicamente en alguna consejeria                                                                                                                                                                              |                                                                                                                                   |
| Sesion           | 0                        | Indeterminado                                                                                                                                                                                                                                                                 |                                                                                                                                   |
| DEPARTAMENTO     | AMAZONAS                 | Nombre del departamento en MAYUSCULAS                                                                                                                                                                                                                                         |                                                                                                                                   |
| PROVINCIA        | BONGARA                  | Nombre del provincia en MAYUSCULAS                                                                                                                                                                                                                                            |                                                                                                                                   |
| DISTRITO         | RECTA                    | Nombre del distrito en MAYUSCULAS                                                                                                                                                                                                                                             |                                                                                                                                   |
| ALTITUD          | 1950                     | Indica la altitud de la ubicación donde fue analisado el infante                                                                                                                                                                                                              | Importancia de la hemoglobina C en la anemia https://www.nicklauschildrens.org/condiciones/anemia-drepanocitica-con-hemoglobina-c |
| HBC              | 11.49491705              | Tipo de hemoglobina, donde la la hemoglobina S (HbS) es la hemoglobinopatía estructural más frecuente, seguida de la HbC, HbD, HbE y Hb Lepore. https://www.seqc.es/download/tema/13/4413/803351306/2167177/cms/tema-5-diagnositco-diferencial-de-las-hemoglobionopatias.pdf/ |                                                                                                                                   |
| Dx_Anemia        | Normal                   | Indica el nivel de hemoglobina desde normal a anomalo lo que en consuecuencia es un tipo de anemia                                                                                                                                                                            |                                                                                                                                   |

# Conclusiones del analisis inicial
- Algunos campos o datos no son influyentes para la determinación o predicción  de la anemia como los codigos del miniesterio de salud que  como lo son Red, microred y EESS.
- Otros datos son repetidos es decir una redundancia inncesaria como la fecha de atención y fecha de hemoglobina siendo mas util solo conservar un unico campo
- Otra redundancia de datos es del campo de provinvia, distrito, altitud y departamente en el cual lo ponene como un campo donde este lleva un nombre final con PN
- En base a los puntos anteriores seran eliminados algunos campo o en su respectivo caso juntarlos, se conservaran campos que considero relevantes para el desarrollo del proyecto



# Eliminación de campos y excluir CSV por regiones

In [13]:
import os
import re


def get_all_df_and_exclude_fields(folders,regions,excludesFields):
    # folders = ["HIS Niños 2019","HIS Niños 2020","HIS Niños 2021"]
    list_dir = os.listdir('DataSet/')
    list_DF = []
    for i,fd in enumerate(folders):
        filterCSV = []
        if fd in list_dir:
            CSVs = os.listdir('DataSet/'+fd+"/HB/")
            # print(CSVs)
            for r in regions:
                pattern = re.compile(r'.*'+r+'.*')
                filterCSV = list(filter(lambda c:re.match(pattern,c), CSVs) )
            
            for csv in filterCSV:
                # ------pyspark------
                # set options
                options = {
                    'header': True,
                    'sep': ',',
                    'format':'csv',
                }
                
                path = "DataSet/"+ fd +"/HB/"+ csv
                
                df_csv = spark.read.options(**options).csv(path)
                # Delete multiple fields in dataframe                 
                # https://stackoverflow.com/questions/47830915/how-to-drop-multiple-column-names-given-in-a-list-from-spark-dataframe
                df_csv = df_csv.drop(*excludesFields)
                list_DF.append(df_csv)
    return list_DF

In [36]:
excludeColums = ["Diresa","Red","Microred","EESS","Renipress","UbigeoPN","DepartamentoPN","ProvinciaPN",
                 "DistritoPN","CentroPobladoPN","Pin","FechaHemoglobina","FechaHemoglobina","ALTITUD","Sesion"] 
regions = ["Lima"]
# en los CSV las regiones estan en mayusculas
regions = list(map(lambda r:r.upper(),regions))
folders = ["HIS Niños 2019"]


list_df = get_all_df_and_exclude_fields(folders,regions,excludeColums)

In [15]:
list_df

[DataFrame[FechaAtencion: string, Sexo: string, FechaNacimiento: string, EdadMeses: string, Juntos: string, SIS: string, Qaliwarma: string, Peso: string, Talla: string, Hemoglobina: string, Cred: string, Suplementacion: string, Consejeria: string, DEPARTAMENTO: string, PROVINCIA: string, DISTRITO: string, HBC: string, Dx_Anemia: string],
 DataFrame[FechaAtencion: string, Sexo: string, FechaNacimiento: string, EdadMeses: string, Juntos: string, SIS: string, Qaliwarma: string, Peso: string, Talla: string, Hemoglobina: string, Cred: string, Suplementacion: string, Consejeria: string, DEPARTAMENTO: string, PROVINCIA: string, DISTRITO: string, HBC: string, Dx_Anemia: string],
 DataFrame[FechaAtencion: string, Sexo: string, FechaNacimiento: string, EdadMeses: string, Juntos: string, SIS: string, Qaliwarma: string, Peso: string, Talla: string, Hemoglobina: string, Cred: string, Suplementacion: string, Consejeria: string, DEPARTAMENTO: string, PROVINCIA: string, DISTRITO: string, HBC: string, 

In [16]:
len(list_df)

5

# Merge de los dataframes

In [17]:
# https://www.geeksforgeeks.org/merge-two-dataframes-in-pyspark/
import functools as fs  # For Python 3.x


def unionAll(dfs):
    return fs.reduce(lambda df1, df2: df1.union(df2.select(df1.columns)), dfs)

In [38]:
df_total = unionAll(list_df)
del(list_df)

In [39]:
print("Total de filas:")
df_total.count()

Total de filas:


156027

# Tratar con valores nulos y valores perdidos

In [20]:
# Missing Values and NULL
# https://www.datasciencemadesimple.com/count-of-missing-nanna-and-null-values-in-pyspark/#:~:text=Count%20of%20Missing%20values%20of%20single%20column%20in%20pyspark%20is,values%20of%20that%20particular%20columns.
from pyspark.sql.functions import col, count, isnan, when

df_count_null_missing = df_total.select([count(when(isnan(c) | col(c).isNull(), c)).alias(c) for c in df_total.columns]).show()
df_count_null_missing

+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+---------+--------+---+---------+
|FechaAtencion|Sexo|FechaNacimiento|EdadMeses|Juntos|SIS|Qaliwarma|Peso|Talla|Hemoglobina|Cred|Suplementacion|Consejeria|DEPARTAMENTO|PROVINCIA|DISTRITO|HBC|Dx_Anemia|
+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+---------+--------+---+---------+
|            0|   0|              0|        0|     0|  0|        0|   0|    0|          0|   0|             0|         0|           0|        0|       0|  0|        0|
+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+---------+--------+---+---------+



*No existen valores nulos o valores perdidos en nuestro dataframe* 
<br>
En el caso que hubiera valores nulos o perdidos una forma de tratarlos es eliminarlos se puede guiar del [link](https://es.acervolima.com/pyspark-dataframe-eliminar-filas-con-valores-null-o-ninguno/)

In [21]:
del(df_count_null_missing)

# Guardar DataFrame juntado en la Capa Raw

In [46]:
path_out = 'DataSet/LayerRawCSV'

(df_total.write
  .format("csv") #"com.databricks.spark.csv"
  .option("header", "true")
  .option("delimiter",",")
  .option("mode","overwrite")
  .save(path_out))

# (df_total.write.csv(,header=True)
#  .mode("overwrite")
#  .option(**options)   
# )

In [49]:
del(df_total)

# Capa Master: Cambio de tipos de Datos del dataframe y nombre de la columna

In [104]:
options = {
    'header': True,
    'sep': ',',
    "inferSchema": True
}

path_raw = "DataSet/LayerRawCSV/"

df_raw = spark.read.options(**options).csv(path_raw)

## Cambiar el nombre del campo

In [105]:
# Cambiar nombre de las columnas
# https://sparkbyexamples.com/pyspark/pyspark-rename-dataframe-column/
old_names_columns = df_raw.columns
news_names_columns = list(map(lambda c:c.lower(),old_names_columns))

news_names_columns[0] = news_names_columns[0][:5] + news_names_columns[0][5].upper() + news_names_columns[0][6:]
news_names_columns[2] = news_names_columns[2][:5] + news_names_columns[2][5].upper() + news_names_columns[2][6:]
news_names_columns[3] = news_names_columns[3][:4] + news_names_columns[3][4].upper() + news_names_columns[3][5:]
news_names_columns[-1] = "diag_anemia"
news_names_columns

dict_news_names_colums = dict(zip(old_names_columns, news_names_columns))
dict_news_names_colums
for key,value in dict_news_names_colums.items():
    df_raw = df_raw.withColumnRenamed(key,value)
df_raw.printSchema()

root
 |-- fechaAtencion: string (nullable = true)
 |-- sexo: string (nullable = true)
 |-- fechaNacimiento: string (nullable = true)
 |-- edadMeses: integer (nullable = true)
 |-- juntos: integer (nullable = true)
 |-- sis: integer (nullable = true)
 |-- qaliwarma: integer (nullable = true)
 |-- peso: double (nullable = true)
 |-- talla: double (nullable = true)
 |-- hemoglobina: double (nullable = true)
 |-- cred: integer (nullable = true)
 |-- suplementacion: integer (nullable = true)
 |-- consejeria: integer (nullable = true)
 |-- departamento: string (nullable = true)
 |-- provincia: string (nullable = true)
 |-- distrito: string (nullable = true)
 |-- hbc: double (nullable = true)
 |-- diag_anemia: string (nullable = true)



## Cambiar tipo de Datos

| Nombre del campo | Tipo inferido | Nuevo tipo a cambiar  |
|------------------|---------------|-----------------------|
| fechaAtencion    | StringType    | DateType (MM/DD/YYYY) |
| sexo             | StringType    | CharType (replace string)             |
| fechaNacimiento  | StringType    | DateType (MM/DD/YYYY) |
| edadMeses        | StringType    | IntegerType           |
| juntos           | StringType    | BooleanType           |
| sis              | StringType    | BooleanType           |
| qaliwarma        | StringType    | BooleanType           |
| peso             | StringType    | FloatType             |
| talla            | StringType    | FloatType             |
| hemoglobina      | StringType    | FloatType             |
| cred             | StringType    | BooleanType           |
| suplementacion   | StringType    | BooleanType           |
| consejeria       | StringType    | BooleanType           |
| departamento     | StringType    | StringType            |
| provincia        | StringType    | StringType            |
| distrito         | StringType    | StringType            |
| hbc              | StringType    | DoubleType            |
| diag_anemia        | StringType    | StringType            |

In [106]:
columns_booleans = news_names_columns[4:7] + news_names_columns[10:13]
columnns_string  = [news_names_columns[1]]+news_names_columns[13:16] +  [news_names_columns[17]]
columnns_float = news_names_columns[7:10]
columns_double =[news_names_columns[16]]
columns_integer =[news_names_columns[3]]
columns_date =[news_names_columns[0]] + [news_names_columns[2]] 

In [107]:
columns_date

['fechaAtencion', 'fechaNacimiento']

In [108]:
df_raw.show(5)

+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+----------+--------------------+--------------+-----------+
|fechaAtencion|sexo|fechaNacimiento|edadMeses|juntos|sis|qaliwarma|peso|talla|hemoglobina|cred|suplementacion|consejeria|departamento| provincia|            distrito|           hbc|diag_anemia|
+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+----------+--------------------+--------------+-----------+
|     3/7/2019|   M|      3/22/2014|       60|     0|  1|        0|null| null|       12.9|   0|             0|         0|        LIMA|    HUARAL|             LAMPIAN|   11.72064105|     Normal|
|     6/1/2019|   F|      1/18/2015|       53|     0|  1|        0|14.1|110.0|       12.3|   1|             0|         0|        LIMA|    HUARAL|             LAMPIAN|   11.12064105|     Normal|
|     1/8/2019|   F|       7/8

In [118]:
df_temp = df_raw.select("*")
df_temp.show(30)

+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+----------+--------------------+--------------+---------------+
|fechaAtencion|sexo|fechaNacimiento|edadMeses|juntos|sis|qaliwarma|peso|talla|hemoglobina|cred|suplementacion|consejeria|departamento| provincia|            distrito|           hbc|    diag_anemia|
+-------------+----+---------------+---------+------+---+---------+----+-----+-----------+----+--------------+----------+------------+----------+--------------------+--------------+---------------+
|     3/7/2019|   M|      3/22/2014|       60|     0|  1|        0|null| null|       12.9|   0|             0|         0|        LIMA|    HUARAL|             LAMPIAN|   11.72064105|         Normal|
|     6/1/2019|   F|      1/18/2015|       53|     0|  1|        0|14.1|110.0|       12.3|   1|             0|         0|        LIMA|    HUARAL|             LAMPIAN|   11.12064105|         Normal|
|     1/8/

In [116]:
df_temp = df_temp.select(col("fechaAtencion"),F.to_date(col("fechaAtencion"),"m/dd/YYYY"))

In [117]:
df_temp.show(5)

Py4JJavaError: An error occurred while calling o1308.showString.
: org.apache.spark.SparkUpgradeException: You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'm/dd/YYYY' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$$anonfun$checkLegacyFormatter$1.applyOrElse(DateTimeFormatterHelper.scala:196)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$$anonfun$checkLegacyFormatter$1.applyOrElse(DateTimeFormatterHelper.scala:185)
	at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
	at org.apache.spark.sql.catalyst.util.Iso8601TimestampFormatter.validatePatternString(TimestampFormatter.scala:109)
	at org.apache.spark.sql.catalyst.util.TimestampFormatter$.getFormatter(TimestampFormatter.scala:300)
	at org.apache.spark.sql.catalyst.util.TimestampFormatter$.apply(TimestampFormatter.scala:333)
	at org.apache.spark.sql.catalyst.expressions.TimestampFormatterHelper.getFormatter(datetimeExpressions.scala:72)
	at org.apache.spark.sql.catalyst.expressions.TimestampFormatterHelper.getFormatter$(datetimeExpressions.scala:67)
	at org.apache.spark.sql.catalyst.expressions.ToTimestamp.getFormatter(datetimeExpressions.scala:919)
	at org.apache.spark.sql.catalyst.expressions.TimestampFormatterHelper.$anonfun$formatterOption$1(datetimeExpressions.scala:64)
	at scala.Option.map(Option.scala:230)
	at org.apache.spark.sql.catalyst.expressions.TimestampFormatterHelper.formatterOption(datetimeExpressions.scala:64)
	at org.apache.spark.sql.catalyst.expressions.TimestampFormatterHelper.formatterOption$(datetimeExpressions.scala:62)
	at org.apache.spark.sql.catalyst.expressions.ToTimestamp.formatterOption$lzycompute(datetimeExpressions.scala:919)
	at org.apache.spark.sql.catalyst.expressions.ToTimestamp.formatterOption(datetimeExpressions.scala:919)
	at org.apache.spark.sql.catalyst.expressions.ToTimestamp.doGenCode(datetimeExpressions.scala:979)
	at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$genCode$3(Expression.scala:146)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:141)
	at org.apache.spark.sql.catalyst.expressions.CastBase.doGenCode(Cast.scala:853)
	at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$genCode$3(Expression.scala:146)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:141)
	at org.apache.spark.sql.catalyst.expressions.CastBase.genCode(Cast.scala:848)
	at org.apache.spark.sql.catalyst.expressions.CastBase.doGenCode(Cast.scala:853)
	at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$genCode$3(Expression.scala:146)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.catalyst.expressions.Expression.genCode(Expression.scala:141)
	at org.apache.spark.sql.catalyst.expressions.CastBase.genCode(Cast.scala:848)
	at org.apache.spark.sql.catalyst.expressions.Alias.genCode(namedExpressions.scala:163)
	at org.apache.spark.sql.execution.ProjectExec.$anonfun$doConsume$2(basicPhysicalOperators.scala:73)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at scala.collection.TraversableLike.map(TraversableLike.scala:238)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.ProjectExec.$anonfun$doConsume$1(basicPhysicalOperators.scala:73)
	at org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext.withSubExprEliminationExprs(CodeGenerator.scala:1026)
	at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:73)
	at org.apache.spark.sql.execution.CodegenSupport.constructDoConsumeFunction(WholeStageCodegenExec.scala:221)
	at org.apache.spark.sql.execution.CodegenSupport.consume(WholeStageCodegenExec.scala:192)
	at org.apache.spark.sql.execution.CodegenSupport.consume$(WholeStageCodegenExec.scala:149)
	at org.apache.spark.sql.execution.InputAdapter.consume(WholeStageCodegenExec.scala:496)
	at org.apache.spark.sql.execution.InputRDDCodegen.doProduce(WholeStageCodegenExec.scala:483)
	at org.apache.spark.sql.execution.InputRDDCodegen.doProduce$(WholeStageCodegenExec.scala:456)
	at org.apache.spark.sql.execution.InputAdapter.doProduce(WholeStageCodegenExec.scala:496)
	at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:95)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
	at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:90)
	at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:90)
	at org.apache.spark.sql.execution.InputAdapter.produce(WholeStageCodegenExec.scala:496)
	at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:54)
	at org.apache.spark.sql.execution.CodegenSupport.$anonfun$produce$1(WholeStageCodegenExec.scala:95)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
	at org.apache.spark.sql.execution.CodegenSupport.produce(WholeStageCodegenExec.scala:90)
	at org.apache.spark.sql.execution.CodegenSupport.produce$(WholeStageCodegenExec.scala:90)
	at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:41)
	at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:655)
	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:718)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
	at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321)
	at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:439)
	at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:425)
	at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
	at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3709)
	at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2735)
	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3700)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3698)
	at org.apache.spark.sql.Dataset.head(Dataset.scala:2735)
	at org.apache.spark.sql.Dataset.take(Dataset.scala:2942)
	at org.apache.spark.sql.Dataset.getRows(Dataset.scala:302)
	at org.apache.spark.sql.Dataset.showString(Dataset.scala:339)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.IllegalArgumentException: All week-based patterns are unsupported since Spark 3.0, detected: Y, Please use the SQL function EXTRACT instead
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$.$anonfun$convertIncompatiblePattern$4(DateTimeFormatterHelper.scala:323)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$.$anonfun$convertIncompatiblePattern$4$adapted(DateTimeFormatterHelper.scala:321)
	at scala.collection.TraversableLike$WithFilter.$anonfun$foreach$1(TraversableLike.scala:877)
	at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
	at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
	at scala.collection.immutable.StringOps.foreach(StringOps.scala:33)
	at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:876)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$.$anonfun$convertIncompatiblePattern$2(DateTimeFormatterHelper.scala:321)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
	at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
	at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
	at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
	at scala.collection.TraversableLike.map(TraversableLike.scala:238)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
	at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper$.convertIncompatiblePattern(DateTimeFormatterHelper.scala:318)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper.getOrCreateFormatter(DateTimeFormatterHelper.scala:121)
	at org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper.getOrCreateFormatter$(DateTimeFormatterHelper.scala:117)
	at org.apache.spark.sql.catalyst.util.Iso8601TimestampFormatter.getOrCreateFormatter(TimestampFormatter.scala:59)
	at org.apache.spark.sql.catalyst.util.Iso8601TimestampFormatter.formatter$lzycompute(TimestampFormatter.scala:68)
	at org.apache.spark.sql.catalyst.util.Iso8601TimestampFormatter.formatter(TimestampFormatter.scala:67)
	at org.apache.spark.sql.catalyst.util.Iso8601TimestampFormatter.validatePatternString(TimestampFormatter.scala:108)
	... 94 more


In [100]:
from pyspark.sql.functions import col
from pyspark.sql.types import StringType,BooleanType,DateType,FloatType,DoubleType,IntegerType

for c in columns_booleans:
    df_raw = df_raw.withColumn(c,col(c).cast(BooleanType()))

for c in columnns_string:
    df_raw = df_raw.withColumn(c,col(c).cast(StringType()))
    
for c in columnns_float:
    df_raw = df_raw.withColumn(c,col(c).cast(FloatType()))

for c in columns_double:
    df_raw = df_raw.withColumn(c,col(c).cast(DoubleType()))
    
for c in columns_integer:
    df_raw = df_raw.withColumn(c,col(c).cast(IntegerType()))

for c in columns_date:
    df_raw = df_raw.select(col(c),F.to_date(col(c),"mm/dd/YYYY"))

AnalysisException: cannot resolve '`fechaNacimiento`' given input columns: [fechaAtencion, to_date(fechaAtencion, mm/dd/YYYY)];
'Project ['fechaNacimiento, to_date('fechaNacimiento, Some(mm/dd/YYYY)) AS to_date(fechaNacimiento, mm/dd/YYYY)#5627]
+- Project [fechaAtencion#3362, to_date('fechaAtencion, Some(mm/dd/YYYY)) AS to_date(fechaAtencion, mm/dd/YYYY)#5624]
   +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, cast(edadMeses#5300 as int) AS edadMeses#5605, juntos#5320, sis#5339, qaliwarma#5358, peso#5529, talla#5548, hemoglobina#5567, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, hbc#5586, diag_anemia#5510]
      +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5529, talla#5548, hemoglobina#5567, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, cast(hbc#5281 as double) AS hbc#5586, diag_anemia#5510]
         +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5529, talla#5548, cast(hemoglobina#5262 as float) AS hemoglobina#5567, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, hbc#5281, diag_anemia#5510]
            +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5529, cast(talla#5243 as float) AS talla#5548, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, hbc#5281, diag_anemia#5510]
               +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, cast(peso#5224 as float) AS peso#5529, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, hbc#5281, diag_anemia#5510]
                  +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, distrito#5491, hbc#5281, cast(diag_anemia#5205 as string) AS diag_anemia#5510]
                     +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, provincia#5472, cast(distrito#5186 as string) AS distrito#5491, hbc#5281, diag_anemia#5205]
                        +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5453, cast(provincia#5167 as string) AS provincia#5472, distrito#5186, hbc#5281, diag_anemia#5205]
                           +- Project [fechaAtencion#3362, sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, cast(departamento#5148 as string) AS departamento#5453, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                              +- Project [fechaAtencion#3362, cast(sexo#5129 as string) AS sexo#5434, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, consejeria#5415, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                 +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, suplementacion#5396, cast(consejeria#5110 as boolean) AS consejeria#5415, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                    +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5377, cast(suplementacion#5091 as boolean) AS suplementacion#5396, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                       +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cast(cred#5072 as boolean) AS cred#5377, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                          +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, juntos#5320, sis#5339, cast(qaliwarma#5053 as boolean) AS qaliwarma#5358, peso#5224, talla#5243, hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                             +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, juntos#5320, cast(sis#5034 as boolean) AS sis#5339, qaliwarma#5053, peso#5224, talla#5243, hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                                +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#5300, cast(juntos#5015 as boolean) AS juntos#5320, sis#5034, qaliwarma#5053, peso#5224, talla#5243, hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                                   +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, cast(edadMeses#4996 as int) AS edadMeses#5300, juntos#5015, sis#5034, qaliwarma#5053, peso#5224, talla#5243, hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#5281, diag_anemia#5205]
                                                      +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#5224, talla#5243, hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, cast(hbc#4977 as double) AS hbc#5281, diag_anemia#5205]
                                                         +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#5224, talla#5243, cast(hemoglobina#4958 as float) AS hemoglobina#5262, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#4977, diag_anemia#5205]
                                                            +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#5224, cast(talla#4939 as float) AS talla#5243, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#4977, diag_anemia#5205]
                                                               +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, cast(peso#4920 as float) AS peso#5224, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#4977, diag_anemia#5205]
                                                                  +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, distrito#5186, hbc#4977, cast(diag_anemia#4901 as string) AS diag_anemia#5205]
                                                                     +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, provincia#5167, cast(distrito#4882 as string) AS distrito#5186, hbc#4977, diag_anemia#4901]
                                                                        +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#5148, cast(provincia#4863 as string) AS provincia#5167, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                           +- Project [fechaAtencion#3362, sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, cast(departamento#4844 as string) AS departamento#5148, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                              +- Project [fechaAtencion#3362, cast(sexo#4825 as string) AS sexo#5129, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, consejeria#5110, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                 +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, suplementacion#5091, cast(consejeria#4806 as boolean) AS consejeria#5110, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                    +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#5072, cast(suplementacion#4787 as boolean) AS suplementacion#5091, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                       +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cast(cred#4768 as boolean) AS cred#5072, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                          +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, juntos#5015, sis#5034, cast(qaliwarma#4749 as boolean) AS qaliwarma#5053, peso#4920, talla#4939, hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                             +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, juntos#5015, cast(sis#4730 as boolean) AS sis#5034, qaliwarma#4749, peso#4920, talla#4939, hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                                +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#4996, cast(juntos#4711 as boolean) AS juntos#5015, sis#4730, qaliwarma#4749, peso#4920, talla#4939, hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                                   +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, cast(edadMeses#3419 as int) AS edadMeses#4996, juntos#4711, sis#4730, qaliwarma#4749, peso#4920, talla#4939, hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4977, diag_anemia#4901]
                                                                                                      +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4920, talla#4939, hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, cast(hbc#4692 as double) AS hbc#4977, diag_anemia#4901]
                                                                                                         +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4920, talla#4939, cast(hemoglobina#4673 as float) AS hemoglobina#4958, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4692, diag_anemia#4901]
                                                                                                            +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4920, cast(talla#4654 as float) AS talla#4939, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4692, diag_anemia#4901]
                                                                                                               +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, cast(peso#4635 as float) AS peso#4920, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4692, diag_anemia#4901]
                                                                                                                  +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, distrito#4882, hbc#4692, cast(diag_anemia#4616 as string) AS diag_anemia#4901]
                                                                                                                     +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, provincia#4863, cast(distrito#4597 as string) AS distrito#4882, hbc#4692, diag_anemia#4616]
                                                                                                                        +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4844, cast(provincia#4578 as string) AS provincia#4863, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                           +- Project [fechaAtencion#3362, sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, cast(departamento#4559 as string) AS departamento#4844, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                              +- Project [fechaAtencion#3362, cast(sexo#4540 as string) AS sexo#4825, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, consejeria#4806, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                 +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, suplementacion#4787, cast(consejeria#4521 as boolean) AS consejeria#4806, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                    +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4768, cast(suplementacion#4502 as boolean) AS suplementacion#4787, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                       +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cast(cred#4483 as boolean) AS cred#4768, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                          +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4711, sis#4730, cast(qaliwarma#4464 as boolean) AS qaliwarma#4749, peso#4635, talla#4654, hemoglobina#4673, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                             +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4711, cast(sis#4445 as boolean) AS sis#4730, qaliwarma#4464, peso#4635, talla#4654, hemoglobina#4673, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                                +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, cast(juntos#4426 as boolean) AS juntos#4711, sis#4445, qaliwarma#4464, peso#4635, talla#4654, hemoglobina#4673, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#4692, diag_anemia#4616]
                                                                                                                                                   +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4635, talla#4654, hemoglobina#4673, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, cast(hbc#3666 as double) AS hbc#4692, diag_anemia#4616]
                                                                                                                                                      +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4635, talla#4654, cast(hemoglobina#4407 as float) AS hemoglobina#4673, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#3666, diag_anemia#4616]
                                                                                                                                                         +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4635, cast(talla#4388 as float) AS talla#4654, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#3666, diag_anemia#4616]
                                                                                                                                                            +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, cast(peso#4369 as float) AS peso#4635, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#3666, diag_anemia#4616]
                                                                                                                                                               +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, distrito#4597, hbc#3666, cast(diag_anemia#4350 as string) AS diag_anemia#4616]
                                                                                                                                                                  +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, provincia#4578, cast(distrito#4331 as string) AS distrito#4597, hbc#3666, diag_anemia#4350]
                                                                                                                                                                     +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4559, cast(provincia#4312 as string) AS provincia#4578, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                        +- Project [fechaAtencion#3362, sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, cast(departamento#4293 as string) AS departamento#4559, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                           +- Project [fechaAtencion#3362, cast(sexo#4274 as string) AS sexo#4540, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, consejeria#4521, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                              +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, suplementacion#4502, cast(consejeria#4255 as boolean) AS consejeria#4521, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                 +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4483, cast(suplementacion#4236 as boolean) AS suplementacion#4502, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                    +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cast(cred#4217 as boolean) AS cred#4483, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                       +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4426, sis#4445, cast(qaliwarma#4198 as boolean) AS qaliwarma#4464, peso#4369, talla#4388, hemoglobina#4407, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                          +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4426, cast(sis#4179 as boolean) AS sis#4445, qaliwarma#4198, peso#4369, talla#4388, hemoglobina#4407, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                             +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, cast(juntos#4160 as boolean) AS juntos#4426, sis#4179, qaliwarma#4198, peso#4369, talla#4388, hemoglobina#4407, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                                +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#4369, talla#4388, cast(hemoglobina#3533 as float) AS hemoglobina#4407, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                                   +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#4369, cast(talla#3514 as float) AS talla#4388, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                                      +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, cast(peso#3495 as float) AS peso#4369, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, diag_anemia#4350]
                                                                                                                                                                                                         +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, distrito#4331, hbc#3666, cast(diag_anemia#4141 as string) AS diag_anemia#4350]
                                                                                                                                                                                                            +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, provincia#4312, cast(distrito#4122 as string) AS distrito#4331, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                               +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4293, cast(provincia#4103 as string) AS provincia#4312, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                  +- Project [fechaAtencion#3362, sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, cast(departamento#4084 as string) AS departamento#4293, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                     +- Project [fechaAtencion#3362, cast(sexo#3381 as string) AS sexo#4274, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, consejeria#4255, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                        +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, suplementacion#4236, cast(consejeria#4065 as boolean) AS consejeria#4255, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                           +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4217, cast(suplementacion#4046 as boolean) AS suplementacion#4236, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                              +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cast(cred#4027 as boolean) AS cred#4217, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                                 +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#4160, sis#4179, cast(qaliwarma#4008 as boolean) AS qaliwarma#4198, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                                    +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#4160, cast(sis#3989 as boolean) AS sis#4179, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                                       +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, cast(juntos#3970 as boolean) AS juntos#4160, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, diag_anemia#4141]
                                                                                                                                                                                                                                          +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, distrito#4122, hbc#3666, cast(diag_anemia#3685 as string) AS diag_anemia#4141]
                                                                                                                                                                                                                                             +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, provincia#4103, cast(distrito#3647 as string) AS distrito#4122, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, departamento#4084, cast(provincia#3628 as string) AS provincia#4103, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                   +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, consejeria#4065, cast(departamento#3609 as string) AS departamento#4084, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                      +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, suplementacion#4046, cast(consejeria#3951 as boolean) AS consejeria#4065, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                         +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#4027, cast(suplementacion#3932 as boolean) AS suplementacion#4046, consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                            +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cast(cred#3913 as boolean) AS cred#4027, suplementacion#3932, consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                               +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, sis#3989, cast(qaliwarma#3894 as boolean) AS qaliwarma#4008, peso#3495, talla#3514, hemoglobina#3533, cred#3913, suplementacion#3932, consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                  +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3970, cast(sis#3875 as boolean) AS sis#3989, qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cred#3913, suplementacion#3932, consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                     +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, cast(juntos#3856 as boolean) AS juntos#3970, sis#3875, qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cred#3913, suplementacion#3932, consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                        +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3856, sis#3875, qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cred#3913, suplementacion#3932, cast(consejeria#3837 as boolean) AS consejeria#3951, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                           +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3856, sis#3875, qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cred#3913, cast(suplementacion#3818 as boolean) AS suplementacion#3932, consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                              +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3856, sis#3875, qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cast(cred#3799 as boolean) AS cred#3913, suplementacion#3818, consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                 +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3856, sis#3875, cast(qaliwarma#3780 as boolean) AS qaliwarma#3894, peso#3495, talla#3514, hemoglobina#3533, cred#3799, suplementacion#3818, consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                    +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3856, cast(sis#3761 as boolean) AS sis#3875, qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3799, suplementacion#3818, consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                       +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, cast(juntos#3742 as boolean) AS juntos#3856, sis#3761, qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3799, suplementacion#3818, consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                          +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3742, sis#3761, qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3799, suplementacion#3818, consejeria#3590 AS consejeria#3837, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                             +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3742, sis#3761, qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3799, suplementacion#3571 AS suplementacion#3818, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                                +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3742, sis#3761, qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3552 AS cred#3799, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                                   +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3742, sis#3761, qaliwarma#3476 AS qaliwarma#3780, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                                      +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3742, sis#3457 AS sis#3761, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                                         +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438 AS juntos#3742, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3685]
                                                                                                                                                                                                                                                                                                            +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3666, diag_anemia#3343 AS diag_anemia#3685]
                                                                                                                                                                                                                                                                                                               +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3647, hbc#3324 AS hbc#3666, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                  +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3628, distrito#3305 AS distrito#3647, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                     +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3609, provincia#3286 AS provincia#3628, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                        +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3590, departamento#3267 AS departamento#3609, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                           +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3571, consejeria#3248 AS consejeria#3590, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                              +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3552, suplementacion#3229 AS suplementacion#3571, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                 +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3533, cred#3210 AS cred#3552, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                    +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3514, hemoglobina#3191 AS hemoglobina#3533, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                       +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3495, talla#3172 AS talla#3514, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                          +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3476, peso#3153 AS peso#3495, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                             +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3457, qaliwarma#3134 AS qaliwarma#3476, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3438, sis#3115 AS sis#3457, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                   +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3419, juntos#3096 AS juntos#3438, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                      +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3400, edadMeses#3077 AS edadMeses#3419, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                         +- Project [fechaAtencion#3362, sexo#3381, fechaNacimiento#3058 AS fechaNacimiento#3400, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                            +- Project [fechaAtencion#3362, sexo#3039 AS sexo#3381, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                               +- Project [fechaAtencion#3020 AS fechaAtencion#3362, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                                  +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, hbc#3324, Dx_Anemia#2568 AS diag_anemia#3343]
                                                                                                                                                                                                                                                                                                                                                                     +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, distrito#3305, HBC#2567 AS hbc#3324, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                        +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, provincia#3286, DISTRITO#2566 AS distrito#3305, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                           +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, departamento#3267, PROVINCIA#2565 AS provincia#3286, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                              +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, consejeria#3248, DEPARTAMENTO#2564 AS departamento#3267, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                 +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, suplementacion#3229, Consejeria#2563 AS consejeria#3248, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                    +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, cred#3210, Suplementacion#2562 AS suplementacion#3229, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                       +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, hemoglobina#3191, Cred#2561 AS cred#3210, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                          +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, talla#3172, Hemoglobina#2560 AS hemoglobina#3191, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                             +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, peso#3153, Talla#2559 AS talla#3172, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, qaliwarma#3134, Peso#2558 AS peso#3153, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                   +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, sis#3115, Qaliwarma#2557 AS qaliwarma#3134, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                      +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, juntos#3096, SIS#2556 AS sis#3115, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                         +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, edadMeses#3077, Juntos#2555 AS juntos#3096, SIS#2556, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                            +- Project [fechaAtencion#3020, sexo#3039, fechaNacimiento#3058, EdadMeses#2554 AS edadMeses#3077, Juntos#2555, SIS#2556, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                               +- Project [fechaAtencion#3020, sexo#3039, FechaNacimiento#2553 AS fechaNacimiento#3058, EdadMeses#2554, Juntos#2555, SIS#2556, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                                  +- Project [fechaAtencion#3020, Sexo#2552 AS sexo#3039, FechaNacimiento#2553, EdadMeses#2554, Juntos#2555, SIS#2556, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                                     +- Project [FechaAtencion#2551 AS fechaAtencion#3020, Sexo#2552, FechaNacimiento#2553, EdadMeses#2554, Juntos#2555, SIS#2556, Qaliwarma#2557, Peso#2558, Talla#2559, Hemoglobina#2560, Cred#2561, Suplementacion#2562, Consejeria#2563, DEPARTAMENTO#2564, PROVINCIA#2565, DISTRITO#2566, HBC#2567, Dx_Anemia#2568]
                                                                                                                                                                                                                                                                                                                                                                                                                        +- Relation[FechaAtencion#2551,Sexo#2552,FechaNacimiento#2553,EdadMeses#2554,Juntos#2555,SIS#2556,Qaliwarma#2557,Peso#2558,Talla#2559,Hemoglobina#2560,Cred#2561,Suplementacion#2562,Consejeria#2563,DEPARTAMENTO#2564,PROVINCIA#2565,DISTRITO#2566,HBC#2567,Dx_Anemia#2568] csv


In [97]:
df_raw.printSchema()

root
 |-- fechaAtencion: string (nullable = true)
 |-- sexo: string (nullable = true)
 |-- fechaNacimiento: string (nullable = true)
 |-- edadMeses: integer (nullable = true)
 |-- juntos: boolean (nullable = true)
 |-- sis: boolean (nullable = true)
 |-- qaliwarma: boolean (nullable = true)
 |-- peso: float (nullable = true)
 |-- talla: float (nullable = true)
 |-- hemoglobina: float (nullable = true)
 |-- cred: boolean (nullable = true)
 |-- suplementacion: boolean (nullable = true)
 |-- consejeria: boolean (nullable = true)
 |-- departamento: string (nullable = true)
 |-- provincia: string (nullable = true)
 |-- distrito: string (nullable = true)
 |-- hbc: double (nullable = true)
 |-- diag_anemia: string (nullable = true)



# Soluction

In [1]:
# Encontrar Spark
import findspark

findspark.init()
print(findspark.find())

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.appName('spark_Pipeline').getOrCreate()

D:\Libraries\spark-3.0.2-bin-hadoop2.7-hive1.2\python\pyspark


In [6]:
from typing import Callable
from pyspark.sql import Column
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType, IntegerType, ArrayType, DataType
class py_or_udf:
    def __init__(self, returnType : DataType=StringType()):
        self.spark_udf_type = returnType
        
    def __call__(self, func : Callable):
        def wrapped_func(*args, **kwargs):
            if any([isinstance(arg, Column) for arg in args]) or \
                any([isinstance(vv, Column) for vv in kwargs.values()]):
                return udf(func, self.spark_udf_type)(*args, **kwargs)
            else:
                return func(*args, **kwargs)
            
        return wrapped_func

In [7]:
colum = ["date"]
data =     [("1/5/1991",), ("11/24/1991",), ("11/3/1991",),("1/05/1991",)]
df = spark.createDataFrame(data,colum) 
df.show()

+----------+
|      date|
+----------+
|  1/5/1991|
|11/24/1991|
| 11/3/1991|
| 1/05/1991|
+----------+



In [11]:
@py_or_udf(returnType=StringType())
def decoratorUDF(date: str) -> str:
    m,d,y =date.split("/")
    
    if len(m) <2:
        m="0"+m
    if len(d) < 2:
        d="0"+d
    
    return d+"/"+m+"/"+y
#     print(m,d,y)

# This works
# assert decoratorUDF("annagray@kaufman.com") == "a"

decoratorUDF("1/05/1991")

'05/01/1991'

In [None]:
# This also works
salesDF.select(decoratorUDF(col("email"))).show(3)

In [None]:
# No olvidar la presentación del proyecto sea a travez de un meet que sera parte de la permanente
# parcial sera un escritura de un paper sobre el proyecto