In [1]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import udf
from snowflake.snowpark.types import StringType
from snowflake.snowpark.types import IntegerType
from config import connection_parameters


In [2]:
# Open the Snowflake session and select the database used by the rest of the notebook.
sesion = Session.builder.configs(connection_parameters).create()
# Identity comparison is the idiomatic None test; `!=` would go through __ne__.
if sesion is not None:
    print("Conectado")
    sesion.use_database('inegi')
    # Show the active warehouse / database / role as a quick sanity check.
    print(sesion.sql("select current_warehouse(), current_database(), current_role()").collect())
else:
    # NOTE(review): create() presumably raises on a failed connection instead of
    # returning None, in which case this branch is unreachable -- confirm.
    print("Error de conexión")

Conectado
[Row(CURRENT_WAREHOUSE()='INEGI_WH', CURRENT_DATABASE()='INEGI', CURRENT_ROLE()='INEGI_ROLE')]


In [3]:
# Create (or replace) the INEGI_DATA view: each column is extracted from the
# `v` column of INEGI_RAW with a path expression and cast to a concrete type.
# The SQL text is assembled from adjacent string literals, one per column.
# NOTE(review): the TVIVHAB column is aliased without the `as` keyword, unlike
# every other column in this statement.
query = (
    "create or replace  view INEGI_DATA as select "
    "v:ENTIDAD::int as entidad,"
    "v:MUN::int as municipio,"
    "v:NOM_MUN::string as nom_municipio,"
    "v:LOC::string as localidad,"
    "v:NOM_LOC::string as nom_localidad,"
    "v:LONGITUD::float as longitud,"
    "v:LATITUD::float as latitud,"
    "v:ALTITUD::int as altitud,"
    "v:POBTOT::int as pob_total,"
    "v:POBFEM::int as pob_fem,"
    "v:POBMAS::int as pob_masc,"
    "v:PCON_DISC::int as pob_discapacidad,"
    "v:GRAPROES::int as pob_escolaridad,"
    "v:GRAPROES_F::int as pob_esco_fem,"
    "v:GRAPROES_M::int as pob_esco_masc,"
    "v:PSINDER::int as pob_sssalud,"
    "v:PDER_SS::int as pob_cssalud,"
    "v:VIVTOT::int as total_vivienda,"
    "v:TVIVHAB::int total_habitada,"
    "v:VPH_INTER::int as hab_internet "
    "from INEGI_RAW;"
)
sesion.sql(query).collect()



[Row(status='View INEGI_DATA successfully created.')]

In [None]:
# Register the permanent UDF `nom_entidad` (integer in, string out) from the
# Python file stored on the @inegi stage.
# replace=True keeps this cell re-runnable: without it, registering a permanent
# UDF whose name already exists is expected to fail on a second run.
entidad_udf = sesion.udf.register_from_file(
    file_path='@inegi/entidad.py',
    func_name='nom_entidad',
    return_type=StringType(),
    input_types=[IntegerType()],
    is_permanent=True,
    name="nom_entidad",
    stage_location="@inegi",
    replace=True,
)

In [8]:
# View with per-state totals; the nom_entidad UDF turns the state number into
# the state name. The statement is assembled from three named fragments.

# CTE 1: coordinates per row plus a row number within each state name.
# NOTE(review): the window partitions AND orders by nom_entidad, so all rows in
# a partition tie; which row gets row_number = 1 (and therefore which
# latitude/longitude is kept) is not determined by the query.
cte_poblacion_lat = (
    "with poblacion_lat as ( select latitud as latitude,longitud as longitude, nom_entidad(entidad) as nom_entidad,"
    "ROW_NUMBER() OVER(PARTITION BY nom_entidad ORDER BY nom_entidad DESC) AS row_number from INEGI_DATA),"
)

# CTE 2: total population summed per state.
cte_poblacion_t = (
    "poblacion_t as (select sum(pob_total) as poblacion_total,nom_entidad(entidad) as nom_entidad from "
    "INEGI_DATA group by entidad order by poblacion_total desc)"
)

# Final projection: one row per state name (row_number = 1) joined to its total.
select_final = (
    " select pl.nom_entidad,pt.poblacion_total as total_population, pl.latitude, pl.longitude"
    " from poblacion_lat pl left join poblacion_t pt on pl.nom_entidad = pt.nom_entidad"
    " where row_number = 1;"
)

viewquery = "".join(
    ["create or replace view INEGI_MAPA as ", cte_poblacion_lat, cte_poblacion_t, select_final]
)
sesion.sql(viewquery).collect()

[Row(status='View INEGI_MAPA successfully created.')]

In [6]:
# Check the view that holds only the per-state totals by printing its first rows.
vista_mapa = "INEGI_MAPA"
df_entidad = sesion.table(vista_mapa)
df_entidad.show()

---------------------------------------------------------------------------------------
|"NOM_ENTIDAD"         |"POBLACION_TOTAL"  |"LATITUDE"          |"LONGITUDE"          |
---------------------------------------------------------------------------------------
|SINALOA               |3026943            |24.771613055555555  |-107.11566722222221  |
|MEXICO                |16992418           |19.489337777777777  |-99.34708166666667   |
|TLAXCALA              |1342977            |19.404180277777776  |-98.04882305555556   |
|COAHUILA DE ZARAGOZA  |3146771            |25.794352777777778  |-103.08951666666667  |
|OAXACA                |4132148            |17.465217777777777  |-95.23456138888889   |
|PUEBLA                |6583278            |20.03848888888889   |-97.27450888888889   |
|TAMAULIPAS            |3527735            |23.68985222222222   |-99.11164777777778   |
|BAJA CALIFORNIA       |3769020            |30.096982777777775  |-115.6571538888889   |
|TABASCO               |2402598 

In [7]:
# Close the Snowflake session opened at the start of the notebook.
sesion.close()