# Revisión de datos de los archivos csv de Open Contracting Partnership. Data Set del 2021.

Usando el buscador de OpenContracting se buscaron datos sobre Chile (buscador: https://data.open-contracting.org/es/search/). Se decidió descargarlos archivos de los años 2018 y 2021, para tener una muestra de datos que contenga información sobre licitaciones en un rango de tiempo cercano a estas fechas.

In [1]:
import pandas as pd
import numpy as np
import datetime
from os import path

## Revision de main.csv

In [2]:
camino = path.join("Chile","2021","main.csv")
df_main = pd.read_csv(camino, sep=",",low_memory=False)

In [3]:
df_respaldo = df_main

In [4]:
df_main.head()

Unnamed: 0,_link,id,tag,date,ocid,language,initiationType,tender_id,tender_procurementMethodDetails,tender_title,...,buyer_name,planning_budget_id,planning_budget_description,planning_budget_amount_amount,planning_budget_amount_currency,tender_techniques_hasFrameworkAgreement,tender_techniques_frameworkAgreement_method,tender_contractPeriod_endDate,tender_contractPeriod_startDate,tender_contractPeriod_durationInDays
0,id-0.0,ocds-70d2nz-1057501-322-LQ21-2021-12-22T15:00:11Z,compiled,2021-12-22T15:00:11Z,ocds-70d2nz-1057501-322-LQ21,es,tender,1057501-322-LQ21,Licitación Pública entre a 2000 y 5000 UTM (LQ),CONVENIO INSUMOS PABELLON CENTRAL II,...,,,,,,,,,,
1,id-0.1,ocds-70d2nz-1039043-27-LE21-2021-12-29T17:32:40Z,compiled,2021-12-29T17:32:40Z,ocds-70d2nz-1039043-27-LE21,es,tender,1039043-27-LE21,Licitación Pública Entre 100 y 1000 UTM (LE),LIC. SERVICIOS MANTENCION DE EDIFICIOS,...,,,,,,,,,,
2,id-0.2,ocds-70d2nz-2281-300-L121-2021-12-28T17:15:48Z,compiled,2021-12-28T17:15:48Z,ocds-70d2nz-2281-300-L121,es,tender,2281-300-L121,Licitación Pública Menor a 100 UTM (L1),ADQUISICION DE MATERIALES DE OFICINAS,...,,,,,,,,,,
3,id-0.3,ocds-70d2nz-968521-14-LE21-2021-12-27T11:47:53Z,compiled,2021-12-27T11:47:53Z,ocds-70d2nz-968521-14-LE21,es,tender,968521-14-LE21,Licitación Pública Entre 100 y 1000 UTM (LE),Antisépticos y desinfectantes,...,,,,,,,,,,
4,id-0.4,ocds-70d2nz-1079639-135-L121-2021-12-27T17:31:16Z,compiled,2021-12-27T17:31:16Z,ocds-70d2nz-1079639-135-L121,es,tender,1079639-135-L121,Licitación Pública Menor a 100 UTM (L1),CIERRE PERIMETRAL DE 04 VIVIENDAS FISCALES DE ...,...,,,,,,,,,,


In [5]:
df_main.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 134701 entries, 0 to 134700
Data columns (total 39 columns):
 #   Column                                       Non-Null Count   Dtype  
---  ------                                       --------------   -----  
 0   _link                                        134701 non-null  object 
 1   id                                           134701 non-null  object 
 2   tag                                          134701 non-null  object 
 3   date                                         134701 non-null  object 
 4   ocid                                         134701 non-null  object 
 5   language                                     134701 non-null  object 
 6   initiationType                               134701 non-null  object 
 7   tender_id                                    134701 non-null  object 
 8   tender_procurementMethodDetails              134701 non-null  object 
 9   tender_title                                 134701 non-nul

In [6]:
df_main.columns

Index(['_link', 'id', 'tag', 'date', 'ocid', 'language', 'initiationType',
       'tender_id', 'tender_procurementMethodDetails', 'tender_title',
       'tender_status', 'tender_procurementMethod', 'tender_description',
       'tender_hasEnquiries', 'tender_awardPeriod_endDate',
       'tender_awardPeriod_startDate', 'tender_awardPeriod_durationInDays',
       'tender_tenderPeriod_endDate', 'tender_tenderPeriod_startDate',
       'tender_tenderPeriod_durationInDays', 'tender_enquiryPeriod_endDate',
       'tender_enquiryPeriod_startDate', 'tender_enquiryPeriod_durationInDays',
       'tender_procuringEntity_id', 'tender_procuringEntity_name',
       'tender_value_amount', 'tender_value_unitOfAccount',
       'tender_value_currency', 'buyer_id', 'buyer_name', 'planning_budget_id',
       'planning_budget_description', 'planning_budget_amount_amount',
       'planning_budget_amount_currency',
       'tender_techniques_hasFrameworkAgreement',
       'tender_techniques_frameworkAgreement_m

In [7]:
df_main["tender_procurementMethodDetails"].unique()

array(['Licitación Pública entre a 2000 y 5000 UTM (LQ)',
       'Licitación Pública Entre 100 y 1000 UTM (LE)',
       'Licitación Pública Menor a 100 UTM (L1)',
       'Licitación Pública Mayor 1000 UTM (LP)',
       'Licitación Pública Mayor a 5000 (LR)',
       'Licitación Privada entre 100 y 1000 UTM.',
       'Licitación Pública MOP (O1)',
       'Licitación Privada Menor a 100 UTM.',
       'Licitación Privada MOP (O2)',
       'Licitación Pública Servicios personales especializados (LS)',
       'Licitación Privada Mayor a 1000 UTM',
       'Licitación Privada entre a 2000 y 5000 UTM (H2)',
       'Licitación Privada Mayor a 5000 (I2)'], dtype=object)

Se utilizan la pagina <<https://api.mercadopublico.cl/modules/Licitacion.aspx>> para crear el diccionario.

In [8]:

tipos_licitaciones ={'Licitación Pública Menor a 100 UTM (L1)': "L1",
       'Licitación Pública Entre 100 y 1000 UTM (LE)': "LE",
       'Licitación Pública Mayor 1000 UTM (LP)': "LP",
       'Licitación Pública entre a 2000 y 5000 UTM (LQ)':"LQ",
       'Licitación Pública Mayor a 5000 (LR)' :"LR",
       'Licitación Privada entre a 2000 y 5000 UTM (H2)':"H2",
       'Licitación Pública Servicios personales especializados (LS)':"LS",
       "Licitación Privada por Licitación Pública anterior sin oferentes (A1)" : "A1",
       "Licitación Privada por Remanente de Contrato anterior (B1)": "B1",
       "Licitación Privada por Convenios con Personas Jurídicas Extranjeras fuera del Territorio Nacional (E1)": "E1",
       "Licitación Privada por Servicios de Naturaleza Confidencial (F1)": "F1",
       "Licitación Privada por otras causales, excluidas de la ley de Compras (J1)": "J1",
       "Licitación Privada entre 100 y 1000 UTM" :"CO", "Licitación Privada Mayor a 1000 UTM": "B2",
       "Trato Directo por Producto de Licitación Privada anterior sin oferentes o desierta": "A2",
       "Trato Directo por Proveedor Único (D1)": "D1","Licitación Privada Menor a 100 UTM" : "E2", "Trato Directo (Cotización) (C2)" : "C2",
       "Compra Directa (Orden de compra) (C1)" : "C1", "Trato Directo (Cotización) (F2)" :"F2", "Compra Directa (Orden de compra) (F3)" : "F3",
       "Directo (Cotización) (G2)" : "G2", "Compra Directa (Orden de compra) (G1)" : "G1",
       "Orden de Compra menor a 3 UTM (R1)" : "R1", "Orden de Compra sin Resolución (CA)" : "CA", 
       "Orden de Compra proveniente de adquisición sin emisión automática de OC (SE)" : "SE"}


df_main["tender_procurementMethodDetails"] = df_main["tender_procurementMethodDetails"].map(tipos_licitaciones)
df_main["tender_procurementMethodDetails"] = df_main["tender_procurementMethodDetails"].astype("category")

In [9]:
df_main["buyer_id"] = df_main["tender_procuringEntity_id"]
df_main['buyer_id'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 134701 entries, 0 to 134700
Series name: buyer_id
Non-Null Count   Dtype 
--------------   ----- 
134701 non-null  object
dtypes: object(1)
memory usage: 1.0+ MB


In [10]:
def dejar_rut(texto:str) -> str:
    return int(texto.split("-")[2])

df_main["buyer_id"] = df_main["buyer_id"].apply(dejar_rut)

In [11]:
df_main["buyer_id"]

0         1057501
1         1039043
2            3275
3          968521
4         1079639
           ...   
134696       3724
134697       3422
134698       3573
134699       3459
134700       3756
Name: buyer_id, Length: 134701, dtype: int64

In [12]:
df_main.drop(columns=['id', 'tag', 'date', 'ocid', 'language', 'buyer_name', 'tender_title', 'tender_description', 'tender_procuringEntity_id'], inplace=True)

In [13]:
df_main.loc[df_main['tender_enquiryPeriod_startDate'] == "3"]

Unnamed: 0,_link,initiationType,tender_id,tender_procurementMethodDetails,tender_status,tender_procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,buyer_id,planning_budget_id,planning_budget_description,planning_budget_amount_amount,planning_budget_amount_currency,tender_techniques_hasFrameworkAgreement,tender_techniques_frameworkAgreement_method,tender_contractPeriod_endDate,tender_contractPeriod_startDate,tender_contractPeriod_durationInDays


### Transformación de datos.

In [14]:
df_main["tender_enquiryPeriod_startDate"] = pd.to_datetime(df_main['tender_enquiryPeriod_startDate'], format='%Y-%m-%dT%H:%M:%SZ')
df_main["tender_enquiryPeriod_endDate"] = pd.to_datetime(df_main['tender_enquiryPeriod_endDate'], format='%Y-%m-%dT%H:%M:%SZ')
df_main['tender_tenderPeriod_startDate'] = pd.to_datetime(df_main['tender_tenderPeriod_startDate'], format='%Y-%m-%dT%H:%M:%SZ')
df_main["tender_tenderPeriod_endDate"] = pd.to_datetime(df_main['tender_tenderPeriod_endDate'], format='%Y-%m-%dT%H:%M:%SZ')
df_main['tender_awardPeriod_startDate'] = pd.to_datetime(df_main['tender_awardPeriod_startDate'], format='%Y-%m-%dT%H:%M:%SZ')

Hay un error en el formato de una fila con contenido <2999-03-15T12:17:00Z>, se eliminará.

In [15]:
df_main.drop(df_main[df_main["tender_awardPeriod_endDate"] == "2999-03-15T12:17:00Z"].index, axis=0, inplace=True)
df_main.drop(df_main[df_main["tender_awardPeriod_endDate"] == "2999-05-31T17:26:00Z"].index, axis=0, inplace=True)
df_main.drop(df_main[df_main["tender_awardPeriod_endDate"] == "2999-05-31T15:11:00Z"].index, axis=0, inplace=True)

In [16]:
df_main['tender_awardPeriod_endDate'] = pd.to_datetime(df_main['tender_awardPeriod_endDate'], format='%Y-%m-%dT%H:%M:%SZ')
df_main['tender_enquiryPeriod_durationInDays'] = df_main['tender_enquiryPeriod_endDate'] - df_main['tender_enquiryPeriod_startDate'] 
df_main['tender_awardPeriod_durationInDays'] = df_main['tender_awardPeriod_endDate'] - df_main['tender_awardPeriod_startDate']
df_main['tender_tenderPeriod_durationInDays'] = df_main['tender_tenderPeriod_endDate'] - df_main['tender_tenderPeriod_startDate']

# Filtrado por fecha
Así como en el notebook del 2018, filtraremos las fechas, de forma que queden solo entre (2020-09-01) y (2021-12-31).

In [17]:
fecha_filtro = '2020-09-01'
df_filtrado = df_main.loc[df_main['tender_tenderPeriod_startDate'] >= fecha_filtro]

fecha_filtro = '2021-12-31'
df_filtrado = df_filtrado.loc[df_filtrado['tender_awardPeriod_endDate'] <= fecha_filtro]

In [18]:
df_filtrado.reset_index(drop=True, inplace=True)
df_filtrado.rename(columns={"tender_value_amount":"estimated_cost", "tender_value_currency": "estimated_cost_currency", "tender_value_unitOfAccount": "estimated_cost_unitOfAccount"}, inplace=True )
df_filtrado["estimated_cost_currency"] = df_filtrado["estimated_cost_currency"].replace(np.nan, "No")

In [19]:
def crear_unit_value_monetary_unit(indice:int, comparador="No") -> str:
    if df_filtrado.iloc[indice]["estimated_cost_currency"] == comparador:
        return df_filtrado.iloc[indice]["estimated_cost_unitOfAccount"]
    return df_filtrado.iloc[indice]["estimated_cost_currency"]

df_filtrado["estimated_cost_monetary_unit"] = df_filtrado.index.map(crear_unit_value_monetary_unit)

In [20]:
df_filtrado["estimated_cost_monetary_unit"].unique()

array(['CLP', nan, 'CLF', 'USD', 'UTM', 'EUR'], dtype=object)

In [21]:
df_filtrado.drop(columns=["estimated_cost_currency","estimated_cost_unitOfAccount"],inplace=True)
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120718 entries, 0 to 120717
Data columns (total 29 columns):
 #   Column                                       Non-Null Count   Dtype          
---  ------                                       --------------   -----          
 0   _link                                        120718 non-null  object         
 1   initiationType                               120718 non-null  object         
 2   tender_id                                    120718 non-null  object         
 3   tender_procurementMethodDetails              119575 non-null  category       
 4   tender_status                                120718 non-null  object         
 5   tender_procurementMethod                     120718 non-null  object         
 6   tender_hasEnquiries                          120718 non-null  bool           
 7   tender_awardPeriod_endDate                   120718 non-null  datetime64[ns] 
 8   tender_awardPeriod_startDate                 120718 no

## Revision awards.csv

In [22]:
camino = path.join("Chile","2018","awards.csv")
df_awards = pd.read_csv(camino, sep=",")

In [23]:
df_awards.head(8)

Unnamed: 0,_link,_link_main,id,date,description,title,status,value_amount,value_currency,value_unitOfAccount
0,id-0.0.awards.0,id-0.0,8270925,2017-12-23T13:23:24Z,DIDECO - NAVIDAD COMUNAL - JUEGOS INFLABLES\r\...,DIDECO - NAVIDAD COMUNAL - JUEGOS INFLABLES,active,690000.0,CLP,
1,id-0.1.awards.0,id-0.1,8274205,,CONFECCIÓN E INSTALACIÓN DE PROTECCIONES METAL...,CONFECCIÓN E INSTALACIÓN DE PROTECCIONES METÁL...,unsuccessful,,,
2,id-0.2.awards.0,id-0.2,8275841,2018-02-02T11:16:54Z,sm80 sp3565 servicios generales,Reparacion de mamparas,active,870000.0,CLP,
3,id-0.3.awards.0,id-0.3,8254526,2017-12-26T18:19:33Z,VISITA A HUERTO ORGANICO DE BERRIES HUBICADO E...,VISTA A HUERTO ORGANICO DE BERRIES HUBICADO EN...,unsuccessful,,,
4,id-0.4.awards.0,id-0.4,8253535,2017-12-21T18:17:23Z,CAPACITACIÓN PARA DOCENTES DE LA ESC. F-929 - ...,CAPACITACIÓN PARA DOCENTES DE LA ESC. F-929 - ...,active,6000000.0,CLP,
5,id-0.5.awards.0,id-0.5,8277247,2018-01-16T10:56:54Z,Adquisicion requerida por el Sr. Juan Pablo Re...,Adquisición programa rehabilitación integral e...,active,347043.0,CLP,
6,id-0.6.awards.0,id-0.6,8274542,2017-12-28T21:54:24Z,SE REQUIERE LA ADQUISICION DE REACTIVOS PARA L...,REACTIVOS PARA LABORATORIO,active,12219604.0,CLP,
7,id-0.7.awards.0,id-0.7,8271070,2017-12-29T11:54:57Z,PROGRAMA ATENCIÓN INTEGRAL FAMILIAR 24 HORAS,TERMO-VENTILADOR Y ESTUFA A GAS (N°1017) COM.S...,active,191241.0,CLP,


In [24]:
df_no_nan_un_ac = df_awards.dropna(subset=["value_unitOfAccount"])[["value_currency", "value_unitOfAccount", "status"]]
df_unit_of_account = df_no_nan_un_ac.loc[df_no_nan_un_ac["value_currency"].isna()]

In [25]:
df_unit_of_account["value_unitOfAccount"].describe()

count      83
unique      1
top       UTM
freq       83
Name: value_unitOfAccount, dtype: object

In [26]:
df_no_nan_un_ac["value_unitOfAccount"].describe()

count      83
unique      1
top       UTM
freq       83
Name: value_unitOfAccount, dtype: object

In [27]:
df_awards["value_currency"] = df_awards["value_currency"].replace(np.nan, "No")

In [28]:
def crear_unit_value_monetary_unit(indice:int, comparador="No") -> str:
    if df_awards.iloc[indice]["value_currency"] == comparador:
        return df_awards.iloc[indice]["value_unitOfAccount"]
    return df_awards.iloc[indice]["value_currency"]

df_awards["value_monetary_unit"] = df_awards.index.map(crear_unit_value_monetary_unit)

In [29]:
df_awards["value_monetary_unit"].unique()

array(['CLP', nan, 'CLF', 'USD', 'UTM', 'EUR'], dtype=object)

In [30]:
df_awards.drop(columns=['id', 'description', 'title', "value_unitOfAccount", "value_currency","value_unitOfAccount"],inplace=True)

In [31]:
df_awards.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240021 entries, 0 to 240020
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   _link                240021 non-null  object 
 1   _link_main           240021 non-null  object 
 2   date                 210324 non-null  object 
 3   status               240021 non-null  object 
 4   value_amount         177451 non-null  float64
 5   value_monetary_unit  177451 non-null  object 
dtypes: float64(1), object(5)
memory usage: 11.0+ MB


## Revisión de awards_items.csv

In [32]:
camino = path.join("Chile","2018","awards_items.csv") 
df_awards_items = pd.read_csv(camino, sep=",")

In [33]:
df_awards_items.columns

Index(['_link', '_link_awards', '_link_main', 'id', 'description', 'quantity',
       'unit_name', 'unit_value_amount', 'unit_value_currency',
       'classification_id', 'classification_uri', 'classification_scheme',
       'unit_value_unitOfAccount'],
      dtype='object')

In [34]:
df_awards_items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 695151 entries, 0 to 695150
Data columns (total 13 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   _link                     695151 non-null  object 
 1   _link_awards              695151 non-null  object 
 2   _link_main                695151 non-null  object 
 3   id                        695151 non-null  int64  
 4   description               693046 non-null  object 
 5   quantity                  695151 non-null  float64
 6   unit_name                 695151 non-null  object 
 7   unit_value_amount         695151 non-null  float64
 8   unit_value_currency       695022 non-null  object 
 9   classification_id         695151 non-null  int64  
 10  classification_uri        695151 non-null  object 
 11  classification_scheme     695151 non-null  object 
 12  unit_value_unitOfAccount  129 non-null     object 
dtypes: float64(2), int64(2), object(9)
memory us

In [35]:
df_awards_items["unit_value_unitOfAccount"].describe()

count     129
unique      1
top       UTM
freq      129
Name: unit_value_unitOfAccount, dtype: object

In [36]:
df_awards_items.loc[df_awards_items["unit_value_currency"].isna()]["unit_value_unitOfAccount"].describe()

count     129
unique      1
top       UTM
freq      129
Name: unit_value_unitOfAccount, dtype: object

Podemos observar que para los valores de <unit_value_currency> que son NaN existe un dato en <unit_value_unitOfAccount>, por lo que crearemos una nueva columna llamada <unit_value_monetary_unit> que tenga los valores de ambas columna según correspondan.

In [37]:
df_awards_items["unit_value_currency"] = df_awards_items["unit_value_currency"].replace(np.nan, "No")

In [38]:
def crear_unit_value_monetary_unit(indice:int, comparador="No") -> str:
    if df_awards_items.iloc[indice]["unit_value_currency"] == comparador:
        return df_awards_items.iloc[indice]["unit_value_unitOfAccount"]
    return df_awards_items.iloc[indice]["unit_value_currency"]

df_awards_items["unit_value_monetary_unit"] = df_awards_items.index.map(crear_unit_value_monetary_unit)

In [39]:
df_awards_items["unit_value_monetary_unit"].unique()

array(['CLP', 'CLF', 'USD', 'UTM', 'EUR'], dtype=object)

In [40]:
df_awards_items.drop(columns=['classification_uri', 'classification_scheme','unit_value_unitOfAccount', "unit_value_currency", "description"], inplace=True)

## Revisión de awards_suppliers.csv

In [41]:
camino = path.join("Chile","2018","awards_suppliers.csv")
df_awards_supplier = pd.read_csv(camino, sep=",")

In [42]:
df_awards_supplier.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258498 entries, 0 to 258497
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   _link         258498 non-null  object
 1   _link_awards  258498 non-null  object
 2   _link_main    258498 non-null  object
 3   id            258498 non-null  object
 4   name          258498 non-null  object
dtypes: object(5)
memory usage: 9.9+ MB


In [43]:
df_awards_supplier.head()

Unnamed: 0,_link,_link_awards,_link_main,id,name
0,id-0.0.awards.0.suppliers.0,id-0.0.awards.0,id-0.0,CL-MP-836904,JOSÉ MIGUEL | JOSÉ MIGUEL
1,id-0.2.awards.0.suppliers.0,id-0.2.awards.0,id-0.2,CL-MP-200536,Todovidrios | Vidrieria las condes 7002
2,id-0.4.awards.0.suppliers.0,id-0.4.awards.0,id-0.4,CL-MP-909666,Jorge Alejandro | Jorge Alejandro
3,id-0.5.awards.0.suppliers.0,id-0.5.awards.0,id-0.5,CL-MP-24062,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR L...
4,id-0.5.awards.0.suppliers.1,id-0.5.awards.0,id-0.5,CL-MP-69821,RICARDO RODRIGUEZ Y CIA. LTDA. | RICARDO RODRI...


In [44]:
def dejar_rut(texto:str) -> str:
    return int(texto.split("-")[2])

In [45]:
df_awards_supplier["id"] = df_awards_supplier["id"].apply(dejar_rut)

In [46]:
df_awards_supplier.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258498 entries, 0 to 258497
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   _link         258498 non-null  object
 1   _link_awards  258498 non-null  object
 2   _link_main    258498 non-null  object
 3   id            258498 non-null  int64 
 4   name          258498 non-null  object
dtypes: int64(1), object(4)
memory usage: 9.9+ MB


## Revisión de awards_documents.csv

In [47]:
camino = path.join("Chile","2018", "awards_documents.csv")
df_award_documents = pd.read_csv(camino, sep=",")
df_award_documents.head()

Unnamed: 0,_link,_link_awards,_link_main,id,url,title,format,language,description,documentType
0,id-0.0.awards.0.documents.0,id-0.0.awards.0,id-0.0,1,https://www.mercadopublico.cl/Procurement/Modu...,Página documentos del proceso de contratación,html,es,Todos los documentos relacionados al proceso d...,x_procurementDocuments
1,id-0.2.awards.0.documents.0,id-0.2.awards.0,id-0.2,1,https://www.mercadopublico.cl/Procurement/Modu...,Página documentos del proceso de contratación,html,es,Todos los documentos relacionados al proceso d...,x_procurementDocuments
2,id-0.4.awards.0.documents.0,id-0.4.awards.0,id-0.4,1,https://www.mercadopublico.cl/Procurement/Modu...,Página documentos del proceso de contratación,html,es,Todos los documentos relacionados al proceso d...,x_procurementDocuments
3,id-0.5.awards.0.documents.0,id-0.5.awards.0,id-0.5,1,https://www.mercadopublico.cl/Procurement/Modu...,Página documentos del proceso de contratación,html,es,Todos los documentos relacionados al proceso d...,x_procurementDocuments
4,id-0.6.awards.0.documents.0,id-0.6.awards.0,id-0.6,1,https://www.mercadopublico.cl/Procurement/Modu...,Página documentos del proceso de contratación,html,es,Todos los documentos relacionados al proceso d...,x_procurementDocuments


In [48]:
df_award_documents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 218525 entries, 0 to 218524
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   _link         218525 non-null  object
 1   _link_awards  218525 non-null  object
 2   _link_main    218525 non-null  object
 3   id            218525 non-null  int64 
 4   url           218525 non-null  object
 5   title         218525 non-null  object
 6   format        218525 non-null  object
 7   language      218525 non-null  object
 8   description   218525 non-null  object
 9   documentType  218525 non-null  object
dtypes: int64(1), object(9)
memory usage: 16.7+ MB


No se utilizará este DataFrame, puesto que no contiene información relevante para lo que se busca resolver.

## Revisión de parties.csv

In [49]:
camino = path.join("Chile","2018", "parties.csv")
df_parties = pd.read_csv(camino, sep=",")
df_parties.head()

Unnamed: 0,_link,_link_main,id,name,roles,address_region,address_streetAddress,identifier_id,identifier_scheme,identifier_legalName,contactPoint_name,contactPoint_email,contactPoint_telephone,address_countryName,contactPoint_faxNumber
0,id-0.0.parties.0,id-0.0,CL-MP-836904,JOSÉ MIGUEL | JOSÉ MIGUEL,"tenderer,supplier",Región de Valparaíso,ESTRELLA DE CHILE 448,182792101,CL-RUT,JOSÉ MIGUEL REYES GACITÚA,JOSÉ MIGUEL REYES GACITÚA,jomelitoz@hotmail.com,56-32-2930110,,
1,id-0.0.parties.1,id-0.0,CL-MP-5464,Ilustre Municipalidad de Quintero | UNIDAD NORMAL,"procuringEntity,buyer",Región de Valparaíso,normandie #1916,69060700K,CL-RUT,I MUNICIPALIDAD DE QUINTERO,Eduardo Enrique Rios Rios,rios@muniquintero.cl,56-32-2379639,Chile,
2,id-0.1.parties.0,id-0.1,CL-MP-3401,Municipalidad de Los Angeles | MUNICIPALIDAD D...,"procuringEntity,buyer",Región del Biobío,COLO COLO 484,691701018,CL-RUT,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE,JASMIN LISBETH SANHUEZA CIFUENTES,jsanhueza@educacionlosangeles.cl,56-43-2570146,Chile,
3,id-0.2.parties.0,id-0.2,CL-MP-200536,Todovidrios | Vidrieria las condes 7002,"tenderer,supplier",Región Metropolitana de Santiago,Avenida Francisco Bilbao 8010 local 3 torre 1,51259424,CL-RUT,JOSE MERCEDES RODRIGUEZ ECHEVERRIA,Jose Mercedes rodriguez echeverria,todovidrios7@gmail.com,56-02-22290586,Chile,
4,id-0.2.parties.1,id-0.2,CL-MP-731625,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,tenderer,Región Metropolitana de Santiago,ARTURO PRAT N ° 213,762581167,CL-RUT,FERRETERIA COMERCIAL L&J LIMITADA,FERRETERIA COMERCIAL L Y J LTDA FERRETERIA LYJ...,contactolyj@gmail.com,56-22-9047419,Chile,


In [50]:
df_parties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 15 columns):
 #   Column                  Non-Null Count    Dtype 
---  ------                  --------------    ----- 
 0   _link                   1300059 non-null  object
 1   _link_main              1300059 non-null  object
 2   id                      1300059 non-null  object
 3   name                    1300056 non-null  object
 4   roles                   1300059 non-null  object
 5   address_region          1284966 non-null  object
 6   address_streetAddress   1292579 non-null  object
 7   identifier_id           1299403 non-null  object
 8   identifier_scheme       1299430 non-null  object
 9   identifier_legalName    1299121 non-null  object
 10  contactPoint_name       1300059 non-null  object
 11  contactPoint_email      1299876 non-null  object
 12  contactPoint_telephone  1288642 non-null  object
 13  address_countryName     1087631 non-null  object
 14  contactPoint_faxNu

In [51]:
df_parties.drop(columns=['address_streetAddress', 'identifier_scheme','identifier_legalName', 'contactPoint_name', 'contactPoint_email','contactPoint_telephone', 'contactPoint_faxNumber'], inplace=True)

In [52]:
df_parties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 8 columns):
 #   Column               Non-Null Count    Dtype 
---  ------               --------------    ----- 
 0   _link                1300059 non-null  object
 1   _link_main           1300059 non-null  object
 2   id                   1300059 non-null  object
 3   name                 1300056 non-null  object
 4   roles                1300059 non-null  object
 5   address_region       1284966 non-null  object
 6   identifier_id        1299403 non-null  object
 7   address_countryName  1087631 non-null  object
dtypes: object(8)
memory usage: 79.3+ MB


Se eliminan estas columnas porque ofrecen información que no es de interes, ni se puede utilizar para obtener información en alguna de las paginas relacionadas al dataset.

In [53]:
df_parties["roles"].unique()

array(['tenderer,supplier', 'procuringEntity,buyer', 'tenderer'],
      dtype=object)

In [54]:
def separar_roles(texto:str) -> list:
    if "," in texto:
        return texto.split(",")
    else:
        return [texto, np.nan]
    
df_parties["roles"] = df_parties["roles"].apply(separar_roles)

In [55]:
df_parties["rol 1"] = df_parties.index.map(lambda x: df_parties.loc[x]["roles"][0])

In [56]:
df_parties["rol 2"] = df_parties.index.map(lambda x: df_parties.loc[x]["roles"][1])

In [57]:
df_parties.head()

Unnamed: 0,_link,_link_main,id,name,roles,address_region,identifier_id,address_countryName,rol 1,rol 2
0,id-0.0.parties.0,id-0.0,CL-MP-836904,JOSÉ MIGUEL | JOSÉ MIGUEL,"[tenderer, supplier]",Región de Valparaíso,182792101,,tenderer,supplier
1,id-0.0.parties.1,id-0.0,CL-MP-5464,Ilustre Municipalidad de Quintero | UNIDAD NORMAL,"[procuringEntity, buyer]",Región de Valparaíso,69060700K,Chile,procuringEntity,buyer
2,id-0.1.parties.0,id-0.1,CL-MP-3401,Municipalidad de Los Angeles | MUNICIPALIDAD D...,"[procuringEntity, buyer]",Región del Biobío,691701018,Chile,procuringEntity,buyer
3,id-0.2.parties.0,id-0.2,CL-MP-200536,Todovidrios | Vidrieria las condes 7002,"[tenderer, supplier]",Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier
4,id-0.2.parties.1,id-0.2,CL-MP-731625,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,"[tenderer, nan]",Región Metropolitana de Santiago,762581167,Chile,tenderer,


In [58]:
df_G = df_parties.dropna(subset=["address_region"])
df_G = df_G[df_G["address_countryName"].isna()]

In [59]:
df_G["address_region"].unique()

array(['Región de Valparaíso ', 'Región del Biobío ',
       'Región Metropolitana de Santiago',
       'Región de Magallanes y de la Antártica',
       'Región del Libertador General Bernardo O´Higgins',
       'Región de Atacama ', 'Región de Coquimbo ',
       'Región de la Araucanía ', 'Región de los Lagos ',
       'Región del Maule ', 'Región de Los Ríos',
       'Región de Antofagasta ', 'Región de Tarapacá  ',
       'Región de Arica y Parinacota',
       'Región Aysén del General Carlos Ibáñez del Campo'], dtype=object)

In [60]:
df_parties["address_countryName"] = df_parties["address_countryName"].replace(np.nan, "no")
df_parties["address_region"] = df_parties["address_region"].replace(np.nan, "no")

In [61]:
def dar_pais_segun_region(indice:int, comparador="no") -> str:
    if df_parties.iloc[indice]["address_countryName"] == comparador:
        if df_parties.iloc[indice]["address_region"] in set(['Región de Valparaíso ', 'Región del Biobío ','Región Metropolitana de Santiago','Región de Magallanes y de la Antártica','Región del Libertador General Bernardo O´Higgins','Región de Atacama ', 'Región de Coquimbo ',
       'Región de la Araucanía ', 'Región de los Lagos ','Región del Maule ', 'Región de Los Ríos','Región de Antofagasta ', 'Región de Tarapacá  ','Región de Arica y Parinacota','Región Aysén del General Carlos Ibáñez del Campo']):
            return ("Chile")
        else:
            return ("no")
    else:
        return df_parties.iloc[indice]["address_countryName"]

In [62]:
def dar_region_segun_pais(indice:int, comparador="no") -> str:
    if df_parties.iloc[indice]["address_region"] == comparador:
        if df_parties.iloc[indice]["address_countryName"] != "Chile" and df_parties.iloc[indice]["address_countryName"] != "no":
            return("Extranjero")
        else:
            return np.nan
    else:
        return df_parties.iloc[indice]["address_region"]

In [63]:
df_parties["address_countryName"] = df_parties.index.map(dar_pais_segun_region)

In [64]:
df_parties["address_region"] = df_parties.index.map(dar_region_segun_pais)

In [65]:
df_parties["address_region"].unique()

array(['Región de Valparaíso ', 'Región del Biobío ',
       'Región Metropolitana de Santiago', 'Región del Ñuble',
       'Región de los Lagos ', 'Región de Tarapacá  ',
       'Región de Arica y Parinacota', 'Región de la Araucanía ',
       'Región del Maule ', 'Región de Magallanes y de la Antártica',
       'Región de Los Ríos', nan,
       'Región Aysén del General Carlos Ibáñez del Campo',
       'Región del Libertador General Bernardo O´Higgins',
       'Región de Atacama ', 'Región de Coquimbo ',
       'Región de Antofagasta ', 'Extranjero'], dtype=object)

In [66]:
df_parties["address_countryName"].replace("no", np.nan, inplace=True)
df_parties["address_countryName"].unique()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_parties["address_countryName"].replace("no", np.nan, inplace=True)


array(['Chile', nan, 'Francia', 'Estados Unidos', 'Alemania', 'India',
       'Ecuador', 'Reino Unido', 'Suiza', 'México', 'Afganistán',
       'España', 'Colombia', 'Uruguay', 'Bolivia',
       'Francia, Metropolitana', 'Panamá', 'Egipto', 'Argentina',
       'Portugal', 'Venezuela', 'Hong Kong', 'Brasil', 'Finlandia',
       'Nueva Zelanda', 'Perú', 'Italia', 'Canadá', 'Países Bajos',
       'Angola', 'Turquía', 'China', 'Austria', 'Suecia',
       'Rusia, Federación de', 'Cuba', 'Corea, República de'],
      dtype=object)

In [67]:
df_parties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 10 columns):
 #   Column               Non-Null Count    Dtype 
---  ------               --------------    ----- 
 0   _link                1300059 non-null  object
 1   _link_main           1300059 non-null  object
 2   id                   1300059 non-null  object
 3   name                 1300056 non-null  object
 4   roles                1300059 non-null  object
 5   address_region       1285117 non-null  object
 6   identifier_id        1299403 non-null  object
 7   address_countryName  1285137 non-null  object
 8   rol 1                1300059 non-null  object
 9   rol 2                503396 non-null   object
dtypes: object(10)
memory usage: 99.2+ MB


In [68]:
def dejar_rut(texto:str) -> str:
    if "-" in texto:
        return int(texto.split("-")[2])
    else:
        return np.nan

In [69]:
df_parties["id"] = df_parties["id"].apply(dejar_rut)

In [70]:
df_parties.drop(columns=["roles"], inplace=True)

## Revisión de parties_additionalIdentifiers.csv

In [71]:
camino = path.join("Chile","2018", "parties_additionalIdentifiers.csv")
df_parties_add = pd.read_csv(camino, sep=",")
df_parties_add.head()

Unnamed: 0,_link,_link_parties,_link_main,id,uri,scheme,legalName
0,id-0.0.parties.0.additionalIdentifiers.0,id-0.0.parties.0,id-0.0,836904,https://apis.mercadopublico.cl/OCDS/data/prove...,CL-MP,JOSÉ MIGUEL REYES GACITÚA
1,id-0.0.parties.1.additionalIdentifiers.0,id-0.0.parties.1,id-0.0,5464,https://apis.mercadopublico.cl/OCDS/data/compr...,CL-MP,I MUNICIPALIDAD DE QUINTERO
2,id-0.1.parties.0.additionalIdentifiers.0,id-0.1.parties.0,id-0.1,3401,https://apis.mercadopublico.cl/OCDS/data/compr...,CL-MP,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE
3,id-0.2.parties.0.additionalIdentifiers.0,id-0.2.parties.0,id-0.2,200536,https://apis.mercadopublico.cl/OCDS/data/prove...,CL-MP,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
4,id-0.2.parties.1.additionalIdentifiers.0,id-0.2.parties.1,id-0.2,731625,https://apis.mercadopublico.cl/OCDS/data/prove...,CL-MP,FERRETERIA COMERCIAL L&J LIMITADA


In [72]:
df_parties_add.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 7 columns):
 #   Column         Non-Null Count    Dtype 
---  ------         --------------    ----- 
 0   _link          1300059 non-null  object
 1   _link_parties  1300059 non-null  object
 2   _link_main     1300059 non-null  object
 3   id             1300059 non-null  int64 
 4   uri            1300059 non-null  object
 5   scheme         1300059 non-null  object
 6   legalName      1299395 non-null  object
dtypes: int64(1), object(6)
memory usage: 69.4+ MB


In [73]:
df_parties_add.iloc[3]["uri"]

'https://apis.mercadopublico.cl/OCDS/data/proveedor/unidad/200536'

Los links de uri no aportan información extra, por lo no se mantendran.

In [74]:
df_parties_add.drop(columns=["uri", "scheme", "_link_main"],inplace=True)

### Revisión de tender_items.csv

In [75]:
camino = path.join("Chile","2018", "tender_items.csv")
df_tender_item = pd.read_csv(camino, sep=",")
df_tender_item.head()

Unnamed: 0,_link,_link_main,id,description,quantity,unit_name,classification_id,classification_uri,classification_scheme
0,id-0.0.tender.items.0,id-0.0,36425198,"Instrumentos musicales, juegos, juguetes, arte...",1.0,Unidad,60141012,https://apis.mercadopublico.cl/OCDS/data/produ...,UNSPSC
1,id-0.1.tender.items.0,id-0.1,36442486,"Artículos para estructuras, obras y construcci...",1.0,Unidad,30103205,https://apis.mercadopublico.cl/OCDS/data/produ...,UNSPSC
2,id-0.1.tender.items.1,id-0.1,36442487,"Artículos para estructuras, obras y construcci...",1.0,Unidad,30103205,https://apis.mercadopublico.cl/OCDS/data/produ...,UNSPSC
3,id-0.1.tender.items.2,id-0.1,36442488,"Artículos para estructuras, obras y construcci...",1.0,Unidad,30103205,https://apis.mercadopublico.cl/OCDS/data/produ...,UNSPSC
4,id-0.1.tender.items.3,id-0.1,36442489,"Artículos para estructuras, obras y construcci...",1.0,Unidad,30103205,https://apis.mercadopublico.cl/OCDS/data/produ...,UNSPSC


In [76]:
df_tender_item.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 979055 entries, 0 to 979054
Data columns (total 9 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   _link                  979055 non-null  object 
 1   _link_main             979055 non-null  object 
 2   id                     979055 non-null  int64  
 3   description            979055 non-null  object 
 4   quantity               979055 non-null  float64
 5   unit_name              979055 non-null  object 
 6   classification_id      979055 non-null  int64  
 7   classification_uri     979055 non-null  object 
 8   classification_scheme  979055 non-null  object 
dtypes: float64(1), int64(2), object(6)
memory usage: 67.2+ MB


In [77]:
df_tender_item.drop(columns=["classification_uri"], inplace=True)

In [78]:
df_tender_item.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 979055 entries, 0 to 979054
Data columns (total 8 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   _link                  979055 non-null  object 
 1   _link_main             979055 non-null  object 
 2   id                     979055 non-null  int64  
 3   description            979055 non-null  object 
 4   quantity               979055 non-null  float64
 5   unit_name              979055 non-null  object 
 6   classification_id      979055 non-null  int64  
 7   classification_scheme  979055 non-null  object 
dtypes: float64(1), int64(2), object(5)
memory usage: 59.8+ MB


### Revisión de tender_tenderers.csv

In [79]:
camino = path.join("Chile","2018", "tender_tenderers.csv")
df_tender_tenderers = pd.read_csv(camino, sep=",")

In [80]:
df_tender_tenderers.head()

Unnamed: 0,_link,_link_main,id,name
0,id-0.0.tender.tenderers.0,id-0.0,CL-MP-836904,JOSÉ MIGUEL | JOSÉ MIGUEL
1,id-0.2.tender.tenderers.0,id-0.2,CL-MP-17336,Inmobiliaria y Servicios | Inmobiliaria y Serv...
2,id-0.2.tender.tenderers.1,id-0.2,CL-MP-200536,Todovidrios | Vidrieria las condes 7002
3,id-0.2.tender.tenderers.2,id-0.2,CL-MP-731625,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...
4,id-0.2.tender.tenderers.3,id-0.2,CL-MP-971211,ROBERT FRANK | ROBERT FRANK


In [81]:
df_tender_tenderers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055231 entries, 0 to 1055230
Data columns (total 4 columns):
 #   Column      Non-Null Count    Dtype 
---  ------      --------------    ----- 
 0   _link       1055231 non-null  object
 1   _link_main  1055231 non-null  object
 2   id          1055231 non-null  object
 3   name        1055228 non-null  object
dtypes: object(4)
memory usage: 32.2+ MB


In [82]:
def dejar_rut(texto:str) -> str:
    if "-" in texto:
        return int(texto.split("-")[2])
    else:
        return np.nan

In [83]:
df_tender_tenderers["id"] = df_tender_tenderers["id"].apply(dejar_rut)

In [84]:
df_tender_tenderers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055231 entries, 0 to 1055230
Data columns (total 4 columns):
 #   Column      Non-Null Count    Dtype  
---  ------      --------------    -----  
 0   _link       1055231 non-null  object 
 1   _link_main  1055231 non-null  object 
 2   id          1055185 non-null  float64
 3   name        1055228 non-null  object 
dtypes: float64(1), object(3)
memory usage: 32.2+ MB


### Clasificación con UNSPCS.

In [85]:
df_clasificaciones = pd.read_excel("unspcs-clasificador-de-bienes-y-servicios-de-naciones-unidas-en-espanol.xlsx", skiprows=5)
df_clasificaciones.head(10)

Unnamed: 0,Código segmento,Nombre Segmento,Código Familia,Nombre Familia,Código Clase,Nombre Clase,Código Producto,Nombre Producto
0,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101501,Gatos
1,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101502,Perros
2,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101504,Visón
3,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101505,Ratas
4,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101506,Caballos
5,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101507,Ovejas
6,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101508,Cabras
7,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101509,Asnos
8,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101510,Ratones
9,10,"Material Vivo Vegetal y Animal, Accesorios y S...",1010,Animales vivos,101015,Animales de granja,10101511,Cerdos


El dataframe df_clasificaciones corresponde a un conjunto de datos que posee el significado de cada objeto solicitados según la codificación UNSPSC, por lo que se hara merge con el df_main para saber que se esta comprando en cada licitación, se desecharán algunas de las columnas de df_clasificaciones para esto.

In [86]:
df_clasificaciones.drop(columns=["Código segmento","Código Familia","Nombre Familia","Nombre Clase","Código Clase"], inplace=True)

### Instituciones que realizan licitaciones. 

Se obtuvo unb dataset con información de los organismos que han realizado lictaciones historicamente desde <https://datos-abiertos.chilecompra.cl/descargas/complementos>, se utilizará para determinar el sector del comprador.

In [87]:
camino = path.join("Chile","Instituciones_compradoras.csv")
df_instituciones_compradoras = pd.read_csv(camino, sep=";", encoding="UTF-8")

In [88]:
df_instituciones_compradoras.head()

Unnamed: 0,sector,Instituci�n,C�digo Instituci�n,RUT Unidad de Compra,C�digo Unidad de Compra,Unidad de Compra,Regi�n Unidad de Compra
0,MUNICIPALIDADES,I MUNICIPALIDAD DE AYSEN,100049,69.240.100-K,3701,I.MUNICIPALIDAD AYSEN-DIRECCION DE EDUCACION,Ays�n
1,MUNICIPALIDADES,I MUNICIPALIDAD DE AYSEN,100049,69.240.100-K,3702,I.MUNICIPALIDAD AYSEN-ADQUISICIONES,Ays�n
2,MUNICIPALIDADES,I MUNICIPALIDAD DE AYSEN,100049,60.920.753-1,3703,I.MUNICIPALIDAD AYSEN-LICEO POLITECNICO,Ays�n
3,MUNICIPALIDADES,I MUNICIPALIDAD DE PIRQUE,100072,69.072.200-3,3704,MUNICIPALIDAD DE PIRQUE - DAF,Metropolitana
4,MUNICIPALIDADES,I MUNICIPALIDAD DE PIRQUE,100072,69.072.200-3,3705,MUNICIPALIDAD DE PIRQUE - DOM,Metropolitana


In [89]:
df_instituciones_compradoras.loc[df_instituciones_compradoras["C�digo Unidad de Compra"] == 5772]

Unnamed: 0,sector,Instituci�n,C�digo Instituci�n,RUT Unidad de Compra,C�digo Unidad de Compra,Unidad de Compra,Regi�n Unidad de Compra
2446,MUNICIPALIDADES,I MUNICIPALIDAD DE TIRUA,183037,69.160.700-3,5772,FINANZAS,B�o-B�o


In [90]:
df_instituciones_compradoras.drop(columns=["Instituci�n","Regi�n Unidad de Compra", "RUT Unidad de Compra", "C�digo Instituci�n"], inplace=True)
df_instituciones_compradoras.rename(columns={"C�digo Unidad de Compra":"Codigo Unidad de Compra"}, inplace=True)
df_instituciones_compradoras.head()

Unnamed: 0,sector,Codigo Unidad de Compra,Unidad de Compra
0,MUNICIPALIDADES,3701,I.MUNICIPALIDAD AYSEN-DIRECCION DE EDUCACION
1,MUNICIPALIDADES,3702,I.MUNICIPALIDAD AYSEN-ADQUISICIONES
2,MUNICIPALIDADES,3703,I.MUNICIPALIDAD AYSEN-LICEO POLITECNICO
3,MUNICIPALIDADES,3704,MUNICIPALIDAD DE PIRQUE - DAF
4,MUNICIPALIDADES,3705,MUNICIPALIDAD DE PIRQUE - DOM


### Unión de dataframes.

In [91]:
df_filtrado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120718 entries, 0 to 120717
Data columns (total 29 columns):
 #   Column                                       Non-Null Count   Dtype          
---  ------                                       --------------   -----          
 0   _link                                        120718 non-null  object         
 1   initiationType                               120718 non-null  object         
 2   tender_id                                    120718 non-null  object         
 3   tender_procurementMethodDetails              119575 non-null  category       
 4   tender_status                                120718 non-null  object         
 5   tender_procurementMethod                     120718 non-null  object         
 6   tender_hasEnquiries                          120718 non-null  bool           
 7   tender_awardPeriod_endDate                   120718 non-null  datetime64[ns] 
 8   tender_awardPeriod_startDate                 120718 no

In [92]:
df_awards.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240021 entries, 0 to 240020
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   _link                240021 non-null  object 
 1   _link_main           240021 non-null  object 
 2   date                 210324 non-null  object 
 3   status               240021 non-null  object 
 4   value_amount         177451 non-null  float64
 5   value_monetary_unit  177451 non-null  object 
dtypes: float64(1), object(5)
memory usage: 11.0+ MB


In [93]:
df_awards_items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 695151 entries, 0 to 695150
Data columns (total 9 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   _link                     695151 non-null  object 
 1   _link_awards              695151 non-null  object 
 2   _link_main                695151 non-null  object 
 3   id                        695151 non-null  int64  
 4   quantity                  695151 non-null  float64
 5   unit_name                 695151 non-null  object 
 6   unit_value_amount         695151 non-null  float64
 7   classification_id         695151 non-null  int64  
 8   unit_value_monetary_unit  695151 non-null  object 
dtypes: float64(2), int64(2), object(5)
memory usage: 47.7+ MB


In [94]:
df_awards_supplier.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258498 entries, 0 to 258497
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   _link         258498 non-null  object
 1   _link_awards  258498 non-null  object
 2   _link_main    258498 non-null  object
 3   id            258498 non-null  int64 
 4   name          258498 non-null  object
dtypes: int64(1), object(4)
memory usage: 9.9+ MB


In [95]:
df_parties.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 9 columns):
 #   Column               Non-Null Count    Dtype 
---  ------               --------------    ----- 
 0   _link                1300059 non-null  object
 1   _link_main           1300059 non-null  object
 2   id                   1300059 non-null  int64 
 3   name                 1300056 non-null  object
 4   address_region       1285117 non-null  object
 5   identifier_id        1299403 non-null  object
 6   address_countryName  1285137 non-null  object
 7   rol 1                1300059 non-null  object
 8   rol 2                503396 non-null   object
dtypes: int64(1), object(8)
memory usage: 89.3+ MB


In [96]:
df_parties_add.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 4 columns):
 #   Column         Non-Null Count    Dtype 
---  ------         --------------    ----- 
 0   _link          1300059 non-null  object
 1   _link_parties  1300059 non-null  object
 2   id             1300059 non-null  int64 
 3   legalName      1299395 non-null  object
dtypes: int64(1), object(3)
memory usage: 39.7+ MB


In [97]:
df_tender_item.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 979055 entries, 0 to 979054
Data columns (total 8 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   _link                  979055 non-null  object 
 1   _link_main             979055 non-null  object 
 2   id                     979055 non-null  int64  
 3   description            979055 non-null  object 
 4   quantity               979055 non-null  float64
 5   unit_name              979055 non-null  object 
 6   classification_id      979055 non-null  int64  
 7   classification_scheme  979055 non-null  object 
dtypes: float64(1), int64(2), object(5)
memory usage: 59.8+ MB


In [98]:
df_tender_tenderers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1055231 entries, 0 to 1055230
Data columns (total 4 columns):
 #   Column      Non-Null Count    Dtype  
---  ------      --------------    -----  
 0   _link       1055231 non-null  object 
 1   _link_main  1055231 non-null  object 
 2   id          1055185 non-null  float64
 3   name        1055228 non-null  object 
dtypes: float64(1), object(3)
memory usage: 32.2+ MB


In [99]:
df_clasificaciones.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49022 entries, 0 to 49021
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Nombre Segmento  49022 non-null  object
 1   Código Producto  49022 non-null  int64 
 2   Nombre Producto  49022 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.1+ MB


### Award_items y clasificaciones.

In [100]:
df_award_item_clasifi = df_awards_items.merge(df_clasificaciones, left_on="classification_id",right_on="Código Producto")
df_award_item_clasifi.drop(columns=[ "_link"], inplace=True)
df_award_item_clasifi.head()

Unnamed: 0,_link_awards,_link_main,id,quantity,unit_name,unit_value_amount,classification_id,unit_value_monetary_unit,Nombre Segmento,Código Producto,Nombre Producto
0,id-0.0.awards.0,id-0.0,36425198,1.0,Unidad,690000.0,60141012,CLP,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables
1,id-0.2.awards.0,id-0.2,36451829,1.0,Unidad,870000.0,30171501,CLP,"Componentes y Suministros para Estructuras, Ed...",30171501,Puertas de cristal
2,id-0.4.awards.0,id-0.4,36342031,1.0,Unidad,6000000.0,86141501,CLP,Servicios Educativos y de Formación,86141501,Servicios de asesorías educativas
3,id-0.5.awards.0,id-0.5,36460630,4.0,Unidad,4995.0,30102201,CLP,"Componentes y Suministros para Estructuras, Ed...",30102201,Placa de aleación ferrosa
4,id-0.5.awards.0,id-0.5,36460632,4.0,Kit,4538.0,49161604,CLP,"Equipos, Suministros y Accesorios para Deporte...",49161604,Pelotas de tenis


### Awards y Awards_items

In [101]:
df_award_and_items = df_awards.merge(df_award_item_clasifi, left_on="_link", right_on="_link_awards")
df_award_and_items.head()

Unnamed: 0,_link,_link_main_x,date,status,value_amount,value_monetary_unit,_link_awards,_link_main_y,id,quantity,unit_name,unit_value_amount,classification_id,unit_value_monetary_unit,Nombre Segmento,Código Producto,Nombre Producto
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,id-0.0.awards.0,id-0.0,36425198,1.0,Unidad,690000.0,60141012,CLP,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,id-0.2.awards.0,id-0.2,36451829,1.0,Unidad,870000.0,30171501,CLP,"Componentes y Suministros para Estructuras, Ed...",30171501,Puertas de cristal
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,id-0.4.awards.0,id-0.4,36342031,1.0,Unidad,6000000.0,86141501,CLP,Servicios Educativos y de Formación,86141501,Servicios de asesorías educativas
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,id-0.5.awards.0,id-0.5,36460630,4.0,Unidad,4995.0,30102201,CLP,"Componentes y Suministros para Estructuras, Ed...",30102201,Placa de aleación ferrosa
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,id-0.5.awards.0,id-0.5,36460632,4.0,Kit,4538.0,49161604,CLP,"Equipos, Suministros y Accesorios para Deporte...",49161604,Pelotas de tenis


In [102]:
df_award_and_items.drop(columns=["_link_main_y", "_link_awards"],inplace=True)

In [103]:
df_igualdad = pd.DataFrame(df_award_and_items["value_monetary_unit"] == df_award_and_items["unit_value_monetary_unit"])
indice = df_igualdad.loc[df_igualdad[0] == False].index

In [104]:
df_award_and_items.iloc[indice]

Unnamed: 0,_link,_link_main_x,date,status,value_amount,value_monetary_unit,id,quantity,unit_name,unit_value_amount,classification_id,unit_value_monetary_unit,Nombre Segmento,Código Producto,Nombre Producto
196703,id-4.14990.awards.0,id-4.14990,,active,,,34331008,0.0,Unidad,0.0,83101505,CLP,Servicios Públicos y Servicios Relacionados co...,83101505,Servicios de asesoramiento de política hidráulica


In [105]:
df_award_and_items.drop(indice, inplace=True)

In [106]:
df_award_and_items.rename({"_link_main_x":"_link_main"},inplace=True)

In [107]:
df_award_and_items.columns

Index(['_link', '_link_main_x', 'date', 'status', 'value_amount',
       'value_monetary_unit', 'id', 'quantity', 'unit_name',
       'unit_value_amount', 'classification_id', 'unit_value_monetary_unit',
       'Nombre Segmento', 'Código Producto', 'Nombre Producto'],
      dtype='object')

### Awards y Awards_supplier

In [108]:
df_award_and_supplier = df_awards.merge(df_awards_supplier, left_on="_link", right_on="_link_awards")
df_award_and_supplier.drop(columns=["_link_awards", "_link_main_y", "_link_y"], inplace=True)
df_award_and_supplier.head()

Unnamed: 0,_link_x,_link_main_x,date,status,value_amount,value_monetary_unit,id,name
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,836904,JOSÉ MIGUEL | JOSÉ MIGUEL
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,200536,Todovidrios | Vidrieria las condes 7002
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,909666,Jorge Alejandro | Jorge Alejandro
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,24062,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR L...
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,69821,RICARDO RODRIGUEZ Y CIA. LTDA. | RICARDO RODRI...


In [109]:
df_award_and_supplier.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258498 entries, 0 to 258497
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   _link_x              258498 non-null  object 
 1   _link_main_x         258498 non-null  object 
 2   date                 256365 non-null  object 
 3   status               258498 non-null  object 
 4   value_amount         258495 non-null  float64
 5   value_monetary_unit  258495 non-null  object 
 6   id                   258498 non-null  int64  
 7   name                 258498 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 15.8+ MB


#### Tender_items y Clasificaciones.

In [110]:
df_tender_item = df_tender_item.merge(df_clasificaciones, left_on="classification_id",right_on="Código Producto")
df_tender_item.drop(columns=["classification_scheme", "classification_id"], inplace=True)
df_tender_item.head()

Unnamed: 0,_link,_link_main,id,description,quantity,unit_name,Nombre Segmento,Código Producto,Nombre Producto
0,id-0.0.tender.items.0,id-0.0,36425198,"Instrumentos musicales, juegos, juguetes, arte...",1.0,Unidad,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables
1,id-0.1.tender.items.0,id-0.1,36442486,"Artículos para estructuras, obras y construcci...",1.0,Unidad,"Componentes y Suministros para Estructuras, Ed...",30103205,Rejilla de hierro
2,id-0.1.tender.items.1,id-0.1,36442487,"Artículos para estructuras, obras y construcci...",1.0,Unidad,"Componentes y Suministros para Estructuras, Ed...",30103205,Rejilla de hierro
3,id-0.1.tender.items.2,id-0.1,36442488,"Artículos para estructuras, obras y construcci...",1.0,Unidad,"Componentes y Suministros para Estructuras, Ed...",30103205,Rejilla de hierro
4,id-0.1.tender.items.3,id-0.1,36442489,"Artículos para estructuras, obras y construcci...",1.0,Unidad,"Componentes y Suministros para Estructuras, Ed...",30103205,Rejilla de hierro


### Awards y Tender_items_clasificaciones.

In [111]:
df_items_tender_awards = df_award_and_items.merge(df_tender_item, left_on=["_link_main_x","classification_id","unit_name"], right_on=["_link_main","Código Producto","unit_name"], how="left")

In [112]:
df_items_tender_awards.head()

Unnamed: 0,_link_x,_link_main_x,date,status,value_amount,value_monetary_unit,id_x,quantity_x,unit_name,unit_value_amount,...,Código Producto_x,Nombre Producto_x,_link_y,_link_main,id_y,description,quantity_y,Nombre Segmento_y,Código Producto_y,Nombre Producto_y
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,36425198,1.0,Unidad,690000.0,...,60141012,Juguetes inflables,id-0.0.tender.items.0,id-0.0,36425198,"Instrumentos musicales, juegos, juguetes, arte...",1.0,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,36451829,1.0,Unidad,870000.0,...,30171501,Puertas de cristal,id-0.2.tender.items.0,id-0.2,36451829,"Artículos para estructuras, obras y construcci...",1.0,"Componentes y Suministros para Estructuras, Ed...",30171501,Puertas de cristal
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,36342031,1.0,Unidad,6000000.0,...,86141501,Servicios de asesorías educativas,id-0.4.tender.items.0,id-0.4,36342031,"Educación, formación, entrenamiento y capacita...",1.0,Servicios Educativos y de Formación,86141501,Servicios de asesorías educativas
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,36460630,4.0,Unidad,4995.0,...,30102201,Placa de aleación ferrosa,id-0.5.tender.items.0,id-0.5,36460630,"Artículos para estructuras, obras y construcci...",5.0,"Componentes y Suministros para Estructuras, Ed...",30102201,Placa de aleación ferrosa
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,36460632,4.0,Kit,4538.0,...,49161604,Pelotas de tenis,id-0.5.tender.items.2,id-0.5,36460632,"Equipos, suministros y accesorios deportivos y...",3.0,"Equipos, Suministros y Accesorios para Deporte...",49161604,Pelotas de tenis


In [113]:
df_items_tender_awards.columns

Index(['_link_x', '_link_main_x', 'date', 'status', 'value_amount',
       'value_monetary_unit', 'id_x', 'quantity_x', 'unit_name',
       'unit_value_amount', 'classification_id', 'unit_value_monetary_unit',
       'Nombre Segmento_x', 'Código Producto_x', 'Nombre Producto_x',
       '_link_y', '_link_main', 'id_y', 'description', 'quantity_y',
       'Nombre Segmento_y', 'Código Producto_y', 'Nombre Producto_y'],
      dtype='object')

In [114]:
df_items_tender_awards.drop(columns=['_link_y', '_link_main','quantity_x', 'classification_id', "id_x"], inplace=True)
df_items_tender_awards.rename(columns={'_link_x':"_link_awards", '_link_main_x':"_link_main", 'date':"date_award", 'status':"award_status",'quantity_x':"quantity", 'unit_name_x':"unit_name", "id_y":"award_id"}, inplace=True)
df_items_tender_awards.head()

Unnamed: 0,_link_awards,_link_main,date_award,award_status,value_amount,value_monetary_unit,unit_name,unit_value_amount,unit_value_monetary_unit,Nombre Segmento_x,Código Producto_x,Nombre Producto_x,award_id,description,quantity_y,Nombre Segmento_y,Código Producto_y,Nombre Producto_y
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,Unidad,690000.0,CLP,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables,36425198,"Instrumentos musicales, juegos, juguetes, arte...",1.0,"Instrumentos Musicales, Juegos, Juguetes, Arte...",60141012,Juguetes inflables
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,Unidad,870000.0,CLP,"Componentes y Suministros para Estructuras, Ed...",30171501,Puertas de cristal,36451829,"Artículos para estructuras, obras y construcci...",1.0,"Componentes y Suministros para Estructuras, Ed...",30171501,Puertas de cristal
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,Unidad,6000000.0,CLP,Servicios Educativos y de Formación,86141501,Servicios de asesorías educativas,36342031,"Educación, formación, entrenamiento y capacita...",1.0,Servicios Educativos y de Formación,86141501,Servicios de asesorías educativas
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,Unidad,4995.0,CLP,"Componentes y Suministros para Estructuras, Ed...",30102201,Placa de aleación ferrosa,36460630,"Artículos para estructuras, obras y construcci...",5.0,"Componentes y Suministros para Estructuras, Ed...",30102201,Placa de aleación ferrosa
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,Kit,4538.0,CLP,"Equipos, Suministros y Accesorios para Deporte...",49161604,Pelotas de tenis,36460632,"Equipos, suministros y accesorios deportivos y...",3.0,"Equipos, Suministros y Accesorios para Deporte...",49161604,Pelotas de tenis


### Parties y Parties_additional_identifiers.

In [115]:
df_parties_and_add = df_parties.merge(df_parties_add, left_on=["_link","id"], right_on=["_link_parties", "id"], how="left")
df_parties_and_add.head()

Unnamed: 0,_link_x,_link_main,id,name,address_region,identifier_id,address_countryName,rol 1,rol 2,_link_y,_link_parties,legalName
0,id-0.0.parties.0,id-0.0,836904,JOSÉ MIGUEL | JOSÉ MIGUEL,Región de Valparaíso,182792101,Chile,tenderer,supplier,id-0.0.parties.0.additionalIdentifiers.0,id-0.0.parties.0,JOSÉ MIGUEL REYES GACITÚA
1,id-0.0.parties.1,id-0.0,5464,Ilustre Municipalidad de Quintero | UNIDAD NORMAL,Región de Valparaíso,69060700K,Chile,procuringEntity,buyer,id-0.0.parties.1.additionalIdentifiers.0,id-0.0.parties.1,I MUNICIPALIDAD DE QUINTERO
2,id-0.1.parties.0,id-0.1,3401,Municipalidad de Los Angeles | MUNICIPALIDAD D...,Región del Biobío,691701018,Chile,procuringEntity,buyer,id-0.1.parties.0.additionalIdentifiers.0,id-0.1.parties.0,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE
3,id-0.2.parties.0,id-0.2,200536,Todovidrios | Vidrieria las condes 7002,Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier,id-0.2.parties.0.additionalIdentifiers.0,id-0.2.parties.0,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
4,id-0.2.parties.1,id-0.2,731625,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,Región Metropolitana de Santiago,762581167,Chile,tenderer,,id-0.2.parties.1.additionalIdentifiers.0,id-0.2.parties.1,FERRETERIA COMERCIAL L&J LIMITADA


In [116]:
df_parties_and_add.drop(columns=["_link_y", "_link_parties"], inplace=True)
df_parties_and_add.head()

Unnamed: 0,_link_x,_link_main,id,name,address_region,identifier_id,address_countryName,rol 1,rol 2,legalName
0,id-0.0.parties.0,id-0.0,836904,JOSÉ MIGUEL | JOSÉ MIGUEL,Región de Valparaíso,182792101,Chile,tenderer,supplier,JOSÉ MIGUEL REYES GACITÚA
1,id-0.0.parties.1,id-0.0,5464,Ilustre Municipalidad de Quintero | UNIDAD NORMAL,Región de Valparaíso,69060700K,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE QUINTERO
2,id-0.1.parties.0,id-0.1,3401,Municipalidad de Los Angeles | MUNICIPALIDAD D...,Región del Biobío,691701018,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE
3,id-0.2.parties.0,id-0.2,200536,Todovidrios | Vidrieria las condes 7002,Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
4,id-0.2.parties.1,id-0.2,731625,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,Región Metropolitana de Santiago,762581167,Chile,tenderer,,FERRETERIA COMERCIAL L&J LIMITADA


In [117]:
df_parties_and_add.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300059 entries, 0 to 1300058
Data columns (total 10 columns):
 #   Column               Non-Null Count    Dtype 
---  ------               --------------    ----- 
 0   _link_x              1300059 non-null  object
 1   _link_main           1300059 non-null  object
 2   id                   1300059 non-null  int64 
 3   name                 1300056 non-null  object
 4   address_region       1285117 non-null  object
 5   identifier_id        1299403 non-null  object
 6   address_countryName  1285137 non-null  object
 7   rol 1                1300059 non-null  object
 8   rol 2                503396 non-null   object
 9   legalName            1299395 non-null  object
dtypes: int64(1), object(9)
memory usage: 99.2+ MB


### Union de awards_supplier y parties.

In [118]:
df_supplier_additional_info = df_award_and_supplier.merge(df_parties_and_add, left_on=["_link_main_x", "name"], right_on=["_link_main", "name"])
df_supplier_additional_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258472 entries, 0 to 258471
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   _link_x_x            258472 non-null  object 
 1   _link_main_x         258472 non-null  object 
 2   date                 256339 non-null  object 
 3   status               258472 non-null  object 
 4   value_amount         258469 non-null  float64
 5   value_monetary_unit  258469 non-null  object 
 6   id_x                 258472 non-null  int64  
 7   name                 258472 non-null  object 
 8   _link_x_y            258472 non-null  object 
 9   _link_main           258472 non-null  object 
 10  id_y                 258472 non-null  int64  
 11  address_region       255119 non-null  object 
 12  identifier_id        258261 non-null  object 
 13  address_countryName  255124 non-null  object 
 14  rol 1                258472 non-null  object 
 15  rol 2            

In [119]:
df_supplier_additional_info.head()

Unnamed: 0,_link_x_x,_link_main_x,date,status,value_amount,value_monetary_unit,id_x,name,_link_x_y,_link_main,id_y,address_region,identifier_id,address_countryName,rol 1,rol 2,legalName
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,836904,JOSÉ MIGUEL | JOSÉ MIGUEL,id-0.0.parties.0,id-0.0,836904,Región de Valparaíso,182792101,Chile,tenderer,supplier,JOSÉ MIGUEL REYES GACITÚA
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,200536,Todovidrios | Vidrieria las condes 7002,id-0.2.parties.0,id-0.2,200536,Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,909666,Jorge Alejandro | Jorge Alejandro,id-0.4.parties.0,id-0.4,909666,Región del Biobío,81079137,Chile,tenderer,supplier,Jorge Cifuentes Flores
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,24062,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR L...,id-0.5.parties.0,id-0.5,24062,Región Metropolitana de Santiago,821225000,Chile,tenderer,supplier,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR LTDA
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,69821,RICARDO RODRIGUEZ Y CIA. LTDA. | RICARDO RODRI...,id-0.5.parties.2,id-0.5,69821,Región Metropolitana de Santiago,89912300K,Chile,tenderer,supplier,INGENIERIA Y CONSTRUCCION RICARDO RODRIGUEZ Y ...


In [120]:
df_supplier_additional_info.drop(columns=["id_y", "_link_main", "_link_x_y"], inplace=True)
df_supplier_additional_info.rename(columns={"_link_main_x":"_link_main", "_link_x_x":"_link_awards", "id_x":"supplier_id"}, inplace=True)
df_supplier_additional_info.head()

Unnamed: 0,_link_awards,_link_main,date,status,value_amount,value_monetary_unit,supplier_id,name,address_region,identifier_id,address_countryName,rol 1,rol 2,legalName
0,id-0.0.awards.0,id-0.0,2017-12-23T13:23:24Z,active,690000.0,CLP,836904,JOSÉ MIGUEL | JOSÉ MIGUEL,Región de Valparaíso,182792101,Chile,tenderer,supplier,JOSÉ MIGUEL REYES GACITÚA
1,id-0.2.awards.0,id-0.2,2018-02-02T11:16:54Z,active,870000.0,CLP,200536,Todovidrios | Vidrieria las condes 7002,Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
2,id-0.4.awards.0,id-0.4,2017-12-21T18:17:23Z,active,6000000.0,CLP,909666,Jorge Alejandro | Jorge Alejandro,Región del Biobío,81079137,Chile,tenderer,supplier,Jorge Cifuentes Flores
3,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,24062,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR L...,Región Metropolitana de Santiago,821225000,Chile,tenderer,supplier,INSTITUTO ORTOPEDICO Y DE REHABILITACION IOR LTDA
4,id-0.5.awards.0,id-0.5,2018-01-16T10:56:54Z,active,347043.0,CLP,69821,RICARDO RODRIGUEZ Y CIA. LTDA. | RICARDO RODRI...,Región Metropolitana de Santiago,89912300K,Chile,tenderer,supplier,INGENIERIA Y CONSTRUCCION RICARDO RODRIGUEZ Y ...


## Unión de main con otros dataframes.

In [121]:
df_main_buyer_info = df_filtrado.merge(df_parties_and_add, right_on=["id","_link_main"], left_on=["buyer_id","_link"], how="left")
df_main_buyer_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120718 entries, 0 to 120717
Data columns (total 39 columns):
 #   Column                                       Non-Null Count   Dtype          
---  ------                                       --------------   -----          
 0   _link                                        120718 non-null  object         
 1   initiationType                               120718 non-null  object         
 2   tender_id                                    120718 non-null  object         
 3   tender_procurementMethodDetails              119575 non-null  category       
 4   tender_status                                120718 non-null  object         
 5   tender_procurementMethod                     120718 non-null  object         
 6   tender_hasEnquiries                          120718 non-null  bool           
 7   tender_awardPeriod_endDate                   120718 non-null  datetime64[ns] 
 8   tender_awardPeriod_startDate                 120718 no

In [122]:
df_main_buyer_info = df_main_buyer_info.merge(df_instituciones_compradoras, left_on="id", right_on="Codigo Unidad de Compra")

In [123]:
df_main_buyer_info.head()

Unnamed: 0,_link,initiationType,tender_id,tender_procurementMethodDetails,tender_status,tender_procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,name,address_region,identifier_id,address_countryName,rol 1,rol 2,legalName,sector,Codigo Unidad de Compra,Unidad de Compra
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,Municipalidad de Temuco | Municipalidad de Tem...,Región de la Araucanía,691907007,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE TEMUCO,MUNICIPALIDADES,5973,MUNICIPALIDAD DE TEMUCO-EDUCACI�N
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,Ilustre Municipalidad de El Bosque | Ilustre M...,Región Metropolitana de Santiago,692553004,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,MUNICIPALIDADES,2732,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE
2,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,I.Municipalidad de Curicó | IMUNI_CURICOADQUIS...,Región del Maule,691001008,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE CURICO,MUNICIPALIDADES,3428,IMUNI_CURICOADQUISICIONES
3,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,Municipalidad de Los Angeles | MUNICIPALIDAD D...,Región del Biobío,691701018,Chile,procuringEntity,buyer,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE,MUNICIPALIDADES,3401,MUNICIPALIDAD DE LOS ANGELES - EDUCACION
4,id-0.3894,tender,2310-138-L121,L1,complete,open,False,2021-06-10 22:34:00,2021-06-09 22:34:00,1 days 00:00:00,...,ILUSTRE MUNICIPALIDAD DE MOLINA | Dirección C...,Región del Maule,691101002,Chile,procuringEntity,buyer,IMunicipalidad de Molina,MUNICIPALIDADES,3304,DIRECCI�N COMUNAL DE EDUCACI�N


In [124]:
df_main_buyer_info.drop(columns=["buyer_id", "tender_procuringEntity_name", "_link_main","rol 1" ,"rol 2","_link_x", "address_countryName","_link_x", "_link_main"], inplace=True)
df_main_buyer_info.rename(columns={"address_region":"buyer_region", "name":"buyer_name", "id":"buyer_id", "legalName":"buyer_legalName", "tender_procurementMethodDetails":"procurement_details", "tender_procurementMethod":"procurementMethod", "address_countryName":"buyer_country"},inplace=True)
df_main_buyer_info.head()

Unnamed: 0,_link,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,tender_contractPeriod_durationInDays,estimated_cost_monetary_unit,buyer_id,buyer_name,buyer_region,identifier_id,buyer_legalName,sector,Codigo Unidad de Compra,Unidad de Compra
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,CLP,5973.0,Municipalidad de Temuco | Municipalidad de Tem...,Región de la Araucanía,691907007,I MUNICIPALIDAD DE TEMUCO,MUNICIPALIDADES,5973,MUNICIPALIDAD DE TEMUCO-EDUCACI�N
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,,,2732.0,Ilustre Municipalidad de El Bosque | Ilustre M...,Región Metropolitana de Santiago,692553004,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,MUNICIPALIDADES,2732,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE
2,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,,,3428.0,I.Municipalidad de Curicó | IMUNI_CURICOADQUIS...,Región del Maule,691001008,I MUNICIPALIDAD DE CURICO,MUNICIPALIDADES,3428,IMUNI_CURICOADQUISICIONES
3,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,,CLP,3401.0,Municipalidad de Los Angeles | MUNICIPALIDAD D...,Región del Biobío,691701018,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE,MUNICIPALIDADES,3401,MUNICIPALIDAD DE LOS ANGELES - EDUCACION
4,id-0.3894,tender,2310-138-L121,L1,complete,open,False,2021-06-10 22:34:00,2021-06-09 22:34:00,1 days 00:00:00,...,,,3304.0,ILUSTRE MUNICIPALIDAD DE MOLINA | Dirección C...,Región del Maule,691101002,IMunicipalidad de Molina,MUNICIPALIDADES,3304,DIRECCI�N COMUNAL DE EDUCACI�N


####  Merge de df_main_buyer con df_supplier_additional_info y df_awards

In [125]:
df_main_buyer_suplier = df_main_buyer_info.merge(df_supplier_additional_info, left_on="_link", right_on="_link_main", suffixes=("main_", "_supplier"), how="left")
df_main_buyer_suplier.head()

Unnamed: 0,_link,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,value_amount,value_monetary_unit,supplier_id,name,address_region,identifier_id_supplier,address_countryName,rol 1,rol 2,legalName
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,,,,,,,,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,804000.0,CLP,761050.0,GRYLY SABINA | GRYLY SABINA,Región Metropolitana de Santiago,129800429,Chile,tenderer,supplier,GRYLY SABINA MONTERO BELTRAN
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,804000.0,CLP,99621.0,Trema | Trema,Región Metropolitana de Santiago,761288407,Chile,tenderer,supplier,TREMA DENTAL LIMITADA
3,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,1232000.0,CLP,814318.0,Cerro Name | Cerro Name,Región del Maule,76320572K,Chile,tenderer,supplier,SERVICIOS CERRO NAME LIMITADA
4,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,,,,,,,,,,


In [126]:
df_main_buyer_suplier.columns

Index(['_link', 'initiationType', 'tender_id', 'procurement_details',
       'tender_status', 'procurementMethod', 'tender_hasEnquiries',
       'tender_awardPeriod_endDate', 'tender_awardPeriod_startDate',
       'tender_awardPeriod_durationInDays', 'tender_tenderPeriod_endDate',
       'tender_tenderPeriod_startDate', 'tender_tenderPeriod_durationInDays',
       'tender_enquiryPeriod_endDate', 'tender_enquiryPeriod_startDate',
       'tender_enquiryPeriod_durationInDays', 'estimated_cost',
       'planning_budget_id', 'planning_budget_description',
       'planning_budget_amount_amount', 'planning_budget_amount_currency',
       'tender_techniques_hasFrameworkAgreement',
       'tender_techniques_frameworkAgreement_method',
       'tender_contractPeriod_endDate', 'tender_contractPeriod_startDate',
       'tender_contractPeriod_durationInDays', 'estimated_cost_monetary_unit',
       'buyer_id', 'buyer_name', 'buyer_region', 'identifier_idmain_',
       'buyer_legalName', 'sector', 'Co

In [127]:
df_main_buyer_suplier[['estimated_cost_monetary_unit', 'buyer_id', 'buyer_name','buyer_region', 'buyer_legalName', '_link_awards', '_link_main', 'date','status', 'value_amount', 'value_monetary_unit', 'supplier_id', 'name','address_region', 'address_countryName', 'rol 1', 'rol 2', 'legalName']].head()

Unnamed: 0,estimated_cost_monetary_unit,buyer_id,buyer_name,buyer_region,buyer_legalName,_link_awards,_link_main,date,status,value_amount,value_monetary_unit,supplier_id,name,address_region,address_countryName,rol 1,rol 2,legalName
0,CLP,5973.0,Municipalidad de Temuco | Municipalidad de Tem...,Región de la Araucanía,I MUNICIPALIDAD DE TEMUCO,,,,,,,,,,,,,
1,,2732.0,Ilustre Municipalidad de El Bosque | Ilustre M...,Región Metropolitana de Santiago,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,id-0.799.awards.0,id-0.799,2017-10-31T18:30:57Z,active,804000.0,CLP,761050.0,GRYLY SABINA | GRYLY SABINA,Región Metropolitana de Santiago,Chile,tenderer,supplier,GRYLY SABINA MONTERO BELTRAN
2,,2732.0,Ilustre Municipalidad de El Bosque | Ilustre M...,Región Metropolitana de Santiago,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,id-0.799.awards.0,id-0.799,2017-10-31T18:30:57Z,active,804000.0,CLP,99621.0,Trema | Trema,Región Metropolitana de Santiago,Chile,tenderer,supplier,TREMA DENTAL LIMITADA
3,,3428.0,I.Municipalidad de Curicó | IMUNI_CURICOADQUIS...,Región del Maule,I MUNICIPALIDAD DE CURICO,id-0.1821.awards.0,id-0.1821,2017-09-01T09:40:50Z,active,1232000.0,CLP,814318.0,Cerro Name | Cerro Name,Región del Maule,Chile,tenderer,supplier,SERVICIOS CERRO NAME LIMITADA
4,CLP,3401.0,Municipalidad de Los Angeles | MUNICIPALIDAD D...,Región del Biobío,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE,,,,,,,,,,,,,


In [128]:
df_main_buyer_suplier.rename(columns={'date':"date_award",'status':"award_status", 'value_amount':"tender_value", 'value_monetary_unit':"tender_monetary_unit", 'name':"supplier_name",'address_region':"supplier_region", 'address_countryName':"supplier_country", 'legalName':"supplier_legal_name"}, inplace=True)
df_main_buyer_suplier.head()

Unnamed: 0,_link,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,tender_value,tender_monetary_unit,supplier_id,supplier_name,supplier_region,identifier_id_supplier,supplier_country,rol 1,rol 2,supplier_legal_name
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,,,,,,,,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,804000.0,CLP,761050.0,GRYLY SABINA | GRYLY SABINA,Región Metropolitana de Santiago,129800429,Chile,tenderer,supplier,GRYLY SABINA MONTERO BELTRAN
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,804000.0,CLP,99621.0,Trema | Trema,Región Metropolitana de Santiago,761288407,Chile,tenderer,supplier,TREMA DENTAL LIMITADA
3,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,1232000.0,CLP,814318.0,Cerro Name | Cerro Name,Región del Maule,76320572K,Chile,tenderer,supplier,SERVICIOS CERRO NAME LIMITADA
4,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,,,,,,,,,,


In [129]:
df_main_buyer_suplier.drop(columns=['rol 1', 'rol 2', '_link_awards', '_link_main'], inplace=True)

In [130]:
df_main_buyer_suplier.head()

Unnamed: 0,_link,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,date_award,award_status,tender_value,tender_monetary_unit,supplier_id,supplier_name,supplier_region,identifier_id_supplier,supplier_country,supplier_legal_name
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,,,,,,,,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,2017-10-31T18:30:57Z,active,804000.0,CLP,761050.0,GRYLY SABINA | GRYLY SABINA,Región Metropolitana de Santiago,129800429,Chile,GRYLY SABINA MONTERO BELTRAN
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,2017-10-31T18:30:57Z,active,804000.0,CLP,99621.0,Trema | Trema,Región Metropolitana de Santiago,761288407,Chile,TREMA DENTAL LIMITADA
3,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,2017-09-01T09:40:50Z,active,1232000.0,CLP,814318.0,Cerro Name | Cerro Name,Región del Maule,76320572K,Chile,SERVICIOS CERRO NAME LIMITADA
4,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,,,,,,,,,,


In [131]:
df_main_award = df_main_buyer_info.merge(df_tender_item, left_on="_link", right_on="_link_main", how="left")
df_main_award.head()

Unnamed: 0,_link_x,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,Unidad de Compra,_link_y,_link_main,id,description,quantity,unit_name,Nombre Segmento,Código Producto,Nombre Producto
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,MUNICIPALIDAD DE TEMUCO-EDUCACI�N,,,,,,,,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,id-0.799.tender.items.0,id-0.799,36260703.0,Equipamiento y suministros médicos / Equipos y...,1200.0,Unidad,"Equipo Médico, Accesorios y Suministros",42151611.0,Cepillos operativos dentales
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,id-0.799.tender.items.1,id-0.799,36260708.0,Equipamiento y suministros médicos / Equipos y...,600.0,Unidad,"Equipo Médico, Accesorios y Suministros",42152406.0,Pastas abrasivas de uso odontológico
3,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,IMUNI_CURICOADQUISICIONES,id-0.1821.tender.items.0,id-0.1821,36126674.0,"Combustibles, lubricantes y anticorrosivos / C...",1600.0,Litro,"Materiales Combustibles, Aditivos para Combust...",15101507.0,Benceno
4,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,MUNICIPALIDAD DE LOS ANGELES - EDUCACION,id-0.3575.tender.items.0,id-0.3575,35775593.0,"Artículos para estructuras, obras y construcci...",1.0,Unidad,"Componentes y Suministros para Estructuras, Ed...",30161702.0,Pisos de madera


### main_award y tenderers.

In [132]:
df_tenderers = df_tender_tenderers.merge(df_parties_and_add, left_on=["_link_main","id"], right_on=["_link_main", "id"], how="left")
df_tenderers.head()

Unnamed: 0,_link,_link_main,id,name_x,_link_x,name_y,address_region,identifier_id,address_countryName,rol 1,rol 2,legalName
0,id-0.0.tender.tenderers.0,id-0.0,836904.0,JOSÉ MIGUEL | JOSÉ MIGUEL,id-0.0.parties.0,JOSÉ MIGUEL | JOSÉ MIGUEL,Región de Valparaíso,182792101,Chile,tenderer,supplier,JOSÉ MIGUEL REYES GACITÚA
1,id-0.2.tender.tenderers.0,id-0.2,17336.0,Inmobiliaria y Servicios | Inmobiliaria y Serv...,id-0.2.parties.2,Inmobiliaria y Servicios | Inmobiliaria y Serv...,Región Metropolitana de Santiago,760043508,Chile,tenderer,,INMOBILIARIA Y SERVICIOS ORELLANA Y MARTINEZ L...
2,id-0.2.tender.tenderers.1,id-0.2,200536.0,Todovidrios | Vidrieria las condes 7002,id-0.2.parties.0,Todovidrios | Vidrieria las condes 7002,Región Metropolitana de Santiago,51259424,Chile,tenderer,supplier,JOSE MERCEDES RODRIGUEZ ECHEVERRIA
3,id-0.2.tender.tenderers.2,id-0.2,731625.0,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,id-0.2.parties.1,FERRETERIA COMERCIAL L Y J LIMITADA | COMERCI...,Región Metropolitana de Santiago,762581167,Chile,tenderer,,FERRETERIA COMERCIAL L&J LIMITADA
4,id-0.2.tender.tenderers.3,id-0.2,971211.0,ROBERT FRANK | ROBERT FRANK,id-0.2.parties.3,ROBERT FRANK | ROBERT FRANK,Región Metropolitana de Santiago,767366418,Chile,tenderer,,AUTOMATICE MOTORS LTDA..


In [133]:
df_main_award_itemless = df_main_buyer_info.merge(df_awards, left_on="_link", right_on="_link_main")

In [134]:
df_main_award_itemless.head()

Unnamed: 0,_link_x,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,buyer_legalName,sector,Codigo Unidad de Compra,Unidad de Compra,_link_y,_link_main,date,status,value_amount,value_monetary_unit
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,I MUNICIPALIDAD DE TEMUCO,MUNICIPALIDADES,5973,MUNICIPALIDAD DE TEMUCO-EDUCACI�N,id-0.601.awards.0,id-0.601,,cancelled,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,MUNICIPALIDADES,2732,I MUNICIPALIDAD DE LA COMUNA DE EL BOSQUE,id-0.799.awards.0,id-0.799,2017-10-31T18:30:57Z,active,804000.0,CLP
2,id-0.1821,tender,2436-318-L121,L1,complete,open,False,2021-11-11 17:00:00,2021-10-27 16:01:00,15 days 00:59:00,...,I MUNICIPALIDAD DE CURICO,MUNICIPALIDADES,3428,IMUNI_CURICOADQUISICIONES,id-0.1821.awards.0,id-0.1821,2017-09-01T09:40:50Z,active,1232000.0,CLP
3,id-0.3575,tender,2409-286-L121,L1,active,open,False,2021-09-21 16:15:00,2021-08-23 15:01:00,29 days 01:14:00,...,I MUNICIPALIDAD DE LOS ANGELES DEPTO DE,MUNICIPALIDADES,3401,MUNICIPALIDAD DE LOS ANGELES - EDUCACION,id-0.3575.awards.0,id-0.3575,2017-05-29T17:23:48Z,unsuccessful,,
4,id-0.3894,tender,2310-138-L121,L1,complete,open,False,2021-06-10 22:34:00,2021-06-09 22:34:00,1 days 00:00:00,...,IMunicipalidad de Molina,MUNICIPALIDADES,3304,DIRECCI�N COMUNAL DE EDUCACI�N,id-0.3894.awards.0,id-0.3894,2017-05-31T13:50:39Z,unsuccessful,,


In [135]:
df_main_award_itemless.drop(columns=["_link_y", "_link_main"], inplace=True)

In [136]:
df_main_tenderers = df_main_buyer_info.merge(df_tenderers, left_on="_link", right_on = "_link_main", how="left")

In [137]:
df_main_tenderers.head()

Unnamed: 0,_link_x,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,id,name_x,_link_x.1,name_y,address_region,identifier_id_y,address_countryName,rol 1,rol 2,legalName
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,,,,,,,,,
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,116253.0,Expro Dental (SCL) | Expro Dental (SCL),id-0.799.parties.13,Expro Dental (SCL) | Expro Dental (SCL),Región Metropolitana de Santiago,995744600.0,Chile,tenderer,,EXPRO SPA
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,142341.0,Awad Artículos Médicos | Awad Artículos Médicos,id-0.799.parties.3,Awad Artículos Médicos | Awad Artículos Médicos,Región Metropolitana de Santiago,53987877.0,Chile,tenderer,,EDUARDO AWAD MANZUR
3,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,160694.0,MAYORDENT | MAYORDENT LTDA,id-0.799.parties.16,MAYORDENT | MAYORDENT LTDA,Región del Biobío,762713608.0,Chile,tenderer,,MAYORDENT DENTAL LIMITADA
4,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,24656.0,BUHOS S.C.I. LTDA. | BUHOS S.C.I. LTDA.,id-0.799.parties.5,BUHOS S.C.I. LTDA. | BUHOS S.C.I. LTDA.,Región Metropolitana de Santiago,854627007.0,Chile,tenderer,,BUHOS SOC COMERCIAL INDUSTRIAL LTDA


In [138]:
df_main_tenderers.columns

Index(['_link_x', 'initiationType', 'tender_id', 'procurement_details',
       'tender_status', 'procurementMethod', 'tender_hasEnquiries',
       'tender_awardPeriod_endDate', 'tender_awardPeriod_startDate',
       'tender_awardPeriod_durationInDays', 'tender_tenderPeriod_endDate',
       'tender_tenderPeriod_startDate', 'tender_tenderPeriod_durationInDays',
       'tender_enquiryPeriod_endDate', 'tender_enquiryPeriod_startDate',
       'tender_enquiryPeriod_durationInDays', 'estimated_cost',
       'planning_budget_id', 'planning_budget_description',
       'planning_budget_amount_amount', 'planning_budget_amount_currency',
       'tender_techniques_hasFrameworkAgreement',
       'tender_techniques_frameworkAgreement_method',
       'tender_contractPeriod_endDate', 'tender_contractPeriod_startDate',
       'tender_contractPeriod_durationInDays', 'estimated_cost_monetary_unit',
       'buyer_id', 'buyer_name', 'buyer_region', 'identifier_id_x',
       'buyer_legalName', 'sector', 'Cod

In [139]:
df_main_tenderers["Has_tender"] = df_main_tenderers.index.map(lambda x: 1 if df_main_tenderers.loc[x]["rol 1"] == "tenderer" else 0)
df_main_tenderers["is_supplier"] = df_main_tenderers.index.map(lambda x: 1 if df_main_tenderers.loc[x]["rol 2"] == "supplier" else 0)


In [140]:
df_main_tenderers["identifier_id_y"]

0            NaN
1      995744600
2       53987877
3      762713608
4      854627007
         ...    
556    761903454
557    100363631
558    96636310K
559    763113566
560    765263654
Name: identifier_id_y, Length: 561, dtype: object

In [141]:
df_main_tenderers["identifier_id_x"]

0      691907007
1      692553004
2      692553004
3      692553004
4      692553004
         ...    
556    690610000
557    691701018
558    616060007
559    616060007
560    616060007
Name: identifier_id_x, Length: 561, dtype: object

In [142]:
df_main_tenderers.drop(columns=["name_x"],inplace=True)

In [143]:
df_main_tenderers.rename(columns={"_link_x":"_link", "name_y":"tender_name", "address_region":"tenderer_region", "address_countryName":"tenderer_country",
                                  "legalName":"tenderer_legalName", "identifier_id_y":"tenderer_identifier", "identifier_id_x":"buyer_identifier"},inplace=True)
df_main_tenderers.head()

Unnamed: 0,_link,initiationType,tender_id,procurement_details,tender_status,procurementMethod,tender_hasEnquiries,tender_awardPeriod_endDate,tender_awardPeriod_startDate,tender_awardPeriod_durationInDays,...,_link.1,tender_name,tenderer_region,tenderer_identifier,tenderer_country,rol 1,rol 2,tenderer_legalName,Has_tender,is_supplier
0,id-0.601,tender,5060-572-L121,L1,complete,open,True,2021-12-02 17:07:00,2021-11-29 17:08:00,2 days 23:59:00,...,,,,,,,,,0,0
1,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,id-0.799.parties.13,Expro Dental (SCL) | Expro Dental (SCL),Región Metropolitana de Santiago,995744600.0,Chile,tenderer,,EXPRO SPA,1,0
2,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,id-0.799.parties.3,Awad Artículos Médicos | Awad Artículos Médicos,Región Metropolitana de Santiago,53987877.0,Chile,tenderer,,EDUARDO AWAD MANZUR,1,0
3,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,id-0.799.parties.16,MAYORDENT | MAYORDENT LTDA,Región del Biobío,762713608.0,Chile,tenderer,,MAYORDENT DENTAL LIMITADA,1,0
4,id-0.799,tender,1736-353-LE21,LE,complete,open,True,2021-12-23 16:14:00,2021-11-22 16:14:00,31 days 00:00:00,...,id-0.799.parties.5,BUHOS S.C.I. LTDA. | BUHOS S.C.I. LTDA.,Región Metropolitana de Santiago,854627007.0,Chile,tenderer,,BUHOS SOC COMERCIAL INDUSTRIAL LTDA,1,0


In [144]:
df_main.columns

Index(['_link', 'initiationType', 'tender_id',
       'tender_procurementMethodDetails', 'tender_status',
       'tender_procurementMethod', 'tender_hasEnquiries',
       'tender_awardPeriod_endDate', 'tender_awardPeriod_startDate',
       'tender_awardPeriod_durationInDays', 'tender_tenderPeriod_endDate',
       'tender_tenderPeriod_startDate', 'tender_tenderPeriod_durationInDays',
       'tender_enquiryPeriod_endDate', 'tender_enquiryPeriod_startDate',
       'tender_enquiryPeriod_durationInDays', 'tender_procuringEntity_name',
       'tender_value_amount', 'tender_value_unitOfAccount',
       'tender_value_currency', 'buyer_id', 'planning_budget_id',
       'planning_budget_description', 'planning_budget_amount_amount',
       'planning_budget_amount_currency',
       'tender_techniques_hasFrameworkAgreement',
       'tender_techniques_frameworkAgreement_method',
       'tender_contractPeriod_endDate', 'tender_contractPeriod_startDate',
       'tender_contractPeriod_durationInDays']

In [145]:
df_main = df_main[['_link', 'initiationType', 'buyer_id', 'tender_id',
       'tender_procurementMethodDetails', 'tender_status',
       'tender_procurementMethod', 'tender_hasEnquiries',
       'tender_awardPeriod_endDate', 'tender_awardPeriod_startDate',
       'tender_awardPeriod_durationInDays', 'tender_tenderPeriod_endDate',
       'tender_tenderPeriod_startDate', 'tender_tenderPeriod_durationInDays',
       'tender_enquiryPeriod_endDate', 'tender_enquiryPeriod_startDate',
       'tender_enquiryPeriod_durationInDays', 'tender_procuringEntity_name',
       'tender_value_amount', 'tender_value_currency',
       'tender_value_unitOfAccount']]

# Datasets del 2021
Aquí creamos con los siguientes comandos (puestos en comentario por si acaso) los datasets de awards y tenders de 2021

In [None]:
#df_main_award.to_excel('awards.xlsx', index=False)

In [None]:
#df_tenderers.to_csv('tenderers_2021.csv', encoding='utf-8', sep=',')