Installations and imports

In [3]:
import numpy as np
import pandas as pd
import os
import itertools

### Read the raw datasets

In [20]:
# Root directory that is searched (recursively, via os.walk) for the raw CSV datasets.
# The path is relative, so it is resolved against the kernel's current working directory.
base_dir = '../dataset/raw'

This raw dataset is 'affiliate_practice' ($ap$). It shows which medical practices have been taken by each affiliate.

In [21]:
# Locate the affiliate-practice CSV anywhere under base_dir and load it as `ap`.
target_filename = 'cons_pract_medicas.csv'

for dirname, _, filenames in os.walk(base_dir):
    if target_filename in filenames:
        filepath = os.path.join(dirname, target_filename)
        print(f"File founded: {filepath}")
        ap = pd.read_csv(filepath)
        break
else:
    # The loop finished without `break`: the file is not under base_dir
    # (os.walk also yields nothing when base_dir itself does not exist).
    # Fail here instead of leaving `ap` undefined, or stale from an earlier run.
    raise FileNotFoundError(
        f"'{target_filename}' was not found under '{base_dir}'"
    )

File founded: ../dataset/raw\cons_pract_medicas.csv


This raw dataset is 'diabetes_practices' ($dp$). These are the practices of interest for the analysis.

In [22]:
# Locate the diabetes-practices CSV anywhere under base_dir and load it as `dp`.
target_filename = 'practicas_diabetes.csv'

for dirname, _, filenames in os.walk(base_dir):
    if target_filename in filenames:
        filepath = os.path.join(dirname, target_filename)
        print(f"File founded: {filepath}")
        dp = pd.read_csv(filepath)
        break
else:
    # The loop finished without `break`: the file is not under base_dir
    # (os.walk also yields nothing when base_dir itself does not exist).
    # Fail here instead of leaving `dp` undefined, or stale from an earlier run.
    raise FileNotFoundError(
        f"'{target_filename}' was not found under '{base_dir}'"
    )

# define a list of the practices ids of this dataset
# Direct column indexing raises a KeyError naming 'id_practica' if the column
# is missing; DataFrame.get would return None and fail later with an
# unrelated TypeError.
dp_ids = dp['id_practica'].tolist()
n_dp_ids = len(dp_ids)
print('THE NUMBER OF PRACTICES OF INTEREST TO EVALUATE IS',n_dp_ids)

File founded: ../dataset/raw\practicas_diabetes.csv
THE NUMBER OF PRACTICES OF INTEREST TO EVALUATE IS 17


### Display raw datasets

In [23]:
# Show the affiliate-practice frame as loaded (pandas truncates the rendered rows).
ap

Unnamed: 0,id_afiliado,id_practica,nombre_practica,fecha,dia,mes,anio
0,6d0e46f26269df8d87636480fd023c9d,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,02/05/2019,2,5,2019
1,a60a5e9d07bc1667c71cd4ee95c9058d,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,02/05/2019,2,5,2019
2,a60a5e9d07bc1667c71cd4ee95c9058d,12.17.01.01,ELECTROCARDIOGRAMA EN CONSULTORIO,02/05/2019,2,5,2019
3,116ed781dcc78944c5d23b74c30246f9,11.13.01.07,"DESTRUCCIÓN DE LESIÓN DE PIEL (VERRUGA, QUERAT...",02/05/2019,2,5,2019
4,39ce4fde48cf81b2264f68cfb00c6f93,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,02/05/2019,2,5,2019
...,...,...,...,...,...,...,...
286002,3c7b1f81b6e630cef26f68f3b2f76580,07.66.00.0001,ACTO BIOQUIMICO,09/09/2024,9,9,2024
286003,3c7b1f81b6e630cef26f68f3b2f76580,07.66.03.0309,"EXUDADO NASOFARINGEO, CULTIVO. |(antes)| EXUDA...",09/09/2024,9,9,2024
286004,3c7b1f81b6e630cef26f68f3b2f76580,07.66.11.1180,TEST RAPIDO en FAUCES para STREPTOCOCCUS Beta ...,09/09/2024,9,9,2024
286005,3c7b1f81b6e630cef26f68f3b2f76580,07.66.20.2003,ACTO BIOQUÍMICO ADMINISTRATIVO POR VALIDACIÓN ...,09/09/2024,9,9,2024


In [24]:
# Show the diabetes-practices frame (the practices of interest).
dp

Unnamed: 0,id_practica,nombre_practica
0,12.19.03.03,CONSULTA NUTRICIONISTA CON ESPECIALIZACIÓN EN ...
1,07.66.04.0412,GLUCEMIA (C/U) |(antes)| GLUCEMIA
2,07.66.10.1035,COLESTEROL HDL (HDL-C) |(antes)| COLESTEROL HDL
3,07.66.10.1070,HEMOGLOBINA GLICOSILADA (Hb A1C) |(antes)| HEM...
4,07.66.11.1130,MICROALBUMINURIA / ALBUMINA URINARIA |(antes)|...
5,07.66.01.0174,COLESTEROL TOTAL
6,12.42.01.10,CONSULTA ESPECIALISTAS EN DIABETES
7,12.19.03.01,CONSULTA INICIAL CON PLAN NUTRICIONAL
8,12.46.00.04,"Consulta oft. a domicilio, más de 3 se adj H.C."
9,12.46.00.01,CONSULTA VESTIDA OFTALMOLOGICA (PEDIATRICA Y D...


In [25]:
# Parse the day/month/year strings stored in 'fecha' into datetime values.
ap['fecha'] = pd.to_datetime(ap['fecha'], format='%d/%m/%Y')

# Exclusive bounds of the analysis window: a row is kept only when its date
# is strictly after low_date and strictly before sup_date.
low_date = pd.to_datetime('31/12/2023', format='%d/%m/%Y')
sup_date = pd.to_datetime('01/07/2024', format='%d/%m/%Y')

# Drop every row outside the window with a single combined boolean mask.
ap = ap[(ap['fecha'] > low_date) & (ap['fecha'] < sup_date)]

In [26]:
# Show the affiliate-practice frame again to inspect the result of the date filter.
ap

Unnamed: 0,id_afiliado,id_practica,nombre_practica,fecha,dia,mes,anio
244296,c8f1b6ab9234f2874bbcd5eeac1d4b5e,07.66.00.0001,ACTO BIOQUIMICO,2024-01-02,2,1,2024
244297,c8f1b6ab9234f2874bbcd5eeac1d4b5e,07.66.09.0911,UROCULTIVO (MODULO),2024-01-02,2,1,2024
244298,8b77328e896abbc814c1e7e91d910c89,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,2024-01-02,2,1,2024
244299,f6b377b7e0de8ec0fa95ef004bcb8b7d,07.66.00.0001,ACTO BIOQUIMICO,2024-01-02,2,1,2024
244300,f6b377b7e0de8ec0fa95ef004bcb8b7d,07.66.01.0189,CORTISOL. |(antes)| CORTISOL,2024-01-02,2,1,2024
...,...,...,...,...,...,...,...
273045,7189e0df36d5456e95b20ed26bcca807,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,2024-06-29,29,6,2024
273046,9d04b15f79632311512053b65ad79df6,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,2024-06-30,30,6,2024
273047,98685168b30eb1e94d98225c444ad113,12.42.01.01,CONSULTA MEDICA SIN ESPECIALIDAD,2024-06-30,30,6,2024
273048,39edd98524a54ac18475c91fa31725ef,12.42.01.02,CONSULTA MEDICA PEDIÁTRICA,2024-06-30,30,6,2024
