# Analyzing business data

---

# Imports

#### Python libraries

In [1]:
import pandas as pd
pd.set_option('display.max_columns', 500)
# pd.set_option('use_inf_as_na', True)

import numpy as np

import os

import re

import matplotlib.pyplot as plt

from datetime import *

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import pprint as pp

import math

#### Ancillary modules

In [2]:
%load_ext autoreload
%autoreload 2

from BI_params import *
from BI_funcs import *

---

# Data analysis - Sales

## Import and clean data

In [3]:
dfs = pd.read_csv(os.path.join(data_loc, sales_data))

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
dfs = dfs_clean(dfs)

  return op(a, b)


#### Reviewing specialties results

In [None]:
dfs["Especialidad_match"].value_counts()

#### Reviewing business line results

In [None]:
dfs["BusLine_match"].value_counts()

## Analysis 1
- How have sales for each location behaved? (separating laboratory)

### Analysis 1.1
- Weekly basis

In [None]:
dfsx = data_processing_sales_A1p1(dfs)

In [None]:
dfsx

In [None]:
graph_sales_A1(dfsx)

### Analysis 1.2
- Monthly basis

In [None]:
dfs

---

## Analysis 2
- How have sales for each location behaved? (merging laboratory)

### Analysis 2.1
- Weekly basis

In [None]:
dfsx = data_processing_sales_A2p1(dfs)

In [None]:
dfsx

In [None]:
graph_sales_A2p1(dfsx)

### Analysis 2.2
- Monthly basis

#### Base code

In [74]:
dfsx = data_processing_sales_A2p2(dfs)

In [75]:
dfsx

Loc_Hom,BASILICA,COAPA,Location_not_found,MARINA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-10,0.0,0.0,0.0,61846.79
2018-11,0.0,0.0,0.0,264151.6
2018-12,0.0,0.0,0.0,365586.3
2019-1,0.0,0.0,0.0,505593.5
2019-2,0.0,0.0,0.0,562642.4
2019-3,0.0,0.0,0.0,684281.5
2019-4,0.0,0.0,0.0,664479.9
2019-5,0.0,0.0,0.0,749942.3
2019-6,0.0,0.0,0.0,734446.9
2019-7,423803.4,0.0,0.0,775884.7


In [76]:
dfsx.drop("Location_not_found", axis=1, inplace=True)

In [77]:
graph_sales_A2p2(dfsx)

#### Further analysis

In [None]:
dfx = dfsx.copy()

In [None]:
## Keep only the period of interest: from "2020-4" up to (not including) "2020-9"
month_labels = list(dfx.index)
first_pos = month_labels.index("2020-4")
stop_pos = month_labels.index("2020-9")
m1 = dfx.index[first_pos:stop_pos]
dfx = dfx.loc[m1, :]

## Month-over-month percent change (in %) for every column present at this point
for col in list(dfx.columns):
    pct_change = dfx[col].diff() / dfx[col].shift() * 100
    ## The first month has no predecessor (NaN) and a zero base gives +/-inf:
    ## both cases are reported as 0
    dfx[col + '_pc'] = pct_change.fillna(0).replace([np.inf, -np.inf], 0)

dfx

---

## Analysis 3
- How have sales for each location separately behaved weekly? (merging laboratory)

In [None]:
dfsx = data_processing_sales_A3(dfs)

In [None]:
dfsx

In [None]:
graph_sales_A3(dfsx)

---

## Analysis 4
- How have sales behaved per location per business line?

#### Base code

In [14]:
dfsx = data_processing_sales_A4(dfs)

In [15]:
dfsx

Unnamed: 0_level_0,Loc_Hom,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,COAPA,COAPA,COAPA,COAPA,COAPA,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,MARINA,MARINA,MARINA,MARINA,MARINA
Unnamed: 0_level_1,BusLine_match,CONSULTA,LABORATORIO,OTRA_LN,PROCEDIMIENTOS,ULTRASONIDO,CONSULTA,LABORATORIO,OTRA_LN,PROCEDIMIENTOS,ULTRASONIDO,CONSULTA,LABORATORIO,OTRA_LN,PROCEDIMIENTOS,ULTRASONIDO,CONSULTA,LABORATORIO,OTRA_LN,PROCEDIMIENTOS,ULTRASONIDO
BillDate,BillDate,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50448.42,0.0,2999.0,8048.849968,350.524125
2018,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,162018.0,14438.926284,23342.451861,63401.70715,950.524125
2018,12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,209122.5,26149.714923,29015.24125,91819.70715,9479.109762
2019,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,299390.5,36846.796285,36123.740593,123558.43727,9674.0
2019,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,317697.05,50448.342371,31088.397096,141132.597502,22276.0
2019,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,401662.3,49644.40532,38455.208128,184249.597502,10270.0
2019,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,393169.7,40566.696232,20146.523,200594.458032,10002.5
2019,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,423148.15,46921.089888,39632.585256,232220.43727,8020.0
2019,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,431737.6,49276.469253,38511.296566,197101.5,17820.0
2019,7,271028.0,48117.704238,4594.24,74027.5,26036.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,437364.7,74159.154436,51120.806024,195306.0,17934.0


#### Further analysis

In [58]:
dfx = dfsx.copy()

In [59]:
set_loc = "MARINA"

In [60]:
## Boolean masks for the period (months 4 to 8 of 2020) and the location of interest
bill_year = dfx.index.get_level_values(0)
bill_month = dfx.index.get_level_values(1)
mr1 = bill_year == 2020
mr2 = (bill_month >= 4) & (bill_month <= 8)
mc1 = dfx.columns.get_level_values(0) == set_loc

dfx = dfx.loc[(mr1 & mr2), mc1]


## Some quick cleanup
#### Drop the location level so columns are named by business line only
dfx.columns = dfx.columns.droplevel()

#### Participation (in %) of each business line in the month's total income
month_total = dfx.sum(axis=1)
for col in [col for col in dfx.columns if "total" not in col]:
    dfx[col + "_part"] = dfx[col] / month_total * 100

#### Collapse the two-level row index into a single "<level 0>-<level 1>" string index
date_labels = dfx.index.get_level_values(0).astype("str") + "-" + dfx.index.get_level_values(1).astype("str")
dfx = dfx.assign(Date=date_labels).set_index("Date")


## Period-over-period change (as a fraction, not %) for each business line
for col in [col for col in dfx.columns if "_part" not in col]:
    change = dfx[col].diff() / dfx[col].shift()
    ## No predecessor (NaN) or zero base (+/-inf) is reported as 0
    dfx[col + "_pc"] = change.fillna(0).replace([np.inf, -np.inf], 0)

In [61]:
## Filtering results for display
col_filter = "_pc"

In [62]:
dfx.loc[:, [col for col in dfx.columns if col_filter in col]]

BusLine_match,CONSULTA_pc,LABORATORIO_pc,OTRA_LN_pc,PROCEDIMIENTOS_pc,ULTRASONIDO_pc
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-4,0.0,0.0,0.0,0.0,0.0
2020-5,0.150855,-0.106179,-0.593006,0.306592,0.488258
2020-6,0.213479,0.969553,1.216761,0.097962,-0.033336
2020-7,0.155794,0.323957,-0.318851,0.062871,0.221483
2020-8,0.047387,-0.080798,-0.199891,-0.338777,-0.005203


In [63]:
## Plot only the raw business-line columns; derived "_pc" / "_part" metrics are skipped
bar_cols = [col for col in dfx.columns if "_pc" not in col and "_part" not in col]


## One bar trace per business line, labelled inside the bar with its abbreviated amount
fig = go.Figure(
    data=[
        go.Bar(
            x=dfx.index,
            y=dfx[name],
            name=name,
            text=dfx[name].astype("float"),
            textposition="inside",
            texttemplate="$%{value:.2s}",
        )
        for name in bar_cols
    ]
)


## Labels, axis prefix and fixed canvas dimensions
layout_options = dict(
    title=str(set_loc).title() + " - Ventas por línea de negocio",
    xaxis_title="Mes",
    yaxis_title="Ingresos [$MXN]",
    yaxis_tickprefix="$",
    autosize=False,
    width=1500,
    height=750,
)
fig.update_layout(**layout_options)


## X-axis tick spacing ("M1" is plotly's one-month interval)
fig.update_xaxes(dtick="M1")


fig.show()

---

## Analysis 5
- How have sales behaved per location, per speciality?

#### Base code

In [9]:
dfsx = data_processing_sales_A5(dfs)

In [10]:
dfsx

Unnamed: 0_level_0,Loc_Hom,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,BASILICA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,COAPA,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,Location_not_found,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA,MARINA
Unnamed: 0_level_1,Especialidad_match,ALERGOLOGIA,CARDIOLOGIA,DERMATOLOGIA,ENDOCRINOLOGIA,GASTROENTEROLGIA,GASTROENTEROLOGIA,GINECOLOGIA,MEDICINA GENERAL,NEFROLOGIA,NEUMOLOGIA,NEUROLOGIA,NUTRICION,OTORRINOLARINGOLOGIA,OTRA_ESP,PEDIATRIA,PROCTOLOGIA,UROLOGIA,ALERGOLOGIA,CARDIOLOGIA,DERMATOLOGIA,ENDOCRINOLOGIA,GASTROENTEROLGIA,GASTROENTEROLOGIA,GINECOLOGIA,MEDICINA GENERAL,NEFROLOGIA,NEUMOLOGIA,NEUROLOGIA,NUTRICION,OTORRINOLARINGOLOGIA,OTRA_ESP,PEDIATRIA,PROCTOLOGIA,UROLOGIA,ALERGOLOGIA,CARDIOLOGIA,DERMATOLOGIA,ENDOCRINOLOGIA,GASTROENTEROLGIA,GASTROENTEROLOGIA,GINECOLOGIA,MEDICINA GENERAL,NEFROLOGIA,NEUMOLOGIA,NEUROLOGIA,NUTRICION,OTORRINOLARINGOLOGIA,OTRA_ESP,PEDIATRIA,PROCTOLOGIA,UROLOGIA,ALERGOLOGIA,CARDIOLOGIA,DERMATOLOGIA,ENDOCRINOLOGIA,GASTROENTEROLGIA,GASTROENTEROLOGIA,GINECOLOGIA,MEDICINA GENERAL,NEFROLOGIA,NEUMOLOGIA,NEUROLOGIA,NUTRICION,OTORRINOLARINGOLOGIA,OTRA_ESP,PEDIATRIA,PROCTOLOGIA,UROLOGIA
BillDate,BillDate,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2
2018,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24055.0,0.0,0.0,16358.524125,14946.919968,0.0,0.0,0.0,0.0,399.0,0.0,2296.35,3791.0,0.0,0.0
2018,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,110127.264548,589.828118,0.0,47775.597325,80611.782086,0.0,0.0,0.0,0.0,650.0,0.0,10671.921374,12774.91647,0.0,950.2995
2018,12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,132459.6797,1649.466383,0.0,69807.432123,98880.921737,11268.572375,0.0,0.0,0.0,500.74875,0.0,30852.452017,17367.0,0.0,2800.0
2019,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,188120.124209,820.582549,0.0,104241.701246,132476.572375,9820.56905,0.0,0.0,0.0,1197.0,0.0,30172.693445,35843.231275,0.0,2901.0
2019,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,212830.952616,6864.42276,0.0,97797.124395,146218.401708,19177.135105,0.0,0.0,0.0,1846.374375,0.0,41600.077512,31507.0,0.0,4800.8985
2019,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,239758.061834,1648.717883,0.0,121563.793483,224399.57074,13019.858425,0.0,0.0,0.0,474.0,0.0,33042.562832,43074.945755,0.0,7300.0
2019,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,242566.650938,2256.001366,0.0,115554.153145,216626.618252,12401.818967,0.0,0.0,0.0,0.0,0.0,34715.034596,36159.6,0.0,4200.0
2019,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,287088.187477,998.995505,0.0,116693.136268,248066.105977,17309.80332,0.0,0.0,0.0,0.0,0.0,43858.533867,31322.5,0.0,4605.0
2019,6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,267931.579625,-111.611679,0.0,164019.867382,215674.41699,25675.940582,0.0,0.0,0.0,50.0,0.0,47483.373419,10773.0,0.0,2950.2995
2019,7,0.0,0.0,170961.5,4092.6,0.0,90139.0,64904.596238,9509.0,0.0,0.0,0.0,0.0,0.0,68339.748,0.0,0.0,15857.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,261952.377054,25069.188063,0.0,152559.274057,217338.140267,28001.31345,0.0,0.0,0.0,549.0,0.0,56857.267568,30963.1,0.0,2595.0


#### Further analysis

In [11]:
dfx = dfsx.copy()

In [12]:
## Select a specific location
sel_loc = "BASILICA"

In [13]:
## Restrict columns to the selected location and rows to months 4 to 8 of 2020
bill_year = dfx.index.get_level_values(0)
bill_month = dfx.index.get_level_values(1)
mc1 = dfx.columns.get_level_values(0) == sel_loc
mr1 = (bill_year == 2020) & ((bill_month >= 4) & (bill_month <= 8))
dfx = dfx.loc[mr1, mc1]

## Drop the location level so columns are named by speciality only
dfx.columns = dfx.columns.droplevel()

## Collapse the two-level row index into a single "<level 0>-<level 1>" string index
date_labels = dfx.index.get_level_values(0).astype("str") + "-" + dfx.index.get_level_values(1).astype("str")
dfx = dfx.assign(Date=date_labels).set_index("Date")

dfx

Especialidad_match,ALERGOLOGIA,CARDIOLOGIA,DERMATOLOGIA,ENDOCRINOLOGIA,GASTROENTEROLGIA,GASTROENTEROLOGIA,GINECOLOGIA,MEDICINA GENERAL,NEFROLOGIA,NEUMOLOGIA,NEUROLOGIA,NUTRICION,OTORRINOLARINGOLOGIA,OTRA_ESP,PEDIATRIA,PROCTOLOGIA,UROLOGIA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-4,0.0,0.0,214775.5,41884.0,0.0,78971.5,140847.0,10467.0,0.0,0.0,0.0,0.0,0.0,54006.3,0.0,0.0,148406.0
2020-5,0.0,0.0,200212.5,48833.25,0.0,115457.0,136023.5,9502.0,0.0,0.0,0.0,0.0,0.0,31593.5,0.0,0.0,135928.0
2020-6,0.0,0.0,213449.1,73635.0,0.0,119734.5,193195.0,9414.0,0.0,0.0,0.0,4630.5,0.0,26867.0,0.0,0.0,195370.0
2020-7,0.0,0.0,266223.5,128716.1,0.0,130554.2,267816.0,14501.0,0.0,0.0,0.0,14116.0,0.0,46110.0,0.0,0.0,204950.0
2020-8,0.0,0.0,297095.3,132070.5,0.0,129117.0,233173.5,11841.0,0.0,0.0,0.0,17446.0,0.0,26509.0,0.0,0.0,178636.0


In [62]:
def top_5_columns(row, no_tops=2):
    """Summarise a row as its ``no_tops`` largest entries plus a "Remaining" bucket.

    Despite the name, the number of ranked entries is set by ``no_tops``
    (default 2), not fixed at 5.

    Parameters
    ----------
    row : pandas.Series
        Numeric values indexed by label (in this notebook: sales per
        speciality for one month).
    no_tops : int, default 2
        Number of largest entries to report individually.

    Returns
    -------
    dict
        Keys ``"1_Top"``, ``"2_Top"``, ... (largest first) followed by
        ``"Remaining"``. Each value is a dict with:

        - ``"Spec"``: the entry's label (``"Remaining"`` for the last bucket),
        - ``"Sales"``: the entry's amount (the remaining bucket is rounded
          to one decimal),
        - ``"Part"``: share of the row total in percent, rounded to one decimal.

        If the row holds fewer than ``no_tops`` non-null entries, only the
        available ranks are returned. If the row total is zero, every
        ``"Part"`` is ``0.0`` instead of NaN.
    """
    row_total = row.sum()

    ## Rank once: nlargest returns the entries already sorted in descending order
    top_sales = row.nlargest(no_tops)
    sales_no_top = round(row_total - top_sales.sum(), 1)

    def participation(amount):
        ## A zero total would give 0/0 (NaN) or x/0 (inf); report 0% instead
        if row_total == 0:
            return 0.0
        return round((amount / row_total) * 100, 1)

    ## Obtaining top specialities
    res = {}
    for rank, (spec, sales) in enumerate(zip(top_sales.index, top_sales.values), start=1):
        res[str(rank) + "_Top"] = {
            "Spec": spec,
            "Sales": sales,
            "Part": participation(sales)
        }

    ## Adding to dict everything that was not included in the top
    res["Remaining"] = {
        "Spec": "Remaining",
        "Sales": sales_no_top,
        "Part": participation(sales_no_top)
    }

    return res

In [63]:
## Creating series with a selection of top 5 specialities and a grouping of remainings
res = dfx.apply(lambda x: top_5_columns(x, 5), axis=1)

In [64]:
## Flattening the per-date nested results into one dict keyed by
## (date, field, ranking position), ready to become a MultiIndex
resf = {
    (date, field, rank): value
    for date, ranking in res.items()
    for rank, entry in ranking.items()
    for field, value in entry.items()
}

# resf

In [65]:
## Creating dataframe with results: rows are (date, field) pairs and
## columns are the ranking positions (innermost key level is unstacked)
mux = pd.MultiIndex.from_tuples(list(resf.keys()))
dfx = pd.Series(list(resf.values()), index=mux).unstack()

In [66]:
dfx

Unnamed: 0,Unnamed: 1,1_Top,2_Top,3_Top,4_Top,5_Top,Remaining
2020-4,Part,31.2,21.5,20.4,11.5,7.8,7.6
2020-4,Sales,214776,148406,140847,78971.5,54006.3,52351
2020-4,Spec,DERMATOLOGIA,UROLOGIA,GINECOLOGIA,GASTROENTEROLOGIA,OTRA_ESP,Remaining
2020-5,Part,29.5,20.1,20.1,17,7.2,6.1
2020-5,Sales,200212,136024,135928,115457,48833.2,41095.5
2020-5,Spec,DERMATOLOGIA,GINECOLOGIA,UROLOGIA,GASTROENTEROLOGIA,ENDOCRINOLOGIA,Remaining
2020-6,Part,25.5,23.4,23.1,14.3,8.8,4.9
2020-6,Sales,213449,195370,193195,119734,73635,40911.5
2020-6,Spec,DERMATOLOGIA,UROLOGIA,GINECOLOGIA,GASTROENTEROLOGIA,ENDOCRINOLOGIA,Remaining
2020-7,Part,25,24.8,19.1,12.2,12,7


## Analysis 6
- How have sales behaved per location, per business line, per speciality?

#### Base code

In [None]:
dfsx = data_processing_sales_A6(dfs)

In [None]:
dfsx

#### Further analysis

---

## Analysis 7
- How have sales behaved per location, per provider, per business line?

In [None]:
dfsx = data_processing_sales_A7(dfs)

In [None]:
dfsx

---

# Data analysis - Appointments

## Importing and cleaning data

In [None]:
dfa = pd.read_csv(os.path.join(data_loc, appointment_data))

In [None]:
dfa = dfa_clean(dfa)
dfa

## Analysis 1
- How has the number of confirmed appointments behaved weekly since 2019 for each of the most relevant specialities?

In [None]:
loc='ALL'

In [None]:
dfax = data_processing_appointments_A1(dfa, loc=loc)

In [None]:
dfax

In [None]:
graph_appointments_A1(dfax, loc=loc)

## Analysis 2
- How has the number of confirmed appointments increased quarterly since 2019?

In [None]:
loc='ALL'

In [None]:
dfax = data_processing_appointments_A2(dfa, loc=loc)

In [None]:
dfax

In [None]:
dfax

In [None]:
graph_appointments_A2(dfax, loc=loc)

---

# Data analysis - Tickets

In [26]:
df_sapi = pd.read_csv("Data/Emitidos-Punto_Clinico_SAPI-2020-08.csv")
df_sc = pd.read_csv("Data/Emitidos-Punto_Clinico_SC-2020-08.csv")

In [21]:
df_sapi

Unnamed: 0,XML,Rfc Emisor,Nombre Emisor,Rfc Receptor,Nombre Receptor,Tipo,Serie,Folio,Fecha,Sub Total,Descuento,Total impuesto Trasladado,Nombre Impuesto,Total impuesto Retenido,Nombre Impuesto.1,Total,UUID,Método de Pago,Forma de Pago,Moneda,Tipo de Cambio,Versión,Uso CFDI,Régimen Fiscal,Estado,Estatus,Conceptos,Traslado IVA 16 %
0,1F5F0551-1247-458C-AC22-BCE0B5FEBBD0.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,ROTD9301098R2,DENNIS RODRIGUEZ TORRES,ingreso,CAFETALES,3651,2020-08-01T09:24:55,699.00,0.0,0.00,,0.0,,699.0,1F5F0551-1247-458C-AC22-BCE0B5FEBBD0,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 699.00 Importe ...,0.00
1,8BFD38DE-5B16-4A34-B199-97714D68E111.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,COOE810712LZ3,EDGAR DIONISIO COVARRUBIOS ORTIZ,ingreso,VILLA,3652,2020-08-01T12:49:41,2430.00,0.0,0.00,,0.0,,2430.0,8BFD38DE-5B16-4A34-B199-97714D68E111,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 2430.00 Importe...,0.00
2,06458633-7A54-462E-AB18-D0EB18773027.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,LMC741212JN6,GRUPO DIAGNOSTICO MEDICO PROA SA DE CV,ingreso,MARINA,3653,2020-08-03T09:06:39,325000.00,0.0,52000.00,002 - IVA,0.0,,377000.0,06458633-7A54-462E-AB18-D0EB18773027,PPD - Pago en parcialidades o diferido,99 - Otros,MXN,,3.3,P01 - Por definir,General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 50000 valorUnitario : 6.50 Import...,52000.00
3,BC1172F5-D755-45F8-AFA7-FCEBA8E704D9.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,OISG760103JE9,GLORIA GUADALUPE OLIVARES SANCHEZ,ingreso,CAFETALES,3654,2020-08-03T09:14:37,399.00,0.0,0.00,,0.0,,399.0,BC1172F5-D755-45F8-AFA7-FCEBA8E704D9,PUE - Pago en una sola exhibición,04 - Tarjetas de crédito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...,0.00
4,46EB9572-A7F4-48F6-9D18-7F1C2B67C4D4.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,HEFI770326LE5,IVONNE HERNANDEZ FLORES,ingreso,VILLA,3655,2020-08-03T09:31:27,370.69,0.0,59.31,002 - IVA,0.0,,430.0,46EB9572-A7F4-48F6-9D18-7F1C2B67C4D4,PUE - Pago en una sola exhibición,04 - Tarjetas de crédito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 370.69 Importe ...,59.31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,50046596-a28d-411d-a385-373e004eda6e.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,GAGB710226472,BLANCA LIDIA GARCIA GONZALEZ,ingreso,VILLA,3818,2020-08-31T13:33:27,1300.00,0.0,0.00,,0.0,,1300.0,50046596-a28d-411d-a385-373e004eda6e,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 1300.00 Importe...,0.00
196,928b53a6-5fed-4001-a217-9bb3dbea27c7.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,GAPA900601451,AXEL GARCIA POISOT,ingreso,VILLA,3819,2020-08-31T16:14:22,549.00,0.0,0.00,,0.0,,549.0,928b53a6-5fed-4001-a217-9bb3dbea27c7,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 549.00 Importe ...,0.00
197,58ee7419-a175-4d75-9198-a931609c252e.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,HEGR6908316W0,REINA HERNANDEZ GALLEGOS,ingreso,VILLA,3820,2020-08-31T17:16:51,399.00,0.0,0.00,,0.0,,399.0,58ee7419-a175-4d75-9198-a931609c252e,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...,0.00
198,1d236376-041d-4ad6-9095-33622e906388.xml,PCV180730UJ1,PUNTO CLINICO VISIONARIO SAPI DE CV,FAMJ7608234V8,JACOBO FRANQUIZ MUÑOZ,ingreso,VILLA,3821,2020-08-31T18:10:50,399.00,0.0,0.00,,0.0,,399.0,1d236376-041d-4ad6-9095-33622e906388,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...,0.00


In [32]:
df_sapi["Estado"].unique()

array(['Vigente'], dtype=object)

In [27]:
df_sapi.shape[0]

200

In [30]:
1 - (df_sapi.shape[0] / (df_sapi.shape[0] + df_sc.shape[0]))

0.7014925373134329

In [29]:
df_sc

Unnamed: 0,XML,Rfc Emisor,Nombre Emisor,Rfc Receptor,Nombre Receptor,Tipo,Serie,Folio,Fecha,Sub Total,Descuento,Total impuesto Trasladado,Nombre Impuesto,Total impuesto Retenido,Nombre Impuesto.1,Total,UUID,Método de Pago,Forma de Pago,Moneda,Tipo de Cambio,Versión,Uso CFDI,Régimen Fiscal,Estado,Estatus,Conceptos
0,e1870129-d4a4-41e9-b656-1efac1010b4e.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,CASL660621SXA,LUISA CHAVARRIA SALAS,ingreso,CAFETALES,1306,2020-08-01T08:34:04,399.0,0.0,0,,0.0,,399.0,e1870129-d4a4-41e9-b656-1efac1010b4e,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
1,d01e6b8f-8bf4-4780-b78c-2d14936f4a96.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,ROPE800823FE4,EDGAR DAVID RODRIGUEZ PEÑA,ingreso,CAFETALES,1307,2020-08-01T08:41:35,399.0,0.0,0,,0.0,,399.0,d01e6b8f-8bf4-4780-b78c-2d14936f4a96,PUE - Pago en una sola exhibición,01 - Efectivo,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
2,27a6a430-2268-4aca-9c46-ea6e34a91358.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,ROMH6705141F6,HECTOR ROSAS MORA,ingreso,CAFETALES,1308,2020-08-01T09:02:47,399.0,0.0,0,,0.0,,399.0,27a6a430-2268-4aca-9c46-ea6e34a91358,PUE - Pago en una sola exhibición,01 - Efectivo,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
3,e2a8a9f2-16bc-457a-9bd2-c1a48fad71a5.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,GAHA9207028B6,ANA GABRIELA GARCIA HERNANDEZ,ingreso,VILLA,1309,2020-08-01T10:12:16,899.0,0.0,0,,0.0,,899.0,e2a8a9f2-16bc-457a-9bd2-c1a48fad71a5,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 899.00 Importe ...
4,e694c160-6360-4db3-aa98-3866ac2e23a2.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,SOBA760901JS6,ALMA LUCERO SOSA BLANCAS,ingreso,CAFETALES,1310,2020-08-01T10:20:34,599.0,0.0,0,,0.0,,599.0,e694c160-6360-4db3-aa98-3866ac2e23a2,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,c0a0eb8b-502b-41d7-af7a-ea94cd546930.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,MGB051006HT8,MULTISISTEMAS GUJADI BOSS SEGURIDAD PRIVADA S...,ingreso,MARINA,1770,2020-08-31T17:04:30,399.0,0.0,0,,0.0,,399.0,c0a0eb8b-502b-41d7-af7a-ea94cd546930,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,G03 - Gastos en general,General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
466,e0843bfd-e086-4cee-ae31-048d5ab0f303.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,GOAJ750428CG0,JACOB GONZALEZ ALONSO,ingreso,CAFETALES,1771,2020-08-31T18:47:44,1755.0,0.0,0,,0.0,,1755.0,e0843bfd-e086-4cee-ae31-048d5ab0f303,PUE - Pago en una sola exhibición,01 - Efectivo,MXN,,3.3,"D01 - Honorarios médicos, dentales y gastos ho...",General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
467,f204c401-25d1-402b-bd84-3358970306aa.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,TEPR731105I94,RUBEN TELLEZ PEREZ,ingreso,VILLA,1772,2020-08-31T18:59:25,399.0,0.0,0,,0.0,,399.0,f204c401-25d1-402b-bd84-3358970306aa,PUE - Pago en una sola exhibición,28 - Tarjeta de Débito,MXN,,3.3,G03 - Gastos en general,General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 399.00 Importe ...
468,0eb557fb-4cd7-42d7-b38a-f282eb67c660.xml,SMP190128NHA,SERVICIOS MEDICOS PUNTO CLINICO SC,SOVL600421HU8,LIDIA SOTO VAZQUEZ,ingreso,MARINA,1773,2020-08-31T19:26:13,150.0,0.0,0,,0.0,,150.0,0eb557fb-4cd7-42d7-b38a-f282eb67c660,PUE - Pago en una sola exhibición,04 - Tarjetas de crédito,MXN,,3.3,G03 - Gastos en general,General de Ley Personas Morales,Vigente,S - Comprobante obtenido satisfactoriamente.,Cantidad : 1 valorUnitario : 150.00 Importe ...


In [34]:
## Count invoices per status ("Estado"); the method is `value_counts`
df_sc["Estado"].value_counts()

Vigente      468
Cancelado      2
Name: Estado, dtype: int64

In [25]:
df_sc.shape[0]

470

#### Información acerca de las facturas
- **Número de facturas generadas (total):** 670 (100%)
    - **Número de facturas generadas (mes agosto) (SAPI):** 200 (29.85%)
    - **Número de facturas generadas (mes agosto) (SC):** 470 (70.15%)
    
- Número de facturas canceladas (total): 2 (100%)
    - Número de facturas canceladas (SAPI): 0 (0%)
    - Número de facturas canceladas (SC): 2 (100%)

# *Notes*

#### Substituting characters with re

In [None]:
x = 'Melón'

In [None]:
x = re.sub('ó', 'o', x)

In [None]:
x

#### Creating formatted tables related to parameters

**Doctors dictionary**

In [78]:
meds_dict_ref

{'ARROYO COUTIÑO, IBER': {'Especialidad': ['UROLOGIA'],
  'Sitios': ['Punto Clínico- Coapa']},
 'ASCENCIO BARRIENTOS, CARLOS NORBERTO': {'Especialidad': ['GASTROENTEROLOGIA'],
  'Sitios': ['Punto Clinico- Marina',
   'Punto Clínico- Basílica',
   'LABORATORIO- Basílica']},
 'CHAYA SALGADO, SAID': {'Especialidad': ['DERMATOLOGIA'],
  'Sitios': ['Punto Clinico- Marina', 'LABORATORIO- Marina']},
 'CITALAN PROJ, HARRY WILLIAM': {'Especialidad': ['GASTROENTEROLOGIA'],
  'Sitios': ['Punto Clínico- Coapa']},
 'ENRIQUEZ ROMAN, ARLETTE': {'Especialidad': ['ENDOCRINOLOGIA'],
  'Sitios': ['Punto Clínico- Basílica',
   'Punto Clinico- Marina',
   'LABORATORIO- Marina',
   'LABORATORIO- Basílica']},
 'ERAZO PEREZ, LUSVI LUDGARDIZ': {'Especialidad': ['NEUMOLOGIA'],
  'Sitios': ['Punto Clínico- Coapa', 'LABORATORIO- Coapa']},
 'FRANCO ESTRADA, SILVIA': {'Especialidad': ['GASTROENTEROLOGIA'],
  'Sitios': ['Punto Clinico- Marina', 'Punto Clínico- Basílica']},
 'GARCIA CRUZ, LILIANA ESTHER': {'Especiali

In [86]:
pd.DataFrame.from_dict(meds_dict_ref, orient="index").to_csv("Doctors.csv")

**Specialities reference**

In [87]:
specialties_ref

{'GINECOLOGIA': ['GINECOLOGIA',
  'ENDOVAGINAL',
  'TRANSVAGINAL',
  'GINECO',
  'CERVIX',
  'VAGIN',
  'DIU',
  'OBSTETRIC',
  'EMBARAZ',
  'PROLACTIN',
  'COLPOSCOPIA',
  'PAPANICOLAOU'],
 'DERMATOLOGIA': ['DERMATOLOGIA'],
 'UROLOGIA': ['UROLOGIA', 'PROST', 'RENAL', 'PENESCOPIA', 'ANDROSCOPIA'],
 'PEDIATRIA': ['PEDIATRIA'],
 'ENDOCRINOLOGIA': ['ENDOCRINOLOGIA', 'TIROIDEO', 'TIROIDES'],
 'OTORRINOLARINGOLOGIA': ['OTORRINOLARINGOLOGIA',
  'OTORRINOLARINGO',
  'CERUMEN'],
 'GASTROENTEROLOGIA': ['GASTROENTEROLOGIA', 'GASTROENTEROLGIA'],
 'MEDICINA GENERAL': ['MEDICINA GENERAL'],
 'NEUMOLOGIA': ['NEUMOLOGIA'],
 'CARDIOLOGIA': ['CARDIOLOGIA'],
 'NUTRICION': ['NUTRICION'],
 'ALERGOLOGIA': ['ALERGOLOGIA'],
 'NEUROLOGIA': ['NEUROLOG'],
 'PROCTOLOGIA': ['PROCTOLOG']}

In [97]:
## Wrapping each keyword list in a Series lets lists of different lengths share
## one frame; the resulting gaps are filled with "-" and the frame is transposed
## so that each speciality becomes a row
spec_columns = {key: pd.Series(val) for key, val in specialties_ref.items()}
spec_table = pd.DataFrame(spec_columns).fillna("-").T
spec_table.to_csv("spec_refs.csv")

**Business lines**

In [98]:
business_line_ref

{'CONSULTA': ['CONSULTA MEDICA',
  'CONSULTA ESPECIALISTA',
  'CONSULTA DE',
  'CONSULTA MEDICINA GENERAL',
  'CONSULTA GENERAL',
  'OTORRINOLARINGOLOGIA',
  'CARDIOLOGIA',
  'TELECONSULTA',
  'DIETA',
  'REVISION'],
 'LABORATORIO': ['CHECK UP',
  'PERFIL TIROIDEO',
  'BIOMETRIA HEMATICA',
  'QUIMICA DE',
  'HEMOGLOBINA',
  'INSULINA',
  'CULTIVO',
  'EXAMEN',
  'PRUEBA',
  'COPROCULTIVO',
  'AMILASA',
  'VDRL',
  'AMIBA',
  'HORMONA',
  'WINTROBE',
  'GLUCOSA',
  'PROLACTINA',
  'TIEMPO DE',
  'PERFIL',
  'GRUPO SANGUINEO',
  'VIRUS INMUNODEFICIENCIA',
  'HIV',
  'CITOLOGIA',
  'ANTICUERPOS',
  'ANTIGENO',
  'PROGESTERONA',
  'DEHIDROEPIANDROSTERONA',
  'TRANSAMINASA'],
 'ULTRASONIDO': ['ULTRASON'],
 'PROCEDIMIENTOS': ['ELECTROFULGURACION',
  'COLPOSCOPIA',
  'PAPANICOLAOU',
  'RASTREO',
  'INFOGRAFIA',
  'BIOPSIA',
  'BIOPISA',
  'APLICACION DE',
  'TOMA',
  'RETIRO',
  'EXTRACCCION',
  'INSERCION',
  'ENVIADO A',
  'CRIOTERAPIA',
  'SCOPIA',
  'CIRUGIA',
  'INYECCION',
  'ELECTROCAR

In [100]:
## Wrapping each keyword list in a Series lets lists of different lengths share
## one frame; the resulting gaps are filled with "-" and the frame is transposed
## so that each business line becomes a row
bl_columns = {key: pd.Series(val) for key, val in business_line_ref.items()}
bl_table = pd.DataFrame(bl_columns).fillna("-").T
bl_table.to_csv("BL_refs.csv")

---
---