# OSN postprocessing

Spracovanie výstupu algoritmu do formy využiteľnej pre ďalšie analýzy

In [126]:
import pandas as pd
from tqdm.auto import tqdm
tqdm.pandas()

from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.worksheet.copier import WorksheetCopy

In [2]:
# Aby fungovalo nacitanie spolocnych funkcii, je nutne ich nainstalovat cez `pip install .` na urovni, kde sa nachadza pyproject.toml

from OSN_common.nacitanie_dat import nacitaj_vsetku_starostlivost, nacitaj_vystup_algoritmu, nacitaj_vsetku_starostlivost_s_ms, nacitaj_zoznam_ms, nacitaj_programovy_profil, nacitaj_zoznam_nemocnic, nacitaj_siet_nemocnic
from OSN_common.pomocne_funkcie import prirad_vekovu_kategoriu, zisti_uroven_ms


In [3]:
# Year passed to the data loaders and embedded in the output file names.
_rok = 2023
# Version label passed to the data loaders and embedded in the output file names.
_verzia = 'v2024.2'
# Only referenced by the commented-out nacitaj_vystup_algoritmu call in this
# notebook; presumably the algorithm's command-line switches — TODO confirm.
_prepinace_algoritmu = '-vnd'

# Pripojenie výstupu algoritmu ku všetkej starostlivosti

In [4]:
# vystup = nacitaj_vystup_algoritmu(_rok, _verzia, _prepinace_algoritmu)
# starostlivost = nacitaj_vsetku_starostlivost(_rok)
# final = starostlivost.merge(vystup, on='id_hp', how='left')

In [96]:
# Load the working frame through the shared loader for the configured year and
# version. Later cells read its `vek_roky`, `ms`, `id_hp` and `pzs_8` columns.
final = nacitaj_vsetku_starostlivost_s_ms(_rok, _verzia)

## Pripojenie úrovní medicínskych služieb

In [None]:
# Map each case's age in years to an age category using the shared helper.
final['vekova_kategoria'] = final['vek_roky'].apply(prirad_vekovu_kategoriu)
# Split the '~'-separated service codes into one list per case. The vectorized
# string accessor leaves missing values missing, so no explicit NaN guard is
# needed, and it matches how `ms` is split elsewhere in this notebook.
final['ms_list'] = final['ms'].str.split('~')


In [None]:
# Load the list of medical services for this version and keep only the rows
# whose `zdielana_ms` flag is False.
zoznam_ms = nacitaj_zoznam_ms(_verzia)
je_zdielana = zoznam_ms['zdielana_ms']
zoznam_ms = zoznam_ms.loc[~je_zdielana]

In [None]:
# One row per (case, medical service): explode the per-case list of codes.
stlpce_pre_explode = ['id_hp', 'pzs_8', 'vekova_kategoria', 'ms_list']
final_exploded = final[stlpce_pre_explode].explode('ms_list')

# Attach the service code and every `uroven_*` column from the service list;
# the left join keeps exploded rows whose code has no match.
urovne_podla_ms = zoznam_ms.filter(regex='kod_ms|uroven_')
final_exploded = final_exploded.merge(
    urovne_podla_ms,
    left_on='ms_list',
    right_on='kod_ms',
    how='left',
)


In [None]:
# Evaluate the shared helper once per exploded row (with the tqdm progress bar
# registered by tqdm.pandas()), then store the result as nullable Int16.
uroven_po_riadkoch = final_exploded.progress_apply(zisti_uroven_ms, axis=1)
final_exploded['uroven_ms'] = uroven_po_riadkoch.astype('Int16')

In [None]:
# Collapse the exploded rows back to one string per case: the per-service
# levels are joined with '~' in row order, each value rendered through str().
urovne_ms = (
    final_exploded
    .groupby('id_hp')['uroven_ms']
    .progress_apply(lambda skupina: '~'.join(map(str, skupina)))
)

In [None]:
# Attach the joined level string to each case (default inner join on id_hp).
# NOTE(review): if the loaded frame already has an `uroven_ms` column, this
# merge yields `uroven_ms_x` / `uroven_ms_y` instead — confirm against the loader.
final = pd.merge(final, urovne_ms, on='id_hp')
# The first code of the list is taken as the main medical service.
final['hlavna_ms'] = final['ms_list'].str.get(0)

## MS na úrovne programu 

In [105]:
# Reload the dataset through the shared loader for the program-level summary.
# NOTE(review): the name `all` shadows the Python builtin for the rest of the
# session; consider renaming it together with its use in the following cell.
all = nacitaj_vsetku_starostlivost_s_ms(_rok, _verzia)

In [124]:
# Slim working copy holding only the columns the program-level pivot needs.
# NOTE(review): this rebinds `final`; the export cells further down still read
# columns such as `hlavna_ms` and `pzs_8` from `final`, which this frame lacks
# when the notebook is run top to bottom — confirm the intended cell order.
final = all[['id_hp', 'ms', 'uroven_ms', 'pzs_6']].copy()

# NOTE(review): the program profile is loaded for the literal year 2024, not
# `_rok` — confirm this is intended.
programovy_profil = nacitaj_programovy_profil(2024, _verzia)
cisla_programov = programovy_profil['cislo_programu'].unique()

# Program number = characters 1-2 of the first service code.
# NOTE(review): astype(int) raises if any `ms` value is missing.
prva_ms = final['ms'].str.split('~').str[0]
final.loc[:, 'cislo_programu'] = prva_ms.str[1:3].astype(int)

# Level of the first (main) service; rows without any level get the '<NA>' label.
prva_uroven = final['uroven_ms'].str.split('~').str[0]
final.loc[:, 'uroven_hlavnej_ms'] = prva_uroven.fillna('<NA>')

zoznam_nemocnic = nacitaj_zoznam_nemocnic()
kraje = zoznam_nemocnic['kraj'].unique()

# Hospital level is taken from the index of the hospital network, re-keyed by
# pzs_6; providers without a match fall back to level 1.
uroven_podla_pzs = (
    nacitaj_siet_nemocnic(_rok)
    .index
    .to_frame()
    .set_index('pzs_6')[['uroven_nemocnice']]
)
final = final.reset_index().merge(
    uroven_podla_pzs,
    how='left',
    left_on='pzs_6',
    right_index=True,
)
final['uroven_nemocnice'] = final['uroven_nemocnice'].fillna(1).astype(int)

# Add the region of each provider and the name of each program.
kraj_podla_pzs = zoznam_nemocnic[['pzs_6', 'kraj']]
nazvy_programov = programovy_profil[['cislo_programu', 'nazov_programu']].drop_duplicates()
final = final.merge(kraj_podla_pzs, how='left', on='pzs_6')
final = final.merge(nazvy_programov, how='left', on='cislo_programu')

# Every (hospital level, main-service level) combination, used to give all
# pivots the same column layout.
full_column_index = pd.MultiIndex.from_product(
    [range(1, 6), ['1', '2', '3', '4', '5', '<NA>']],
    names=['uroven_nemocnice', 'uroven_hlavnej_ms'],
)

In [None]:
def create_pivot_table(data, column_index=None, programy=None):
    """Count cases per program and (hospital level, main-service level).

    Args:
        data: DataFrame with the columns ``id_hp``, ``cislo_programu``,
            ``uroven_nemocnice`` and ``uroven_hlavnej_ms``.
        column_index: Column MultiIndex the result is aligned to. Defaults to
            the notebook-level ``full_column_index``.
        programy: Program numbers the rows are aligned to, in output order.
            Defaults to the notebook-level ``cisla_programov``.

    Returns:
        Integer DataFrame with one row per entry of ``programy`` and one
        column per entry of ``column_index``. Each cell is the number of
        non-null ``id_hp`` values; combinations absent from ``data`` are 0.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if column_index is None:
        column_index = full_column_index
    if programy is None:
        programy = cisla_programov

    pivot = pd.pivot_table(
        data,
        values='id_hp',
        index=['cislo_programu'],
        columns=['uroven_nemocnice', 'uroven_hlavnej_ms'],
        aggfunc='count',
    ).fillna(0).astype(int)

    # Force a fixed layout so every pivot lines up with the same template cells.
    pivot = pivot.reindex(columns=column_index, fill_value=0)
    pivot = pivot.reindex(programy, fill_value=0)

    return pivot

def write_pivot_into_template(ws, pivot):
    """Write the pivot's values into every second worksheet row.

    Args:
        ws: Worksheet whose cells are overwritten through ``ws.cell``.
        pivot: DataFrame whose values (without index or header) are written.

    The first pivot row goes to worksheet row 3, the next to row 5, and so on;
    within a row the values start at column 2.
    """
    first_row, first_col, row_step = 3, 2, 2

    data_rows = dataframe_to_rows(pivot, index=False, header=False)
    for row_no, values in enumerate(data_rows):
        target_row = first_row + row_no * row_step
        for col_no, value in enumerate(values):
            ws.cell(row=target_row, column=first_col + col_no, value=value)

# Workbook template to fill and the destination of the filled copy.
template_name = f'TEMPLATE_pocet_MS_na_uroven_programu_iba_hlavne_{_rok}_{_verzia}.xlsx'
output_name = f'Vystupy/pocet_MS_na_uroven_programu_iba_hlavne_{_rok}_{_verzia}_podla_krajov.xlsx'

wb = load_workbook(template_name)

# The template's active sheet receives the counts over all rows of `final`.
ws = wb.active
pivot = create_pivot_table(final)
write_pivot_into_template(ws, pivot)

# One extra sheet per region: copy the active sheet, name it after the region
# and overwrite its data cells with that region's counts.
for kraj in kraje:
    ws = wb.copy_worksheet(wb.active)
    ws.title = kraj

    riadky_kraja = final[final['kraj'] == kraj]
    pivot = create_pivot_table(riadky_kraja)
    write_pivot_into_template(ws, pivot)

wb.save(output_name)



# pzs_6 = 'P43059'

# ws = wb.create_sheet(pzs_6)

# pivot = pd.pivot_table(final[final['pzs_6'] == pzs_6], values='id_hp', index=['cislo_programu', 'nazov_programu'], columns=['pzs_6', 'uroven_nemocnice', 'uroven_hlavnej_ms'], aggfunc='count').fillna(0).astype(int)

# for r in dataframe_to_rows(pivot):
#     ws.append(r)
    
# wb.save(output_name)

# Export

In [None]:
# Columns written to the CSV export, in output order.
vystupne_stlpce = [
    'id_hp', 'vek_roky', 'vek_dni', 'hmotnost', 'upv',
    'diagnozy', 'vykony', 'drg', 'erv',
    'typ_starostlivosti', 'typ_hospitalizacie',
    'druh_prijatia', 'dovod_prijatia', 'dovod_prepustenia',
    'posledna_dgn_prijem', 'posledna_dgn_prepustenie', 'posledny_pohyb_poistenca',
    'diagnozy_z_01', 'vykony_z_01', 'novorodenec',
    'obdobie_od', 'obdobie_do', 'datum_od', 'datum_do', 'osetrovacia_doba',
    'id_poistenca', 'datum_narodenia', 'pohlavie', 'kod_pobytu', 'kod_zp',
    'pzs_12', 'IDENTIFZAR', 'pzs_8', 'pzs_6', 'pzs_ico', 'id_hp_pzs',
    'ms', 'uroven_ms',
]

# filter(items=...) keeps only the listed columns that exist in `final` and
# silently skips the rest.
# NOTE(review): when the notebook is run top to bottom, `final` is the slim
# frame built in the program-level section, so most of these columns would be
# missing from the export — confirm the intended cell order.
export_frame = final.filter(items=vystupne_stlpce)
export_frame.to_csv(
    f'Vystupy/osn_vsetka_starostlivost_{_rok}_ms_{_verzia}.csv',
    sep=';',
    index=False,
)


In [None]:
# Number of cases per (main service, provider pzs_8).
pocty_hlavne = final.groupby(['hlavna_ms', 'pzs_8'])['id_hp'].count()
pocty_hlavne = pocty_hlavne.rename('pocet_hospitalizacnych_pripadov')
pocty_hlavne.to_csv(f'Vystupy/pocet_MS_na_nemocnicu_iba_hlavne_{_rok}_{_verzia}.csv')

# The same count over every service of a case, taken from the exploded frame
# with its `ms_list` column exposed as `ms`.
vsetky_ms = final_exploded.rename(columns={'ms_list': 'ms'})
pocty_vsetky = vsetky_ms.groupby(['ms', 'pzs_8'])['id_hp'].count()
pocty_vsetky = pocty_vsetky.rename('pocet_hospitalizacnych_pripadov')
pocty_vsetky.to_csv(f'Vystupy/pocet_MS_na_nemocnicu_{_rok}_{_verzia}.csv')


In [None]:
# Number of cases per (main service, provider ICO, hospitalization type).
kluce_ico = ['hlavna_ms', 'pzs_ico', 'typ_hospitalizacie']
pocty_ico = final.groupby(kluce_ico)['id_hp'].count()
pocty_ico = pocty_ico.rename('pocet_hospitalizacnych_pripadov')
pocty_ico.to_csv(f'Vystupy/pocet_MS_na_ico_iba_hlavne_{_rok}_{_verzia}.csv')

## Kontrola s pôvodným výstupom

In [None]:
# Reload the stored output and line it up with the current `final` by case id.
final_old = nacitaj_vsetku_starostlivost_s_ms(_rok, _verzia)

# Both sides carry an `ms` column, so the merge exposes them as `ms_x`
# (current) and `ms_y` (reloaded); the outer join keeps cases present on
# only one side.
povodne_ms = final_old[['id_hp', 'ms']]
merged = pd.merge(final, povodne_ms, on='id_hp', how='outer')

In [None]:
# Count the cases whose service string differs between the two outputs.
# NaN != NaN evaluates to True, so rows where `ms` is missing on both sides are
# excluded explicitly; otherwise they would be reported as mismatches.
both_missing = merged['ms_x'].isna() & merged['ms_y'].isna()
len(merged[(merged['ms_x'] != merged['ms_y']) & ~both_missing])

In [None]:
# Keep the mismatching rows for inspection. NaN != NaN evaluates to True, so
# rows where `ms` is missing on both sides are excluded explicitly rather than
# being listed as differences.
both_missing = merged['ms_x'].isna() & merged['ms_y'].isna()
a = merged[(merged['ms_x'] != merged['ms_y']) & ~both_missing]