Source of data: [Abridged life tables by province](https://kosis.kr/statHtml/statHtml.do?orgId=101&tblId=DT_1B44&conn_path=I2&language=en), [Abridged life tables](https://kosis.kr/statHtml/statHtml.do?orgId=101&tblId=DT_1B41&conn_path=I2&language=en),<br>[Life Tables for Korea, 2023 (PDF)](https://kostat.go.kr/board.es?mid=a20108060000&bid=11746&act=view&list_no=434451)

wiki > [List of South Korean regions by life expectancy](https://en.wikipedia.org/wiki/List_of_South_Korean_regions_by_life_expectancy) / [Продолжительность жизни в регионах Республики Корея](https://ru.wikipedia.org/wiki/Продолжительность_жизни_в_регионах_Республики_Корея)

In [2]:
import pandas as pd
import math
import re

import sys
sys.path.append("..")
import mal_moduls_private.mal_total as mal

In [3]:
# The two provinces compared side by side with the whole-country figures.
# Each value is matched exactly against the 'province' column of the
# per-province table when the regional sub-tables are extracted.
PROVINCE_1 = 'Gyeonggi-do'
PROVINCE_2 = 'Seoul'

In [4]:
# Source column header -> short column name. The keys are the exact headers
# found in the CSV (note the trailing space in "By age "); the same mapping
# selects the columns to load and renames them.
province_columns = {
    "By province": "province",
    "By age ": "age",
    "Number of people left alive at age (Total) (Person)": "total",
    "Number of people left alive at age (Male) (Person)": "male",
    "Number of people left alive at age (Female) (Person)": "female",
}

# skiprows=1: the first line of the file is skipped, so the second line is
# used as the header row.
df_provinces = (
    pd.read_csv('data/South_Korea_2023 (UTF8).csv',
                skiprows=1,
                usecols=list(province_columns))
      .rename(columns=province_columns)
)

print(df_provinces.shape)
df_provinces.head()

(374, 5)


Unnamed: 0,province,age,total,male,female
0,Seoul,0,100000,100000,100000
1,Seoul,1,99800,99771,99831
2,Seoul,5,99764,99744,99787
3,Seoul,10,99745,99722,99769
4,Seoul,15,99689,99668,99712


In [5]:
def allocate_region_from_df(df_total, region):
    """Return the rows of one province as an age-indexed table.

    Parameters
    ----------
    df_total : DataFrame with at least the columns 'province', 'age',
        'male', 'total' and 'female'.
    region : value compared for equality against the 'province' column.

    Returns
    -------
    DataFrame indexed by 'age' (index name set to an empty string) with the
    columns 'male', 'total', 'female' in that order. If no row matches
    ``region`` the result is empty.
    """
    is_region = df_total['province'] == region
    region_rows = df_total.loc[is_region, ['age', 'male', 'total', 'female']]
    # Blank out the index name so that no "age" label is attached to the index.
    return region_rows.set_index('age').rename_axis('')

# Extract one age-indexed table per configured province.
df_region_1, df_region_2 = (
    allocate_region_from_df(df_provinces, province)
    for province in (PROVINCE_1, PROVINCE_2)
)

print(df_region_1.shape)
df_region_1

(22, 3)


Unnamed: 0,male,total,female
,,,
0.0,100000.0,100000.0,100000.0
1.0,99791.0,99803.0,99815.0
5.0,99733.0,99744.0,99756.0
10.0,99692.0,99703.0,99715.0
15.0,99634.0,99650.0,99666.0
20.0,99498.0,99534.0,99571.0
25.0,99300.0,99376.0,99452.0
30.0,99029.0,99159.0,99290.0
35.0,98708.0,98904.0,99101.0


<br>

In [7]:
# Source column header -> short column name for the whole-country table.
# The same mapping selects the columns to load and renames them.
country_columns = {
    "By age": "age",
    "Number of people left alive at age (Total)": "total",
    "Number of people left alive at age (Male)": "male",
    "Number of people left alive at age (Female)": "female",
}

df_country = (
    pd.read_csv('data/South_Korea_2023-whole_country (UTF8).csv',
                skiprows=1,  # skip the first line; the second line is the header row
                usecols=list(country_columns))
      .rename(columns=country_columns)
      # The age label is text whose part before the first space is the age
      # number. Building a new column (instead of assigning through
      # `.loc[:, 'age']`, which writes in place and keeps the object dtype on
      # pandas >= 2.0) guarantees a real integer column on every pandas
      # version, so the index matches the integer ages of the regional tables.
      .assign(age=lambda frame: frame['age'].str.split(' ').str[0].astype(int))
      .set_index('age')[['male', 'total', 'female']]
)
df_country.index.name = ''

print(df_country.shape)
df_country.head()

(22, 3)


Unnamed: 0,male,total,female
,,,
0.0,100000.0,100000.0,100000.0
1.0,99732.0,99758.0,99786.0
5.0,99673.0,99703.0,99734.0
10.0,99635.0,99666.0,99699.0
15.0,99582.0,99613.0,99646.0


<br>

In [9]:
# Put the whole-country table and the two regional tables side by side,
# aligned on the age index.
area_suffixes = ('', '_r1', '_r2')  # '' = whole country, '_r1'/'_r2' = the two regions

df = pd.concat([df_country, df_region_1, df_region_2], axis='columns')
df.columns = [f'{sex}{suffix}'
              for suffix in area_suffixes
              for sex in ('male', 'total', 'female')]

# The source frames are no longer needed once combined.
del df_country, df_provinces, df_region_1, df_region_2

# Right after each area's 'female' column add the female-minus-male difference
# and the female/male ratio. The difference is not rounded because the inputs
# are integers; the ratio is rounded to two decimals.
for suffix in area_suffixes:
    female = df[f'female{suffix}']
    male = df[f'male{suffix}']
    position = df.columns.get_loc(f'female{suffix}') + 1
    df.insert(loc=position, column=f'fΔm{suffix}', value=female - male)
    df.insert(loc=position + 1, column=f'ratio_fm{suffix}', value=(female / male).round(2))

print(df.shape)
df.loc[[0, 1, 5, 65, 80, 90, 95, 100]]

(22, 15)


Unnamed: 0,male,total,female,fΔm,ratio_fm,male_r1,total_r1,female_r1,fΔm_r1,ratio_fm_r1,male_r2,total_r2,female_r2,fΔm_r2,ratio_fm_r2
,,,,,,,,,,,,,,,
0.0,100000.0,100000.0,100000.0,0.0,1.0,100000.0,100000.0,100000.0,0.0,1.0,100000.0,100000.0,100000.0,0.0,1.0
1.0,99732.0,99758.0,99786.0,54.0,1.0,99791.0,99803.0,99815.0,24.0,1.0,99771.0,99800.0,99831.0,60.0,1.0
5.0,99673.0,99703.0,99734.0,61.0,1.0,99733.0,99744.0,99756.0,23.0,1.0,99744.0,99764.0,99787.0,43.0,1.0
65.0,89007.0,91902.0,94798.0,5791.0,1.07,90075.0,92579.0,95083.0,5008.0,1.06,90365.0,92913.0,95462.0,5097.0,1.06
80.0,63640.0,72729.0,81819.0,18179.0,1.29,65254.0,73689.0,82125.0,16871.0,1.26,67765.0,76154.0,84544.0,16779.0,1.25
90.0,22445.0,33235.0,44024.0,21579.0,1.96,24341.0,34289.0,44238.0,19897.0,1.82,27420.0,39089.0,50759.0,23339.0,1.85
95.0,6799.0,13109.0,19419.0,12620.0,2.86,7903.0,13750.0,19598.0,11695.0,2.48,9789.0,17890.0,25991.0,16202.0,2.66
100.0,966.0,2794.0,4621.0,3655.0,4.78,1268.0,2985.0,4702.0,3434.0,3.71,1854.0,5000.0,8147.0,6293.0,4.39


In [10]:
# Same selected ages, with the male columns grouped first and the female
# columns second (whole country, region 1, region 2 within each group).
ages_shown = [0, 1, 5, 65, 80, 90, 95, 100]
columns_by_sex = [f'{sex}{suffix}'
                  for sex in ('male', 'female')
                  for suffix in ('', '_r1', '_r2')]
df.loc[ages_shown, columns_by_sex]

Unnamed: 0,male,male_r1,male_r2,female,female_r1,female_r2
,,,,,,
0.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
1.0,99732.0,99791.0,99771.0,99786.0,99815.0,99831.0
5.0,99673.0,99733.0,99744.0,99734.0,99756.0,99787.0
65.0,89007.0,90075.0,90365.0,94798.0,95083.0,95462.0
80.0,63640.0,65254.0,67765.0,81819.0,82125.0,84544.0
90.0,22445.0,24341.0,27420.0,44024.0,44238.0,50759.0
95.0,6799.0,7903.0,9789.0,19419.0,19598.0,25991.0
100.0,966.0,1268.0,1854.0,4621.0,4702.0,8147.0


In [11]:
def create_table(df, file_header, lang='en'):
    """Build the wikitext source of the extended "percentage surviving" table.

    Parameters
    ----------
    df : DataFrame with one row per age and, for each suffix '', '_r1' and
        '_r2', the columns 'male', 'female', 'fΔm' and 'ratio_fm' followed by
        that suffix. The first row of ``df`` is not rendered.
    file_header : name of a UTF-8 text file inside the 'design/' directory;
        its content is placed verbatim in front of the generated rows.
    lang : when equal to 'ru', a dot standing between two digits in the
        generated rows is replaced by a comma. Any other value leaves the
        rows unchanged.

    Returns
    -------
    str : header text + one wikitext row per rendered age + the closing '|}'.

    Raises
    ------
    OSError if the header file cannot be opened; KeyError if an expected
    column is missing.
    """

    def transform_value(x, prec=1):
        # Divide by 1000 and keep `prec` decimals, i.e. a percentage when the
        # input is a count of survivors per 100,000.
        return f"{x/1000:0.{prec}f}"

    def group_cells(ser, suffix):
        # Four cells (male, female, difference, ratio) for one column group.
        # Values are looked up first so the f-strings below contain no nested
        # quotes of the same kind, which keeps them valid before Python 3.12.
        male = transform_value(ser.loc['male' + suffix])
        female = transform_value(ser.loc['female' + suffix])
        diff = transform_value(ser.loc['fΔm' + suffix])
        ratio = ser.loc['ratio_fm' + suffix]
        # Groups with a non-empty suffix get a thicker left border on their first cell.
        border = 'border-left-width:2px;' if suffix else ''
        return [
            f'||style="color:blue;padding-right:3ex;{border}"| {male}',
            f'||style="color:red;padding-right:3ex;"| {female}',
            f'||style="color:darkgray;padding-right:3ex;"| {diff}',
            f'||style="color:darkgray;"| {ratio:0.2f}',
        ]

    with open('design/' + file_header, mode='r', encoding="utf-8") as fh:
        table_header = fh.read()

    rows = []
    for age in df.index.to_list()[1:]:  # the first row of df is skipped
        ser = df.loc[age]
        cells = [f'|style="padding-right:2ex;"| {age}']
        for suffix in ('', '_r1', '_r2'):
            cells.extend(group_cells(ser, suffix))
        # Cells are separated by one space; the last cell has no trailing space.
        rows.append('\n|-\n' + ' '.join(cells))
    st = ''.join(rows)

    if lang == 'ru':
        # Decimal comma: replace a dot only when it sits between two digits.
        # Applied before the header is prepended, so the header stays untouched.
        st = re.sub(r'(?<=\d)\.(?=\d)', ',', st)

    return table_header + st + '\n|}'


# Render the table once per language and save each result next to the other
# outputs; header template and output file differ only by the language code.
for lang_code in ('ru', 'en'):
    table_code = create_table(df,
                              file_header=f'Perc_surviving_header_extended -{lang_code}.txt',
                              lang=lang_code)
    output_path = f'output/Table code for percentage surviving extended -{lang_code}.txt'
    with open(output_path, 'w', encoding="utf-8") as fh:
        fh.write(table_code)