Source of data: [Abridged life tables by province](https://kosis.kr/statHtml/statHtml.do?orgId=101&tblId=DT_1B44&conn_path=I2&language=en), [Abridged life tables](https://kosis.kr/statHtml/statHtml.do?orgId=101&tblId=DT_1B41&conn_path=I2&language=en),<br>[Life Tables for Korea, 2023 (PDF)](https://kostat.go.kr/board.es?mid=a20108060000&bid=11746&act=view&list_no=434451)

wiki > [Demographics of South Korea](https://en.wikipedia.org/wiki/Demographics_of_South_Korea#Life_expectancy) / [Население Республики Корея](https://ru.wikipedia.org/wiki/Население_Республики_Корея#Ожидаемая_продолжительность_жизни)

In [2]:
import pandas as pd
import math
import re

import sys
sys.path.append("..")
import mal_moduls_private.mal_total as mal

In [3]:
# The two provinces shown next to the national figures (they become the
# `_r1` and `_r2` column groups). Each value is compared for exact equality
# with the 'province' column of the provincial table.
PROVINCE_1 = 'Gyeonggi-do'
PROVINCE_2 = 'Seoul'

In [4]:
# Source header -> short column name. The keys double as the list of columns
# to load, so the `usecols` and `rename` arguments cannot drift apart.
# Note: the "By age " key ends with a space, exactly as in the original loader.
province_columns = {
    "By province": "province",
    "By age ": "age",
    "Expectation of life at age (Total) (Years)": "total",
    "Expectation of life at age (Male) (Years)": "male",
    "Expectation of life at age (Female) (Years)": "female",
}

# skiprows=1: the first line of the file is skipped, the second one is the header.
df_provinces = pd.read_csv(
    'data/South_Korea_2023 (UTF8).csv',
    skiprows=1,
    usecols=list(province_columns),
).rename(columns=province_columns)

print(df_provinces.shape)
df_provinces.head()

(374, 5)


Unnamed: 0,province,age,total,male,female
0,Seoul,0,85.0,82.0,88.0
1,Seoul,1,84.2,81.2,87.1
2,Seoul,5,80.2,77.2,83.2
3,Seoul,10,75.2,72.2,78.2
4,Seoul,15,70.3,67.3,73.2


In [5]:
def allocate_region_from_df(df_total, region):
    """Extract the life-expectancy table of a single province.

    Parameters
    ----------
    df_total : pandas.DataFrame
        Frame containing at least the columns 'province', 'age', 'male',
        'total' and 'female'.
    region
        Value looked up in the 'province' column (exact equality).

    Returns
    -------
    pandas.DataFrame
        The rows of ``region``, indexed by 'age' (index name set to ''),
        with the columns in the order 'male', 'total', 'female'.

    Raises
    ------
    ValueError
        If no row of ``df_total`` has ``region`` in its 'province' column.
        Without this check a misspelled name would silently yield an empty
        frame.
    """
    is_region = df_total['province'] == region
    if not is_region.any():
        raise ValueError(f"no rows found for province {region!r}")

    df_region = df_total.loc[is_region]
    df_region = df_region.set_index('age')[['male', 'total', 'female']]
    df_region.index.name = ''
    return df_region

# One per-province table for each of the two configured provinces.
df_region_1, df_region_2 = (
    allocate_region_from_df(df_provinces, province)
    for province in (PROVINCE_1, PROVINCE_2)
)

print(df_region_1.shape)
df_region_1

(22, 3)


Unnamed: 0,male,total,female
,,,
0.0,81.2,83.9,86.6
1.0,80.4,83.1,85.7
5.0,76.5,79.1,81.8
10.0,71.5,74.2,76.8
15.0,66.5,69.2,71.9
20.0,61.6,64.3,66.9
25.0,56.7,59.4,62.0
30.0,51.9,54.5,57.1
35.0,47.0,49.6,52.2


<br>

In [7]:
# Source header -> short column name. The keys double as the list of columns
# to load, so the `usecols` and `rename` arguments cannot drift apart.
country_columns = {
    "By age": "age",
    "Expectation of life at age (Total) (Years)": "total",
    "Expectation of life at age (Male) (Years)": "male",
    "Expectation of life at age (Female) (Years)": "female",
}

# skiprows=1: the first line of the file is skipped, the second one is the header.
df_country = pd.read_csv(
    'data/South_Korea_2023-whole_country (UTF8).csv',
    skiprows=1,
    usecols=list(country_columns),
).rename(columns=country_columns)

# Keep only the token before the first space of each age label, as an int.
# The whole column is replaced on purpose: since pandas 2.0 an assignment via
# `.loc[:, 'age'] = ...` writes into the existing column in place, so the
# parsed values would not end up in a proper integer column.
df_country['age'] = df_country['age'].map(lambda st: int(st.split(' ')[0]))

df_country = df_country.set_index('age')[['male', 'total', 'female']]
df_country.index.name = ''

print(df_country.shape)
df_country.head()

(22, 3)


Unnamed: 0,male,total,female
,,,
0.0,80.6,83.5,86.4
1.0,79.8,82.7,85.6
5.0,75.8,78.7,81.6
10.0,70.9,73.8,76.7
15.0,65.9,68.8,71.7


<br>

In [9]:
# Put the national table and the two provincial tables side by side; the
# provincial columns are told apart by the suffixes `_r1` and `_r2`.
df = pd.concat(
    [df_country, df_region_1.add_suffix('_r1'), df_region_2.add_suffix('_r2')],
    axis='columns',
)

# The source frames are no longer needed once they are merged.
del df_country, df_provinces, df_region_1, df_region_2

# Female-minus-male gap, inserted right after each (male, total, female) group.
df.insert(loc=3, column='fΔm', value=df['female'].sub(df['male']).round(1))
df.insert(loc=7, column='fΔm_r1', value=df['female_r1'].sub(df['male_r1']).round(1))
df.insert(loc=11, column='fΔm_r2', value=df['female_r2'].sub(df['male_r2']).round(1))

print(df.shape)
df.loc[[0, 1, 5, 65, 80, 90, 95, 100]]

(22, 12)


Unnamed: 0,male,total,female,fΔm,male_r1,total_r1,female_r1,fΔm_r1,male_r2,total_r2,female_r2,fΔm_r2
,,,,,,,,,,,,
0.0,80.6,83.5,86.4,5.8,81.2,83.9,86.6,5.4,82.0,85.0,88.0,6.0
1.0,79.8,82.7,85.6,5.8,80.4,83.1,85.7,5.3,81.2,84.2,87.1,5.9
5.0,75.8,78.7,81.6,5.8,76.5,79.1,81.8,5.3,77.2,80.2,83.2,6.0
65.0,19.2,21.5,23.6,4.4,19.6,21.6,23.6,4.0,20.3,22.6,24.9,4.6
80.0,8.3,9.7,10.7,2.4,8.5,9.8,10.8,2.3,9.0,10.5,11.8,2.8
90.0,3.9,4.7,5.0,1.1,4.1,4.7,5.1,1.0,4.3,5.3,5.8,1.5
95.0,2.6,3.2,3.4,0.8,2.8,3.2,3.4,0.6,3.0,3.7,4.0,1.0
100.0,1.8,2.2,2.3,0.5,2.0,2.3,2.3,0.3,2.1,2.7,2.8,0.7


In [10]:
# Selected ages only, with the columns regrouped by sex (national, region 1,
# region 2) so that the three values can be compared side by side.
df.loc[[0, 1, 5, 65, 80, 90, 95, 100], ['male', 'male_r1', 'male_r2', 'female', 'female_r1', 'female_r2']]

Unnamed: 0,male,male_r1,male_r2,female,female_r1,female_r2
,,,,,,
0.0,80.6,81.2,82.0,86.4,86.6,88.0
1.0,79.8,80.4,81.2,85.6,85.7,87.1
5.0,75.8,76.5,77.2,81.6,81.8,83.2
65.0,19.2,19.6,20.3,23.6,23.6,24.9
80.0,8.3,8.5,9.0,10.7,10.8,11.8
90.0,3.9,4.1,4.3,5.0,5.1,5.8
95.0,2.6,2.8,3.0,3.4,3.4,4.0
100.0,1.8,2.0,2.1,2.3,2.3,2.8


In [11]:
# Markup prefix and DataFrame column of every value cell of a table row, in
# output order: (total, male, female, fΔm) for the whole country, then the
# same four for region 1 and for region 2. The first cell of each regional
# group additionally sets `border-left-width:2px`.
_LE_ROW_CELLS = (
    ('||style="padding-right:2ex;background:#e0ffd8;"| ', 'total'),
    ('||style="padding-right:2ex;background:#eaf3ff;"| ', 'male'),
    ('||style="padding-right:2ex;background:#fee7f6;"| ', 'female'),
    ('|| ', 'fΔm'),
    ('||style="padding-right:2ex;border-left-width:2px;background:#e0ffd8;"| ', 'total_r1'),
    ('||style="padding-right:2ex;background:#eaf3ff;"| ', 'male_r1'),
    ('||style="padding-right:2ex;background:#fee7f6;"| ', 'female_r1'),
    ('|| ', 'fΔm_r1'),
    ('||style="padding-right:2ex;border-left-width:2px;background:#e0ffd8;"| ', 'total_r2'),
    ('||style="padding-right:2ex;background:#eaf3ff;"| ', 'male_r2'),
    ('||style="padding-right:2ex;background:#fee7f6;"| ', 'female_r2'),
    ('|| ', 'fΔm_r2'),
)


def create_table(df, file_header, lang='en'):
    """Render ``df`` as table markup, one table row per DataFrame row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'total', 'male', 'female', 'fΔm' and the
        same four names with the suffixes '_r1' and '_r2'. The index label
        of each row is written into the first cell of that row.
    file_header : str
        Name of a UTF-8 text file inside the 'design/' directory; its content
        is placed verbatim in front of the generated rows.
    lang : str, default 'en'
        With 'ru', every '.' standing between two digits in the generated
        rows is replaced by ','. The header text is not modified. Any other
        value leaves the numbers as they are.

    Returns
    -------
    str
        Header text, followed by the rows, followed by the closing '|}' line.

    Raises
    ------
    OSError
        If the header file cannot be opened.
    KeyError
        If one of the expected columns is missing from ``df``.
    """
    with open('design/' + file_header, mode='r', encoding="utf-8") as fh:
        table_header = fh.read()

    # Collect the rows in a list and join once instead of growing a string.
    # The f-strings below never reuse their own quote character inside a
    # replacement field, so they also parse on Python versions before 3.12.
    rows = []
    for age, ser in df.iterrows():
        cells = ' '.join(f'{prefix}{ser.loc[column]}' for prefix, column in _LE_ROW_CELLS)
        rows.append(f'\n|-\n|style="padding-right:2ex;"| {age} {cells}')
    st = ''.join(rows)

    if lang == 'ru':
        st = re.sub('(?<=\\d)\\.(?=\\d)', ',', st)  # replace . to comma, if this . is between two digits

    return table_header + st + '\n|}'


# Write one table file per language; header and output file names differ only
# in the language suffix.
for lang_code in ('ru', 'en'):
    table_code = create_table(df, file_header=f'LE_header_extended -{lang_code}.txt', lang=lang_code)
    with open(f'output/Table code for LE extended -{lang_code}.txt', 'w', encoding="utf-8") as fh:
        fh.write(table_code)

<br>