In [61]:
from pathlib import Path
import pandas as pd
from typing import Dict, Union
import json

SOURCE_PATH = Path('/home/jovyan/work/source/')


class BornMortFeatures():
    """Assemble birth/mortality statistics features for a user.

    On construction the following files are loaded from ``source_path``:

    * ``id2featureproxy.json`` -- mapping of user id (string key) to a record
      that provides at least ``born_year`` and ``gender``;
    * ``bornstat.csv`` -- table indexed by its ``Year`` column;
    * ``babybooms.csv`` -- table indexed by its ``year`` column;
    * ``deathcause/male.csv`` and ``deathcause/female.csv`` -- header-less
      tables whose first column is used as the index and whose remaining
      columns are named ``deathcause_<code>``.
    """

    # Birth years outside this span are clamped before the bornstat and
    # deathcause lookups (presumably the span those tables cover -- TODO confirm).
    MIN_BORN_YEAR = 1959
    MAX_BORN_YEAR = 1988

    # Suffixes used to name the deathcause columns
    # (presumably cause-of-death codes -- TODO confirm).
    DEATHCAUSE_CODES = (47, 93, 99, 159)

    def __init__(self,
                 source_path: Union[str, Path] = SOURCE_PATH):
        """Load every lookup table from ``source_path``.

        Args:
            source_path: directory holding the data files; a ``str`` or a
                ``Path``. Defaults to the module-level ``SOURCE_PATH``.
        """
        # Honour the argument (it used to be ignored in favour of the global)
        # and accept plain strings as the annotation always advertised.
        source_path = Path(source_path)
        with open(source_path / 'id2featureproxy.json') as f:
            self.id2featureproxy = json.load(f)
        self.bornstat_df = pd.read_csv(source_path / 'bornstat.csv', index_col='Year')
        self.babybooms_df = pd.read_csv(source_path / 'babybooms.csv',
                                        index_col='year')
        deathcause_columns = [f'deathcause_{x}' for x in self.DEATHCAUSE_CODES]
        self.deathcause_df_dict = {
            gender: pd.read_csv(source_path / 'deathcause' / f'{gender}.csv',
                                index_col=0,
                                names=deathcause_columns)
            for gender in ['male', 'female']
        }

    def shorten_range(self,
                      num: int,
                      start: int,
                      end: int) -> int:
        """Clamp ``num`` into the closed interval ``[start, end]``."""
        return min(end, max(start, num))

    def get_born_data(self,
                      born_year: int) -> pd.Series:
        """Return the ``bornstat`` row for ``born_year`` (clamped to the supported span)."""
        born_year = self.shorten_range(born_year, self.MIN_BORN_YEAR, self.MAX_BORN_YEAR)
        return self.bornstat_df.loc[born_year, :]

    def get_deathcause_data(self,
                            born_year: int,
                            gender: str) -> pd.Series:
        """Return the deathcause row for ``born_year`` (clamped) and ``gender``.

        Raises:
            KeyError: if ``gender`` is neither ``'male'`` nor ``'female'``.
        """
        born_year = self.shorten_range(born_year, self.MIN_BORN_YEAR, self.MAX_BORN_YEAR)
        return self.deathcause_df_dict[gender].loc[born_year, :]

    def get_babybooms_data(self,
                           born_year: int) -> pd.Series:
        """Return the ``babybooms`` row for ``born_year``.

        Raises:
            KeyError: if ``born_year`` is not present in the table.
        """
        # NOTE(review): unlike the sibling lookups the year is not clamped
        # here -- confirm that babybooms.csv covers every possible birth year.
        return self.babybooms_df.loc[born_year, :]

    def get_featureproxy_by_id(self,
                               user_id: Union[int, str]) -> Dict[str, Union[str, int]]:
        """Return the proxy record stored for ``user_id``.

        Raises:
            KeyError: if the id is unknown.
        """
        # JSON object keys are always strings, so an int id has to be
        # converted before the lookup; string ids pass through unchanged.
        return self.id2featureproxy[str(user_id)]

    def get_features_by_id(self,
                           user_id: Union[int, str]) -> Dict[str, float]:
        """Merge the born, deathcause and babybooms rows for ``user_id`` into one dict.

        Later sources overwrite earlier ones if column names collide.
        """
        features_proxy = self.get_featureproxy_by_id(user_id)
        born_year = features_proxy['born_year']
        features = {}
        features.update(self.get_born_data(born_year))
        features.update(self.get_deathcause_data(born_year, features_proxy['gender']))
        features.update(self.get_babybooms_data(born_year))
        return features
        
        
        
# USAGE EXAMPLE
# Builds the lookup tables from the files under SOURCE_PATH and, as the last
# expression of the cell, displays the merged feature mapping for one user.
# The id is passed as a string here.
BornMortFeatures().get_features_by_id('101366310')

{'BraOAT15': 0.0588331213514659,
 'BraOAT20': 0.3061152452371223,
 'BraOAT25': 0.2910194505353261,
 'BraOAT30': 0.1927615386238635,
 'BraOAT35': 0.1073702546276313,
 'BraOAT40': 0.0377711401555045,
 'BraOAT45': 0.005333118473884,
 'BraOAT50': 0.0007961309952021,
 'deathcause_47': 0.0448735574719826,
 'deathcause_93': 0.0781433570803649,
 'deathcause_99': 0.0475232262633837,
 'deathcause_159': 0.0350008332291796,
 'babyboom_delta': 0.197196261682243}

In [68]:
# Pack the source data directory into a gzip-compressed tarball next to it.
# The path is absolute, so tar strips the leading '/' from member names and
# prints a notice about it.
! tar -czf /home/jovyan/work/source.tar.gz /home/jovyan/work/source

tar: Removing leading `/' from member names
