# Example usage

To use `cat2cat` in a project:

### Load data

In [21]:
# cat2cat datasets
from cat2cat.datasets import load_trans, load_occup
# `occup` is the frame that later cells filter by `year`; `trans` is the
# table handed to the mapping helpers.
occup = load_occup()
trans = load_trans()

### Low-level functions

In [22]:

# Low-level functions
from cat2cat.mappings import get_mappings, get_freqs, cat_apply_freq


# Lookup built from the transition table; its "to_new" entry is used below.
# Deliberately NOT called `mappings`: the next cell binds that name to a
# `cat2cat_mappings` object, and sharing the name makes out-of-order re-runs
# silently pass the wrong kind of object to `cat2cat`.
trans_mappings = get_mappings(trans)

# Codes observed in the 2010 period, converted to strings (the mapping is
# indexed with string codes such as '3481' below).
codes_new = occup.loc[occup["year"] == 2010, "code"].map(str).to_numpy()
freqs = get_freqs(codes_new)

# Combine each "to_new" candidate list with the frequencies computed above.
mapp_new_p = cat_apply_freq(trans_mappings["to_new"], freqs)

# Candidate new-period codes for '3481' and their frequency-based probabilities
(trans_mappings["to_new"]['3481'], mapp_new_p['3481'])

(['441401', '441402', '441403', '441490'], [0.0, 0.6, 0.0, 0.4])

### cat2cat procedure

In [23]:
from cat2cat import cat2cat
from cat2cat.dataclass import cat2cat_data, cat2cat_mappings, cat2cat_ml

from pandas import DataFrame, concat
from sklearn.ensemble import RandomForestClassifier

# Fixed seed: both the random forest and the row sample at the end of this
# cell are stochastic, so without it the output changes on every
# Restart & Run All.
SEED = 42

# One independent copy of the occupation data per survey year.
# NOTE(review): o_2006 and o_2012 are not used in this cell; kept because
# later cells may rely on them.
o_2006 = occup[occup["year"] == 2006].copy()
o_2008 = occup[occup["year"] == 2008].copy()
o_2010 = occup[occup["year"] == 2010].copy()
o_2012 = occup[occup["year"] == 2012].copy()

# The 2008/2010 pair is the one processed here; these are aliases of the
# frames above (same objects, not additional copies).
o_old, o_new = o_2008, o_2010

# Inputs of the procedure: the two periods with their category and time
# columns, the transition table with the direction string, and the ML setup
# (presumably data, target column, feature columns, classifiers - confirm
# against the cat2cat_ml signature).
data = cat2cat_data(o_old, o_new, "code", "code", "year")
mappings = cat2cat_mappings(trans, "backward")
ml = cat2cat_ml(
    o_new,
    "code",
    ["salary", "age"],
    [RandomForestClassifier(random_state=SEED)],
)

res = cat2cat(data, mappings, ml)

# Stack both periods back into a single frame.
data_final = concat([res["old"], res["new"]])

sub_cols = [
    "id",
    "edu",
    "code",
    "year",
    "index_c2c",
    "g_new_c2c",
    "rep_c2c",
    "wei_naive_c2c",
    "wei_freq_c2c",
]

# Five reproducibly sampled rows per year, limited to the identifying columns
# and the `*_c2c` columns.
data_final.groupby("year").sample(5, random_state=SEED)[sub_cols]

Unnamed: 0,id,edu,code,year,index_c2c,g_new_c2c,rep_c2c,wei_naive_c2c,wei_freq_c2c
222634,33349,6,7121,2008,222634,711201,4,0.25,0.035714
108644,24494,3,2242,2008,108644,223290,9,0.111111,0.018182
123520,25750,6,9132,2008,123520,941201,18,0.055556,0.093144
187485,30627,1,2321,2008,187485,233006,34,0.029412,0.067718
23236,18140,1,2419,2008,23236,242105,46,0.021739,0.0
37447,37448,1,235914,2010,3684,235914,1,1.0,1.0
39214,39215,4,411003,2010,5451,411003,1,1.0,1.0
35735,35736,6,522304,2010,1972,522304,1,1.0,1.0
38611,38612,1,112006,2010,4848,112006,1,1.0,1.0
47756,47757,1,234201,2010,13993,234201,1,1.0,1.0
