# Example usage

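The examples below show how to use `cat2cat` in a project: loading the bundled datasets, calling the low-level mapping functions, and running the full `cat2cat` procedure.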

### Load data

In [1]:
# Example datasets provided by cat2cat.datasets
from cat2cat.datasets import load_trans, load_occup

occup = load_occup()
trans = load_trans()

### Low-level functions

In [2]:

# Low-level functions
from cat2cat.mappings import get_mappings, get_freqs, cat_apply_freq

# Build the mappings from the transition table loaded earlier.
mappings = get_mappings(trans)

# `occup` is the dataset already loaded in the "Load data" cell, so it is
# reused here rather than loaded a second time.
# Codes observed in 2010, converted to strings.
codes_new = occup.code[occup.year == 2010].map(str).values
freqs = get_freqs(codes_new)
mapp_new_p = cat_apply_freq(mappings["to_new"], freqs)

# For the code "3481": the candidate codes from the "to_new" mapping and the
# frequency-based probability attached to each candidate.
(mappings["to_new"]["3481"], mapp_new_p["3481"])

(['441401', '441402', '441403', '441490'], [0.0, 0.6, 0.0, 0.4])

### cat2cat procedure

In [3]:
from cat2cat import cat2cat
from cat2cat.dataclass import cat2cat_data, cat2cat_mappings, cat2cat_ml

from pandas import DataFrame, concat
from sklearn.ensemble import RandomForestClassifier

# One independent copy of the occupation data per year.
o_2006 = occup.loc[occup.year == 2006, :].copy()
o_2008 = occup.loc[occup.year == 2008, :].copy()
o_2010 = occup.loc[occup.year == 2010, :].copy()
o_2012 = occup.loc[occup.year == 2012, :].copy()

# 2008 is used as the old period and 2010 as the new period; the aliases
# refer to the very same DataFrame objects as o_2008 and o_2010.
o_old, o_new = o_2008, o_2010

# Inputs of the procedure: the two periods, the mapping direction and the
# machine-learning setup (fitted on the new period with two features).
data = cat2cat_data(o_old, o_new, "code", "code", "year")
mappings = cat2cat_mappings(trans, "backward")
features = ["salary", "age"]
models = [RandomForestClassifier()]
ml = cat2cat_ml(o_new, "code", features, models)

# Run the procedure, stack both returned periods and preview random rows.
res = cat2cat(data, mappings, ml)
data_final = concat([res["old"], res["new"]])
data_final.sample(5)

Unnamed: 0,id,age,sex,edu,exp,district,parttime,salary,code,multiplier,year,code4,index_c2c,g_new_c2c,rep_c2c,wei_naive_c2c,wei_freq_c2c
43805,43806,48.959711,False,4,24.039711,14,1.0,23814.997211,814208,611.706547,2010,8142,10042,814208,1,1.0,1.0
50853,50854,33.503743,False,6,15.583743,4,1.0,15920.851078,752106,567.751734,2010,7521,17090,752106,1,1.0,1.0
16902,16903,33.809735,False,1,4.979735,3,0.67,19272.250602,2321,410.323637,2008,2321,5436,232003,34,0.029412,0.025046
18024,18025,30.503545,False,1,7.503545,8,1.0,220797.186407,2419,575.660107,2008,2419,21519,242107,46,0.021739,0.018519
19829,19830,39.112248,False,1,13.282248,8,1.0,46225.35613,2321,488.8157,2008,2321,44731,233010,34,0.029412,0.000928
