# Example usage

To use `cat2cat` in a project:

### Load data

In [9]:
# Example datasets shipped with cat2cat.
from cat2cat.datasets import load_occup, load_trans

# `trans` is passed to the mapping helpers further down; `occup` provides the
# `year` and `code` columns that the later cells filter and read.
trans, occup = load_trans(), load_occup()

### Low-level functions

In [10]:

# Low-level helpers behind the mapping step
from cat2cat.mappings import cat_apply_freq, get_freqs, get_mappings

mappings = get_mappings(trans)

# Category codes of the rows whose year is 2010, converted to strings.
codes_new = occup.loc[occup.year == 2010, "code"].map(str).values
freqs = get_freqs(codes_new)

# Combine the "to_new" mapping with the frequencies computed above.
mapp_new_p = cat_apply_freq(mappings["to_new"], freqs)

# For code '3481': the codes it maps to, next to their frequency-based
# probabilities.
(mappings["to_new"]["3481"], mapp_new_p["3481"])

(['441401', '441402', '441403', '441490'], [0.0, 0.6, 0.0, 0.4])

### cat2cat procedure

In [13]:
from cat2cat import cat2cat
from cat2cat.dataclass import cat2cat_data, cat2cat_mappings, cat2cat_ml

from pandas import DataFrame, concat
from sklearn.ensemble import RandomForestClassifier

# Single seed shared by the classifier and the row sample, so that
# Restart Kernel -> Run All shows the same table every time.
SEED = 1234

# One frame per year; 2008 is also bound to `o_old` and 2010 to `o_new`,
# which are the two periods handed to cat2cat below.
o_2006 = occup.loc[occup.year == 2006, :].copy()
o_2008 = o_old = occup.loc[occup.year == 2008, :].copy()
o_2010 = o_new = occup.loc[occup.year == 2010, :].copy()
o_2012 = occup.loc[occup.year == 2012, :].copy()

# Presumably (old, new, category column in old, category column in new,
# time column) -- confirm the field order against the cat2cat documentation.
data = cat2cat_data(o_old, o_new, "code", "code", "year")
# NOTE: this rebinds `mappings`; in the low-level cell the same name holds the
# result of get_mappings(trans), which is a different kind of object.
mappings = cat2cat_mappings(trans, "backward")
# The classifier is seeded; an unseeded forest is refit differently on every run.
ml = cat2cat_ml(
    o_new,
    "code",
    ["salary", "age"],
    [RandomForestClassifier(random_state=SEED)],
)

res = cat2cat(data, mappings, ml)
data_final = concat([res["old"], res["new"]])

# Positional subset of the result columns kept for display.
sub_cols = [0, 1, 2, 8, 10, 12, 13, 14, 15, 16]
# Five rows per year; `random_state` pins which rows are drawn.
data_final.groupby(["year"]).sample(5, random_state=SEED).iloc[:, sub_cols]

Unnamed: 0,id,age,sex,code,year,index_c2c,g_new_c2c,rep_c2c,wei_naive_c2c,wei_freq_c2c
207895,32214,31.947866,False,2321,2008,207895,233017,34,0.029412,0.010204
2186,16699,55.489737,False,1212,2008,2186,112010,16,0.0625,0.040541
52633,20425,20.008143,True,9321,2008,52633,932911,18,0.055556,0.088889
224147,33474,44.746327,True,2321,2008,224147,233020,34,0.029412,0.071429
94624,23434,52.756364,True,7222,2008,94624,722290,8,0.125,0.215447
43533,43534,63.787755,True,932901,2010,9770,932901,1,1.0,1.0
40731,40732,41.236292,False,831207,2010,6968,831207,1,1.0,1.0
50017,50018,60.597606,False,222101,2010,16254,222101,1,1.0,1.0
43750,43751,52.941089,False,512090,2010,9987,512090,1,1.0,1.0
43884,43885,40.383556,True,312202,2010,10121,312202,1,1.0,1.0
