# Example usage

To use `cat2cat` in a project, load the data and apply the functions as shown in the sections below.

### Load data

In [1]:
# Datasets exposed by `cat2cat.datasets`.
from cat2cat.datasets import load_occup, load_trans

# `trans` is the table handed to `get_mappings` / `cat2cat_mappings` further on;
# `occup` is the frame that is later filtered on its `year` column.
trans, occup = load_trans(), load_occup()

### Low-level functions

In [2]:

# Low-level helpers from `cat2cat.mappings`, applied one step at a time.
from cat2cat.mappings import cat_apply_freq, get_freqs, get_mappings

# Build the mapping lookups from the `trans` table; the "to_new" entry is
# indexed by a code string at the end of this cell.
mappings = get_mappings(trans)

# Codes found in the 2010 rows of `occup`, converted to strings.
is_2010 = occup.year == 2010
codes_new = occup.loc[is_2010, "code"].map(str).values

# `get_freqs` summarises those codes (presumably occurrence counts - confirm
# in the cat2cat docs); `cat_apply_freq` combines them with the "to_new" mapping.
freqs = get_freqs(codes_new)
mapp_new_p = cat_apply_freq(mappings["to_new"], freqs)

# For one example code, show its mapped candidate codes next to the values
# that `cat_apply_freq` produced for them.
example_code = '3481'
(mappings["to_new"][example_code], mapp_new_p[example_code])

(['441401', '441402', '441403', '441490'], [0.0, 0.6, 0.0, 0.4])

### cat2cat procedure

In [3]:
from cat2cat import cat2cat
from cat2cat.dataclass import cat2cat_data, cat2cat_mappings, cat2cat_ml

from pandas import DataFrame, concat

# One independent copy of the `occup` rows per year, so later steps never
# write back into the shared source frame.
o_2006 = occup.loc[occup.year == 2006, :].copy()
# 2008 and 2010 are the two periods passed to `cat2cat_data`; the
# `o_old` / `o_new` aliases point at the same objects as `o_2008` / `o_2010`.
o_2008 = o_old = occup.loc[occup.year == 2008, :].copy()
o_2010 = o_new = occup.loc[occup.year == 2010, :].copy()
o_2012 = occup.loc[occup.year == 2012, :].copy()

# Positional arguments: the old-period frame, the new-period frame, then the
# column names "code", "code" and "year" (presumably the category column of
# each period and the time column - confirm against the `cat2cat_data` docs).
data = cat2cat_data(o_old, o_new, "code", "code", "year")
# The `trans` table together with the direction string "backward".
mappings = cat2cat_mappings(trans, "backward")

# The result is indexed by "old" and "new", one frame per period.
res = cat2cat(data, mappings)

# Stack both periods and preview the original columns next to the `_c2c`
# columns present in the result.
data_final = concat([res["old"], res["new"]])
sub_cols = [
    "id",
    "edu",
    "code",
    "year",
    "index_c2c",
    "g_new_c2c",
    "rep_c2c",
    "wei_naive_c2c",
    "wei_freq_c2c",
]
# A fixed `random_state` makes the 5-rows-per-year preview identical on every
# Restart & Run All; without it each run displays a different sample.
data_final.groupby(["year"]).sample(5, random_state=42).loc[:, sub_cols]

Unnamed: 0,id,edu,code,year,index_c2c,g_new_c2c,rep_c2c,wei_naive_c2c,wei_freq_c2c
70997,21733,1,2321,2008,70997,233001,34,0.029412,0.039889
53635,20505,4,3115,2008,53635,311512,13,0.076923,0.0
211310,32476,2,2321,2008,211310,233003,34,0.029412,0.000928
185096,30444,6,7415,2008,185096,751505,6,0.166667,0.166667
107709,24437,6,7242,2008,107709,741220,16,0.0625,0.006579
43103,43104,3,432303,2010,9340,432303,1,1.0,1.0
50782,50783,1,232003,2010,17019,232003,1,1.0,1.0
46563,46564,1,227101,2010,12800,227101,1,1.0,1.0
45265,45266,1,132103,2010,11502,132103,1,1.0,1.0
39614,39615,3,333101,2010,5851,333101,1,1.0,1.0


### With ML

In [4]:
from sklearn.neighbors import KNeighborsClassifier

# Arguments for `cat2cat_ml`: the 2010 frame (`o_new`), the column name "code",
# a list of two column names, and a list with one default-configured
# k-nearest-neighbours classifier.
ml_features = ["salary", "age"]
ml_models = [KNeighborsClassifier()]
ml = cat2cat_ml(o_new, "code", ml_features, ml_models)

# cat2cat