# Example usage

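The examples below show how to use `cat2cat` in a project: loading the bundled datasets, calling the low-level mapping functions, and running the full `cat2cat` procedure.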

### Load data

In [1]:
# Example datasets bundled with the cat2cat package.
from cat2cat.datasets import load_trans, load_occup
# `trans` is later passed to get_mappings / cat2cat_mappings;
# presumably the table linking old and new category codes — confirm in the cat2cat docs.
trans = load_trans()
# `occup` has (at least) `year`, `code`, `salary` and `age` columns, which later cells rely on.
occup = load_occup()

/Users/maciejnasinski/python/cat2cat/src/cat2cat/data/trans.csv
/Users/maciejnasinski/python/cat2cat/src/cat2cat/data/occup.pkl


### Low-level functions

In [2]:

# Low-level functions
from cat2cat.mappings import get_mappings, get_freqs, cat_apply_freq

# Mappings derived from the `trans` table; only the "to_new" entry is used in this cell.
mappings = get_mappings(trans)

occup = load_occup()
# Codes observed in the 2010 rows, converted to strings.
codes_new = occup.loc[occup.year == 2010, "code"].map(str).values
freqs = get_freqs(codes_new)

to_new = mappings["to_new"]
mapp_new_p = cat_apply_freq(to_new, freqs)

# Candidate codes mapped from '3481', alongside the frequency-based probability of each.
to_new['3481'], mapp_new_p['3481']

/Users/maciejnasinski/python/cat2cat/src/cat2cat/data/occup.pkl


(['441401', '441402', '441403', '441490'], [0.0, 0.6, 0.0, 0.4])

### cat2cat procedure

In [3]:
from cat2cat import cat2cat
from cat2cat.dataclass import cat2cat_data, cat2cat_mappings, cat2cat_ml

from pandas import DataFrame, concat
from sklearn.ensemble import RandomForestClassifier

# Single seed shared by the random forest and the row preview, so that
# "Restart Kernel -> Run All" yields the same weights and the same sampled rows.
SEED = 1234

# One frame per survey year; `.copy()` makes each an independent frame
# rather than a slice of `occup`.
o_2006 = occup.loc[occup.year == 2006, :].copy()
o_2008 = o_old = occup.loc[occup.year == 2008, :].copy()
o_2010 = o_new = occup.loc[occup.year == 2010, :].copy()
o_2012 = occup.loc[occup.year == 2012, :].copy()

# Old and new period frames, the category column in each ("code"), and the time column ("year").
# NOTE(review): argument meaning is read off the call site — confirm against the cat2cat_data docs.
data = cat2cat_data(o_old, o_new, "code", "code", "year")
# Transition table plus the mapping direction.
# NOTE(review): "backward" presumably maps the older period onto the newer encoding — confirm.
mappings = cat2cat_mappings(trans, "backward")
# ML setup: frame, target column "code", feature columns, and the list of models;
# the forest is seeded so that its fit is deterministic.
ml = cat2cat_ml(
    o_new,
    "code",
    ["salary", "age"],
    [RandomForestClassifier(random_state=SEED)],
)

res = cat2cat(data, mappings, ml)
# Stack both periods into one frame and preview a reproducible random sample of rows.
data_final = concat([res["old"], res["new"]])
data_final.sample(5, random_state=SEED)

Unnamed: 0,id,age,sex,edu,exp,district,parttime,salary,code,multiplier,year,code4,index_c2c,g_new_c2c,rep_c2c,wei_naive_c2c,wei_freq_c2c
37880,37881,35.081697,True,4,11.501697,14,1.0,38956.635541,432103,620.721854,2010,4321,4117,432103,1,1.0,1.0
31277,31278,29.404688,True,1,2.574688,17,0.333333,7271.85735,3119,381.5345,2008,3119,196609,311901,33,0.030303,0.0
20643,20644,29.474568,False,1,5.144568,6,0.183333,4918.169395,2321,533.942083,2008,2321,55456,233012,34,0.029412,0.119666
33164,33165,58.755668,True,6,39.505668,17,0.5,13541.599624,5159,505.933189,2008,5159,220297,541908,23,0.043478,0.0
26742,26743,65.973851,False,1,47.973851,2,1.0,57177.518505,1211,321.174403,2008,1211,136440,112007,4,0.25,0.4
