# ACRO Tests

In [1]:
import os
import sys
import pandas as pd
import numpy as np

In [2]:
# Put the parent directory at the front of the module search path
# (presumably so the local acro package is importable -- confirm layout).
parent_dir = os.path.abspath(os.pardir)
sys.path.insert(0, parent_dir)

In [3]:
from acro import ACRO, add_constant, utils

### Instantiate ACRO

In [4]:
# Create the ACRO object that every "acro." call in the cells below uses.
acro = ACRO()

INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False}


### Load test data

In [5]:
# Alternative data source kept for reference (local Stata file):
# path = os.path.join("../data", "test_data.dta")
# df = pd.read_stata(path)
# df.head()
from sklearn.datasets import fetch_openml

# Download OpenML dataset 26 as pandas objects.
nursery = fetch_openml(data_id=26, as_frame=True)

# Work on the feature frame and attach the target as a "recommend" column.
df = nursery.data
df["recommend"] = nursery.target
# print(df.describe())
df.head()

  warn(


Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,recommend
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority


### Convert 'more than 3' children to a random integer between 4 and 9

In [14]:
# Turn the "children" column into integers and spread the "more" category
# over random values above 3.
print(df["children"].unique())

# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: with pandas Copy-on-Write the in-place form only changes a
# temporary object and leaves df untouched.
df["children"] = pd.to_numeric(df["children"].replace(to_replace={"more": "4"}))
print(f"values before apply transformation: {df['children'].unique()}")

# Keep 1, 2 and 3 as they are; every other value is redrawn at random.
# np.random.randint excludes the upper bound, so the draws are 4..9.
# Drawing one candidate per row and selecting with np.where replaces the
# much slower row-by-row DataFrame.apply.
keep_as_is = df["children"].isin((1, 2, 3))
random_large = np.random.randint(4, 10, size=len(df))
df["children"] = np.where(keep_as_is, df["children"], random_large)
print(f"values after apply transformation: {df['children'].unique()}")

df.describe()

[1 2 3 5 9 7 4 8 6]
values before apply transforamtion: [1 2 3 5 9 7 4 8 6]
values after apply transformation: [1 2 3 5 6 9 7 8 4]


Unnamed: 0,children
count,12960.0
mean,3.13642
std,2.265549
min,1.0
25%,1.75
50%,2.5
75%,3.25
max,9.0


### Pandas crosstab

In [7]:
# Plain pandas cross-tabulation of recommendation by parents, shown for
# comparison with the ACRO version of the same table.
table = pd.crosstab(index=df["recommend"], columns=df["parents"])
table

parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440,1440,1440
priority,858,1484,1924
recommend,0,0,2
spec_prior,2022,1264,758
very_recom,0,132,196


### ACRO crosstab

In [8]:
# Same cross-tabulation as the pandas cell, but produced through ACRO.
# In the recorded run the cells flagged "threshold" come back as NaN.
safe_table = acro.crosstab(df.recommend, df.parents)
safe_table

INFO:acro:outcome_df:
parents      great_pret  pretentious        usual
recommend                                        
not_recom            ok           ok           ok
priority             ok           ok           ok
recommend   threshold;   threshold;   threshold; 
spec_prior           ok           ok           ok
very_recom  threshold;            ok           ok
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; 
INFO:acro:add_output(): output_0_2023-04-27-19542171


parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440.0,1440.0,1440.0
priority,858.0,1484.0,1924.0
recommend,,,
spec_prior,2022.0,1264.0,758.0
very_recom,,132.0,196.0


### ACRO crosstab with aggregation function

In [9]:
# ACRO cross-tabulation of the mean number of children per
# (recommend, parents) cell.
safe_table = acro.crosstab(df.recommend, df.parents, values=df.children, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents                        great_pret                    pretentious  \
recommend                                                                  
not_recom                              ok                             ok   
priority                               ok                             ok   
recommend   threshold; p-ratio; nk-rule;   threshold; p-ratio; nk-rule;    
spec_prior                             ok                             ok   
very_recom  threshold; p-ratio; nk-rule;                              ok   

parents                             usual  
recommend                                  
not_recom                              ok  
priority                               ok  
recommend   threshold; p-ratio; nk-rule;   
spec_prior                             ok  
very_recom                             ok  
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; p-ratio: 4 cells suppressed; nk-rule: 4 cells suppressed; 
INFO:acro:add_ou

parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,3.101389,3.113889,3.109028
priority,2.594406,3.037062,3.110187
recommend,,,
spec_prior,3.388724,3.307753,3.369393
very_recom,,2.242424,2.239796


### ACRO crosstab with missing values

In [25]:
# Switch on the missing-value check for this example; it is switched off
# again in the next cell.
utils.CHECK_MISSING_VALUES = True

# Work on a float copy of the column so df itself is not modified and NaN can
# be stored without relying on implicit upcasting of an integer Series.
missing = df.children.astype("float64")
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
missing[0:10] = np.nan

safe_table = acro.crosstab(df.form, df.parents, values=missing, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents    great_pret pretentious    usual
form                                      
complete                           missing
completed                                 
foster                                    
incomplete                                
INFO:acro:get_summary(): review; missing values found
INFO:acro:add_output(): output_10_2023-04-27-20084191


parents,great_pret,pretentious,usual
form,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
complete,3.126852,3.125,3.161682
completed,3.15463,3.087037,3.173148
foster,3.137037,3.111111,3.133333
incomplete,3.156481,3.178704,3.112037


In [26]:
# Turn the missing-value check back off so later cells run with it disabled.
utils.CHECK_MISSING_VALUES = False

### ACRO crosstab with negative values

In [27]:
# Copy the column (df stays unchanged) and overwrite the first ten entries
# with a negative value to exercise ACRO's handling of negative inputs.
negative = df.children.copy()
negative[0:10] = -10
safe_table = acro.crosstab(df.form, df.parents, values=negative, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents    great_pret pretentious     usual
form                                       
complete                           negative
completed                                  
foster                                     
incomplete                                 
INFO:acro:get_summary(): review; negative values found
INFO:acro:add_output(): output_11_2023-04-27-20092201


parents,great_pret,pretentious,usual
form,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
complete,3.126852,3.125,3.039815
completed,3.15463,3.087037,3.173148
foster,3.137037,3.111111,3.133333
incomplete,3.156481,3.178704,3.112037


### ACRO pivot_table

In [24]:
# ACRO pivot table: mean and standard deviation of "children" for each
# value of "parents".
table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                mean      std
            children children
parents                      
great_pret        ok       ok
pretentious       ok       ok
usual             ok       ok
INFO:acro:get_summary(): pass
INFO:acro:add_output(): output_8_2023-04-27-19250153


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.103241,2.212384
pretentious,3.121759,2.239532
usual,3.132407,2.254078


### ACRO pivot_table with missing values

In [27]:
# Switch on the missing-value check for this example; it is switched off
# again in the next cell.
utils.CHECK_MISSING_VALUES = True

# NOTE: this edits df itself, so every later cell sees the change. The .loc
# label slice 0:10 includes its end label.
# Cast to float first so NaN can be stored without relying on implicit
# upcasting of an integer column.
df["children"] = df["children"].astype("float64")
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
df.loc[0:10, "children"] = np.nan

table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                mean      std
            children children
parents                      
great_pret                   
pretentious                  
usual        missing  missing
INFO:acro:get_summary(): review; missing values found
INFO:acro:add_output(): output_10_2023-04-27-19263597


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.103241,2.212384
pretentious,3.121759,2.239532
usual,3.137851,2.254373


In [28]:
# Turn the missing-value check back off so later cells run with it disabled.
utils.CHECK_MISSING_VALUES = False

### ACRO pivot_table with negative values

In [29]:
# NOTE: this edits df itself (the .loc label slice 0:10 includes its end
# label), so the negative values are still present in all later cells.
df.loc[0:10, "children"] = -10

table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                 mean       std
             children  children
parents                        
great_pret                     
pretentious                    
usual        negative  negative
INFO:acro:get_summary(): review; negative values found
INFO:acro:add_output(): output_11_2023-04-27-19265430


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.103241,2.212384
pretentious,3.121759,2.239532
usual,3.104398,2.346857


### ACRO OLS

In [33]:
# Recode the "recommend" labels as integers for use in the regressions.
print(df["recommend"].unique())

# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: with pandas Copy-on-Write the in-place form only changes a
# temporary object and leaves df untouched.
recommend_codes = {
    "not_recom": "0",
    "priority": "1",
    "recommend": "2",
    "spec_prior": "3",
    "very_recom": "4",
}
df["recommend"] = pd.to_numeric(df["recommend"].replace(to_replace=recommend_codes))
print(df["recommend"].unique())

# df.describe()

# df.describe()

[2 1 0 4 3]
[2 1 0 4 3]


In [34]:
# Regression input: the two columns of interest with incomplete rows dropped.
new_df = df[["recommend", "children"]].dropna()

# Response is the recoded recommendation; the single regressor gets a
# constant column added.
y = new_df["recommend"]
x = add_constant(new_df["children"])

results = acro.ols(y, x)
results.summary()

INFO:acro:ols() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_12_2023-04-27-19375775


0,1,2,3
Dep. Variable:,recommend,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,7.025
Date:,"Thu, 27 Apr 2023",Prob (F-statistic):,0.00805
Time:,19:37:57,Log-Likelihood:,-21728.0
No. Observations:,12960,AIC:,43460.0
Df Residuals:,12958,BIC:,43470.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.3255,0.019,68.694,0.000,1.288,1.363
children,0.0133,0.005,2.651,0.008,0.003,0.023

0,1,2,3
Omnibus:,261578.361,Durbin-Watson:,2.416
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1403.968
Skew:,0.44,Prob(JB):,1.36e-305
Kurtosis:,1.649,Cond. No.,6.83


### ACRO OLSR

In [35]:
# Same regression as the OLS cell, specified with a formula; uses the new_df
# frame built in that cell.
results = acro.olsr(formula="recommend ~ children", data=new_df)
results.summary()

INFO:acro:olsr() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_13_2023-04-27-19383360


0,1,2,3
Dep. Variable:,recommend,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,7.025
Date:,"Thu, 27 Apr 2023",Prob (F-statistic):,0.00805
Time:,19:38:33,Log-Likelihood:,-21728.0
No. Observations:,12960,AIC:,43460.0
Df Residuals:,12958,BIC:,43470.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.3255,0.019,68.694,0.000,1.288,1.363
children,0.0133,0.005,2.651,0.008,0.003,0.023

0,1,2,3
Omnibus:,261578.361,Durbin-Watson:,2.416
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1403.968
Skew:,0.44,Prob(JB):,1.36e-305
Kurtosis:,1.649,Cond. No.,6.83


### ACRO Probit

In [49]:
# Model input: finance and children with incomplete rows dropped.
new_df = df[["finance", "children"]].dropna()

# Use the integer category codes of "finance" as the response and give the
# resulting Series the column's name.
y = new_df["finance"].astype("category").cat.codes.rename("finance")
x = add_constant(new_df["children"])

results = acro.probit(y, x)
results.summary()

INFO:acro:probit() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_15_2023-04-27-19491702


Optimization terminated successfully.
         Current function value: 0.693114
         Iterations 3


0,1,2,3
Dep. Variable:,finance,No. Observations:,12960.0
Model:,Probit,Df Residuals:,12958.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 27 Apr 2023",Pseudo R-squ.:,4.775e-05
Time:,19:49:17,Log-Likelihood:,-8982.8
converged:,True,LL-Null:,-8983.2
Covariance Type:,nonrobust,LLR p-value:,0.3543

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0140,0.019,-0.749,0.454,-0.051,0.023
children,0.0045,0.005,0.926,0.354,-0.005,0.014


### ACRO Logit

In [50]:
# Logit model on the same y and x that the Probit cell prepared.
results = acro.logit(y, x)
results.summary()

INFO:acro:logit() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_16_2023-04-27-19492044


Optimization terminated successfully.
         Current function value: 0.693114
         Iterations 3


0,1,2,3
Dep. Variable:,finance,No. Observations:,12960.0
Model:,Logit,Df Residuals:,12958.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 27 Apr 2023",Pseudo R-squ.:,4.775e-05
Time:,19:49:20,Log-Likelihood:,-8982.8
converged:,True,LL-Null:,-8983.2
Covariance Type:,nonrobust,LLR p-value:,0.3543

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0223,0.030,-0.748,0.454,-0.081,0.036
children,0.0072,0.008,0.926,0.354,-0.008,0.022


### List current ACRO outputs

In [36]:
# Show every output recorded so far in this ACRO session.
acro.print_outputs()

output_0_2023-04-27-19161061:
command: safe_table = acro.crosstab(df.recommend, df.parents)
summary: fail; threshold: 4 cells suppressed; 
outcome: parents      great_pret  pretentious        usual
recommend                                        
not_recom            ok           ok           ok
priority             ok           ok           ok
recommend   threshold;   threshold;   threshold; 
spec_prior           ok           ok           ok
very_recom  threshold;            ok           ok
output: [parents     great_pret  pretentious   usual
recommend                                  
not_recom       1440.0       1440.0  1440.0
priority         858.0       1484.0  1924.0
recommend          NaN          NaN     NaN
spec_prior      2022.0       1264.0   758.0
very_recom         NaN        132.0   196.0]
timestamp: 2023-04-27-19161061
comments: 


output_1_2023-04-27-19161631:
command: safe_table = acro.crosstab(df.recommend, df.parents, values=df.children, aggfunc="mean")
summary: fai

### Remove some ACRO outputs before finalising

In [37]:
# Look up both names before removing anything, so the second position is
# not shifted by the first removal.
recorded_names = list(acro.results.keys())
output_1, output_4 = recorded_names[1], recorded_names[4]

for unwanted in (output_1, output_4):
    acro.remove_output(unwanted)

INFO:acro:remove_output(): output_1_2023-04-27-19161631 removed
INFO:acro:remove_output(): output_4_2023-04-27-19214775 removed


### Rename ACRO outputs before finalising

In [38]:
# Rename the output at position 2 of the current results to "pivot_table".
# The position refers to the list after the removals in the previous cell.
acro.rename_output(list(acro.results.keys())[2], "pivot_table")

INFO:acro:rename_output(): output_3_2023-04-27-19211026 renamed to pivot_table_2023-04-27-19211026


### Add a comment to output

In [39]:
# Attach two comments to the first recorded output, the recommend x parents
# cross table. The comment text now matches that table: the recorded summary
# for it reports 4 suppressed cells.
acro.add_comments(
    list(acro.results.keys())[0],
    "This is a cross table between recommend and parents",
)
acro.add_comments(list(acro.results.keys())[0], "4 cells were suppressed in this table")

INFO:acro:a comment was added to output_0_2023-04-27-19161061
INFO:acro:a comment was added to output_0_2023-04-27-19161061


### Add an unsupported output to the list of outputs

In [40]:
# Register an external file as an output, with a description of what it is.
acro.custom_output(
    "XandY.jfif", "This output is an image showing the relationship between X and Y"
)

INFO:acro:add_output(): output_14_2023-04-27-19393314


### Finalise ACRO

In [42]:
# Finalise the session and write the results to a JSON file.
# Alternative kept for reference (Excel file name):
# output = acro.finalise("test_results.xlsx")
output = acro.finalise("test_results.json")

INFO:acro:Directory outputs/ already exists
INFO:acro:output written to: test_results.json
