# ACRO Tests

In [56]:
import os
import sys
import pandas as pd
import numpy as np

In [57]:
sys.path.insert(0, os.path.abspath(".."))

In [58]:
from acro import ACRO, add_constant, utils

### Instantiate ACRO

In [59]:
acro = ACRO()

INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False}


### Load test data

In [60]:
from sklearn.datasets import fetch_openml

data = fetch_openml(data_id=26, as_frame=True)
df = data.data
df["recommend"] = data.target
df.head()

  warn(


Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,recommend
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority


### convert 'more than 3' children to random between 4 and 10

In [61]:
print(df["children"].unique())
df["children"].replace(to_replace={"more": "4"}, inplace=True)
df["children"] = pd.to_numeric(df["children"])
print(f"values before apply transforamtion: {df['children'].unique()}")

df["children"] = df.apply(
    lambda row: row["children"]
    if row["children"] in (1, 2, 3)
    else np.random.randint(4, 10),
    axis=1,
)
print(f"values after apply transformation: {df['children'].unique()}")

df.describe()

['1', '2', '3', 'more']
Categories (4, object): ['1', '2', '3', 'more']
values before apply transforamtion: [1 2 3 4]
values after apply transformation: [1 2 3 4 8 6 5 9 7]


Unnamed: 0,children
count,12960.0
mean,3.132099
std,2.254435
min,1.0
25%,1.75
50%,2.5
75%,3.25
max,9.0


### Pandas crosstab

In [62]:
table = pd.crosstab(df.recommend, df.parents)
table

parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440,1440,1440
priority,858,1484,1924
recommend,0,0,2
spec_prior,2022,1264,758
very_recom,0,132,196


### ACRO crosstab

In [63]:
safe_table = acro.crosstab(df.recommend, df.parents)
safe_table

INFO:acro:outcome_df:
parents      great_pret  pretentious        usual
recommend                                        
not_recom            ok           ok           ok
priority             ok           ok           ok
recommend   threshold;   threshold;   threshold; 
spec_prior           ok           ok           ok
very_recom  threshold;            ok           ok
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; 
INFO:acro:add_output(): output_0_2023-04-27-20431880


parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440.0,1440.0,1440.0
priority,858.0,1484.0,1924.0
recommend,,,
spec_prior,2022.0,1264.0,758.0
very_recom,,132.0,196.0


### ACRO crosstab with aggregation function

In [64]:
safe_table = acro.crosstab(df.recommend, df.parents, values=df.children, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents                        great_pret                    pretentious  \
recommend                                                                  
not_recom                              ok                             ok   
priority                               ok                             ok   
recommend   threshold; p-ratio; nk-rule;   threshold; p-ratio; nk-rule;    
spec_prior                             ok                             ok   
very_recom  threshold; p-ratio; nk-rule;                              ok   

parents                             usual  
recommend                                  
not_recom                              ok  
priority                               ok  
recommend   threshold; p-ratio; nk-rule;   
spec_prior                             ok  
very_recom                             ok  
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; p-ratio: 4 cells suppressed; nk-rule: 4 cells suppressed; 
INFO:acro:add_ou

parents,great_pret,pretentious,usual
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,3.119444,3.128472,3.145833
priority,2.592075,3.037736,3.117464
recommend,,,
spec_prior,3.355094,3.336234,3.403694
very_recom,,2.348485,2.255102


### ACRO crosstab with missing values

In [65]:
utils.CHECK_MISSING_VALUES = True

missing = df.children.copy()
missing[0:10] = np.NaN

safe_table = acro.crosstab(df.form, df.parents, values=missing, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents    great_pret pretentious    usual
form                                      
complete                           missing
completed                                 
foster                                    
incomplete                                
INFO:acro:get_summary(): review; missing values found
INFO:acro:add_output(): output_2_2023-04-27-20431898


parents,great_pret,pretentious,usual
form,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
complete,3.099074,3.185185,3.139252
completed,3.122222,3.117593,3.137963
foster,3.174074,3.112963,3.089815
incomplete,3.10463,3.121296,3.200926


In [66]:
utils.CHECK_MISSING_VALUES = False

### ACRO crosstab with negative values

In [67]:
negative = df.children.copy()
negative[0:10] = -10
safe_table = acro.crosstab(df.form, df.parents, values=negative, aggfunc="mean")
safe_table

INFO:acro:outcome_df:
parents    great_pret pretentious     usual
form                                       
complete                           negative
completed                                  
foster                                     
incomplete                                 
INFO:acro:get_summary(): review; negative values found
INFO:acro:add_output(): output_3_2023-04-27-20431911


parents,great_pret,pretentious,usual
form,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
complete,3.099074,3.185185,3.017593
completed,3.122222,3.117593,3.137963
foster,3.174074,3.112963,3.089815
incomplete,3.10463,3.121296,3.200926


### ACRO pivot_table

In [68]:
table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                mean      std
            children children
parents                      
great_pret        ok       ok
pretentious       ok       ok
usual             ok       ok
INFO:acro:get_summary(): pass
INFO:acro:add_output(): output_4_2023-04-27-20431920


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.125,2.245651
pretentious,3.134259,2.254071
usual,3.137037,2.264051


### ACRO pivot_table with missing values

In [69]:
utils.CHECK_MISSING_VALUES = True

df.loc[0:10, "children"] = np.NaN

table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                mean      std
            children children
parents                      
great_pret                   
pretentious                  
usual        missing  missing
INFO:acro:get_summary(): review; missing values found
INFO:acro:add_output(): output_5_2023-04-27-20431928


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.125,2.245651
pretentious,3.134259,2.254071
usual,3.142492,2.264359


In [70]:
utils.CHECK_MISSING_VALUES = False

### ACRO pivot_table with negative values

In [71]:
df.loc[0:10, "children"] = -10

table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

INFO:acro:outcome_df:
                 mean       std
             children  children
parents                        
great_pret                     
pretentious                    
usual        negative  negative
INFO:acro:get_summary(): review; negative values found
INFO:acro:add_output(): output_6_2023-04-27-20431938


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,children,children
parents,Unnamed: 1_level_2,Unnamed: 2_level_2
great_pret,3.125,2.245651
pretentious,3.134259,2.254071
usual,3.109028,2.356492


### ACRO OLS

In [72]:
print(df["recommend"].unique())
df["recommend"].replace(
    to_replace={
        "not_recom": "0",
        "priority": "1",
        "recommend": "2",
        "spec_prior": "3",
        "very_recom": "4",
    },
    inplace=True,
)
df["recommend"] = pd.to_numeric(df["recommend"])

['recommend', 'priority', 'not_recom', 'very_recom', 'spec_prior']
Categories (5, object): ['not_recom', 'priority', 'recommend', 'spec_prior', 'very_recom']


In [73]:
new_df = df[["recommend", "children"]]
new_df = new_df.dropna()

y = new_df["recommend"]
x = new_df["children"]
x = add_constant(x)

results = acro.ols(y, x)
results.summary()

INFO:acro:ols() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_7_2023-04-27-20431948


0,1,2,3
Dep. Variable:,recommend,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,9.277
Date:,"Thu, 27 Apr 2023",Prob (F-statistic):,0.00233
Time:,20:43:19,Log-Likelihood:,-21727.0
No. Observations:,12960,AIC:,43460.0
Df Residuals:,12958,BIC:,43470.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.3195,0.019,68.577,0.000,1.282,1.357
children,0.0151,0.005,3.046,0.002,0.005,0.025

0,1,2,3
Omnibus:,272555.46,Durbin-Watson:,2.417
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1401.456
Skew:,0.44,Prob(JB):,4.76e-305
Kurtosis:,1.65,Cond. No.,6.84


### ACRO OLSR

In [74]:
results = acro.olsr(formula="recommend ~ children", data=new_df)
results.summary()

INFO:acro:olsr() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_8_2023-04-27-20431954


0,1,2,3
Dep. Variable:,recommend,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,9.277
Date:,"Thu, 27 Apr 2023",Prob (F-statistic):,0.00233
Time:,20:43:19,Log-Likelihood:,-21727.0
No. Observations:,12960,AIC:,43460.0
Df Residuals:,12958,BIC:,43470.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.3195,0.019,68.577,0.000,1.282,1.357
children,0.0151,0.005,3.046,0.002,0.005,0.025

0,1,2,3
Omnibus:,272555.46,Durbin-Watson:,2.417
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1401.456
Skew:,0.44,Prob(JB):,4.76e-305
Kurtosis:,1.65,Cond. No.,6.84


### ACRO Probit

In [75]:
new_df = df[["finance", "children"]]
new_df = new_df.dropna()

y = new_df["finance"].astype("category").cat.codes  # numeric
y.name = "finance"
x = new_df["children"]
x = add_constant(x)

results = acro.probit(y, x)
results.summary()

INFO:acro:probit() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_9_2023-04-27-20431960


Optimization terminated successfully.
         Current function value: 0.693146
         Iterations 3


0,1,2,3
Dep. Variable:,finance,No. Observations:,12960.0
Model:,Probit,Df Residuals:,12958.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 27 Apr 2023",Pseudo R-squ.:,1.665e-06
Time:,20:43:19,Log-Likelihood:,-8983.2
converged:,True,LL-Null:,-8983.2
Covariance Type:,nonrobust,LLR p-value:,0.8627

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0026,0.019,-0.140,0.889,-0.039,0.034
children,0.0008,0.005,0.173,0.863,-0.009,0.010


### ACRO Logit

In [76]:
results = acro.logit(y, x)
results.summary()

INFO:acro:logit() outcome: pass; dof=12958.0 >= 10
INFO:acro:add_output(): output_10_2023-04-27-20431973


Optimization terminated successfully.
         Current function value: 0.693146
         Iterations 3


0,1,2,3
Dep. Variable:,finance,No. Observations:,12960.0
Model:,Logit,Df Residuals:,12958.0
Method:,MLE,Df Model:,1.0
Date:,"Thu, 27 Apr 2023",Pseudo R-squ.:,1.665e-06
Time:,20:43:19,Log-Likelihood:,-8983.2
converged:,True,LL-Null:,-8983.2
Covariance Type:,nonrobust,LLR p-value:,0.8627

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0042,0.030,-0.140,0.889,-0.062,0.054
children,0.0013,0.008,0.173,0.863,-0.014,0.016


### List current ACRO outputs

In [77]:
acro.print_outputs()

output_0_2023-04-27-20431880:
command: safe_table = acro.crosstab(df.recommend, df.parents)
summary: fail; threshold: 4 cells suppressed; 
outcome: parents      great_pret  pretentious        usual
recommend                                        
not_recom            ok           ok           ok
priority             ok           ok           ok
recommend   threshold;   threshold;   threshold; 
spec_prior           ok           ok           ok
very_recom  threshold;            ok           ok
output: [parents     great_pret  pretentious   usual
recommend                                  
not_recom       1440.0       1440.0  1440.0
priority         858.0       1484.0  1924.0
recommend          NaN          NaN     NaN
spec_prior      2022.0       1264.0   758.0
very_recom         NaN        132.0   196.0]
timestamp: 2023-04-27-20431880
comments: 


output_1_2023-04-27-20431889:
command: safe_table = acro.crosstab(df.recommend, df.parents, values=df.children, aggfunc="mean")
summary: fai

### Remove some ACRO outputs before finalising

In [78]:
output_1 = list(acro.results.keys())[1]
output_4 = list(acro.results.keys())[4]

acro.remove_output(output_1)
acro.remove_output(output_4)

INFO:acro:remove_output(): output_1_2023-04-27-20431889 removed
INFO:acro:remove_output(): output_4_2023-04-27-20431920 removed


### Rename ACRO outputs before finalising

In [79]:
acro.rename_output(list(acro.results.keys())[2], "pivot_table")

INFO:acro:rename_output(): output_3_2023-04-27-20431911 renamed to pivot_table_2023-04-27-20431911


### Add a comment to output

In [80]:
acro.add_comments(
    list(acro.results.keys())[0], "This is a cross table between year and grant_type"
)
acro.add_comments(list(acro.results.keys())[0], "6 cells were supressed in this table")

INFO:acro:a comment was added to output_0_2023-04-27-20431880
INFO:acro:a comment was added to output_0_2023-04-27-20431880


### Add an unsupported output to the list of outputs

In [81]:
acro.custom_output(
    "XandY.jfif", "This output is an image showing the relationship between X and Y"
)

INFO:acro:add_output(): output_11_2023-04-27-20431992


### Finalise ACRO

In [82]:
output = acro.finalise("test_results.json")

INFO:acro:Directory outputs/ already exists
INFO:acro:output written to: test_results.json


<class 'dict'>
{'output_0_2023-04-27-20431880': {'command': 'safe_table = acro.crosstab(df.recommend, df.parents)', 'summary': 'fail; threshold: 4 cells suppressed; ', 'outcome': '{"great_pret":{"not_recom":"ok","priority":"ok","recommend":"threshold; ","spec_prior":"ok","very_recom":"threshold; "},"pretentious":{"not_recom":"ok","priority":"ok","recommend":"threshold; ","spec_prior":"ok","very_recom":"ok"},"usual":{"not_recom":"ok","priority":"ok","recommend":"threshold; ","spec_prior":"ok","very_recom":"ok"}}', 'output': 'c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\outputs\\output_0_2023-04-27-20431880.csv', 'timestamp': '2023-04-27-20431880', 'comments': 'This is a cross table between year and grant_type, 6 cells were supressed in this table'}, 'output_2_2023-04-27-20431898': {'command': 'safe_table = acro.crosstab(df.form, df.parents, values=missing, aggfunc="mean")', 'summary': 'review; missing values found', 'outcome': '{"great_pret":{"complete":"","completed":"","fo