# ACRO Tests

In [2]:
import os
import sys
import pandas as pd
import numpy as np

In [3]:
sys.path.insert(0, os.path.abspath(".."))

In [4]:
from acro import ACRO, acro_tables, add_constant, utils

### Instantiate ACRO

In [5]:
acro = ACRO(suppress=False)

INFO:acro:version: 0.4.3
INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10, 'zeros_are_disclosive': True}
INFO:acro:automatic suppression: False


### Load test data

In [6]:
path = os.path.join("../data", "test_data.dta")
df = pd.read_stata(path)
df.head()

Unnamed: 0,charity,grant_type,index,year,inc_activity,inc_grants,inc_donations,inc_other,inc_total,total_costs,...,sh_staff_grants_given,sh_assets_grants_given,sh_income_balance,sh_staff_balance,sh_assets_balance,sh_income_assets,sh_staff_assets,sh_income_staff_costs,sh_assets_staff_costs,wgt
0,4Children,R,1.0,2011,2880902.0,9603182.0,91404.0,310947.0,12886435.0,12127472.0,...,,,0.072636,0.135971,0.767809,0.094602,0.17709,0.534203,5.646843,1.0
1,4Children,R,1.0,2014,6810520.0,18768904.0,58002.0,401879.0,26039304.0,25493796.0,...,,,0.057641,0.08915,1.001396,0.05756,0.089026,0.646561,11.232729,1.0
2,4Children,R,1.0,2015,7199403.0,21638036.0,132191.0,512654.0,29482284.0,32290108.0,...,,,-0.049619,-0.079828,-0.62021,0.080004,0.128711,0.621583,7.769365,1.0
3,4Children,R,1.0,2013,5573013.0,15194731.0,228844.0,267156.0,21263744.0,20989048.0,...,,,0.04574,0.068251,1.008259,0.045365,0.067692,0.670166,14.772749,1.0
4,4Children,R,1.0,2010,2056816.0,7335103.0,110256.0,424628.0,9926803.0,9769816.0,...,,,0.057696,0.122532,0.567539,0.10166,0.215901,0.470862,4.631749,1.0


### Pandas crosstab

In [None]:
table = pd.crosstab(df.year, df.grant_type)
table

grant_type,G,N,R,R/G
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,15,59,71,8
2011,15,59,71,8
2012,15,59,71,8
2013,15,59,71,8
2014,15,59,71,8
2015,15,59,71,8


### ACRO crosstab

In [None]:
safe_table = acro.crosstab(df.year, df.grant_type)
safe_table

INFO:acro:get_summary(): fail; threshold: 6 cells suppressed; 
INFO:acro:outcome_df:
---------------------------------------|
grant_type |G   |N   |R   |R/G         |
year       |    |    |    |            |
---------------------------------------|
2010       | ok | ok | ok | threshold; |
2011       | ok | ok | ok | threshold; |
2012       | ok | ok | ok | threshold; |
2013       | ok | ok | ok | threshold; |
2014       | ok | ok | ok | threshold; |
2015       | ok | ok | ok | threshold; |
---------------------------------------|

INFO:acro:records:add(): output_1


grant_type,G,N,R,R/G
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,15,59,71,
2011,15,59,71,
2012,15,59,71,
2013,15,59,71,
2014,15,59,71,
2015,15,59,71,


### ACRO crosstab with supression

In [None]:
acro.suppress = True

safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc="mean")
safe_table

INFO:acro:get_summary(): fail; threshold: 6 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; 
INFO:acro:outcome_df:
----------------------------------------------------------------|
grant_type |G   |N          |R   |R/G                           |
year       |    |           |    |                              |
----------------------------------------------------------------|
2010       | ok | p-ratio;  | ok | threshold; p-ratio; nk-rule; |
2011       | ok |        ok | ok |                   threshold; |
2012       | ok |        ok | ok |                   threshold; |
2013       | ok |        ok | ok |                   threshold; |
2014       | ok |        ok | ok |                   threshold; |
2015       | ok |        ok | ok |                   threshold; |
----------------------------------------------------------------|

INFO:acro:records:add(): output_2


grant_type,G,N,R,R/G
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,9921906.0,,8402284.0,
2011,8502247.0,124013.859375,7716880.0,
2012,11458580.0,131859.0625,6958050.5,
2013,13557147.0,147937.796875,7202273.5,
2014,13748147.0,133198.25,8277525.5,
2015,11133433.0,146572.1875,10812888.0,


### ACRO crosstab with supression and totals

In [7]:
acro.suppress = False
table = acro.crosstab(
    df.year,
    [df.grant_type, df.survivor],
    values=df.inc_grants,
    aggfunc="count",
    margins=True,
)
print(table)

INFO:acro:get_summary(): fail; threshold: 26 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; 


p-ratio 379        0.0
496        NaN
625    47690.0
Name: __dummy__, dtype: float32
p-ratio 142       93000.0
147    57600000.0
174    46628000.0
225           0.0
445        3000.0
537     3066000.0
695           0.0
786           0.0
831    14175000.0
840     8670000.0
855           0.0
916     8624000.0
Name: __dummy__, dtype: float32
p-ratio 11     NaN
18     NaN
26     NaN
42     NaN
50     NaN
60     NaN
69     0.0
75     NaN
90     NaN
101    NaN
103    0.0
130    NaN
136    NaN
157    NaN
164    NaN
191    NaN
202    NaN
211    NaN
218    NaN
239    NaN
245    NaN
249    NaN
283    NaN
301    NaN
309    0.0
325    NaN
347    NaN
350    NaN
387    NaN
392    NaN
401    NaN
407    NaN
435    NaN
451    NaN
458    NaN
473    NaN
482    NaN
510    NaN
519    NaN
533    NaN
571    NaN
576    0.0
588    NaN
597    NaN
619    NaN
631    NaN
652    NaN
663    NaN
677    NaN
678    0.0
697    NaN
705    NaN
710    NaN
714    NaN
734    NaN
747    NaN
803    NaN
870    NaN
897    NaN
Na

ValueError: An error occurred with the following details:
 Name: p-ratio
 Mask:            agg_p_percent                                           \
grant_type             G                           N            R   
survivor    Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015   
year                                                                
2010                True         False          True        False   
2011                True         False         False        False   
2012                True         False         False        False   
2013                True         False         False        False   
2014                True         False         False        False   
2015                True         False         False        False   
All                False         False         False        False   

                                               
grant_type                         R/G    All  
survivor   Alive in 2015 Alive in 2015         
year                                           
2010               False          True  False  
2011               False         False  False  
2012               False         False  False  
2013               False         False  False  
2014               False         False  False  
2015               False         False  False  
All                False         False  False  
 Table: grant_type            G                          N                          R  \
survivor   Dead in 2015 Alive in 2015 Dead in 2015 Alive in 2015 Dead in 2015   
year                                                                            
2010                  2            12          0.0             5           40   
2011                  3            12          0.0            58           45   
2012                  3            12          0.0            59           45   
2013                  3            12          0.0            59           47   
2014                  3            12          0.0            59           43   
2015                  3             9          0.0            58           28   
All                  17            69          NaN           298          248   

grant_type                        R/G                All  
survivor   Alive in 2015 Dead in 2015 Alive in 2015       
year                                                      
2010                  20          0.0             4   83  
2011                  24          0.0             8  150  
2012                  24          0.0             8  151  
2013                  24          0.0             8  153  
2014                  24          0.0             8  149  
2015                  23          0.0             8  129  
All                  139          NaN            44  815  

In [11]:
acro.suppress = False

### ACRO crosstab with aggregation function

In [12]:
safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc="mean")
safe_table

INFO:acro:get_summary(): fail; threshold: 6 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; 
INFO:acro:outcome_df:
----------------------------------------------------------------|
grant_type |G   |N          |R   |R/G                           |
year       |    |           |    |                              |
----------------------------------------------------------------|
2010       | ok | p-ratio;  | ok | threshold; p-ratio; nk-rule; |
2011       | ok |        ok | ok |                   threshold; |
2012       | ok |        ok | ok |                   threshold; |
2013       | ok |        ok | ok |                   threshold; |
2014       | ok |        ok | ok |                   threshold; |
2015       | ok |        ok | ok |                   threshold; |
----------------------------------------------------------------|

INFO:acro:records:add(): output_4


grant_type,G,N,R,R/G
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,9921906.0,0.0,8402284.0,11636000.0
2011,8502247.0,124013.859375,7716880.0,16047500.0
2012,11458580.0,131859.0625,6958050.5,16810000.0
2013,13557147.0,147937.796875,7202273.5,16765625.0
2014,13748147.0,133198.25,8277525.5,17845750.0
2015,11133433.0,146572.1875,10812888.0,18278624.0


### ACRO crosstab with multiple aggregation functions and totals

In [13]:
safe_table = acro.crosstab(
    df.year, df.grant_type, values=df.inc_grants, aggfunc=["mean", "std"], margins=True
)
safe_table

INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing; 
INFO:acro:outcome_df:
------------------------------------------------------------------------------------------------------------------------|
           mean                                                   |std                                                  |
grant_type G    N          R   R/G                            All |G   N          R   R/G                            All|
year                                                              |                                                     |
------------------------------------------------------------------------------------------------------------------------|
2010        ok   p-ratio;   ok  threshold; p-ratio; nk-rule;   ok | ok  p-ratio;   ok  threshold; p-ratio; nk-rule;   ok|
2011        ok          ok  ok                    threshold;   ok | ok         ok  ok         

Unnamed: 0_level_0,mean,mean,mean,mean,mean,std,std,std,std,std
grant_type,G,N,R,R/G,All,G,N,R,R/G,All
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2010,9921906.0,0.0,8402284.0,11636000.0,8308286.0,18550550.0,0.0,30595570.0,17010880.0,27273980.0
2011,8502247.0,124013.859375,7716880.0,16047500.0,5303808.0,16885950.0,205959.492903,29543220.0,15616380.0,21376580.0
2012,11458580.0,131859.0625,6958050.5,16810000.0,5259893.0,20610900.0,210476.539175,27211840.0,16464490.0,20264000.0
2013,13557147.0,147937.796875,7202273.5,16765625.0,5605045.5,24868440.0,203747.417017,29898330.0,16711120.0,22517870.0
2014,13748147.0,133198.25,8277525.5,17845750.0,6117054.5,31345590.0,181865.92558,35463480.0,17412510.0,26417220.0
2015,11133433.0,146572.1875,10812888.0,18278624.0,6509989.5,25539190.0,201602.800832,41309350.0,17304710.0,27846360.0
All,11412787.0,134431.890625,8098502.0,16648273.0,5997796.5,22832200.0,198873.726656,32044950.0,15835320.0,24053240.0


### ACRO crosstab with missing values

In [12]:
acro_tables.CHECK_MISSING_VALUES = True

missing = df.inc_grants.copy()
missing[0:10] = np.NaN

safe_table = acro.crosstab(
    df.year, df.grant_type, values=missing, aggfunc="mean", margins=True
)
safe_table

INFO:acro:get_summary(): review; missing values found
INFO:acro:outcome_df:
-------------------------------------------------------|
grant_type |G        |N        |R        |R/G      |All|
year       |         |         |         |         |   |
-------------------------------------------------------|
2010       | missing | missing | missing | missing |   |
2011       |         | missing | missing |         |   |
2012       |         |         | missing |         |   |
2013       |         | missing | missing |         |   |
2014       |         | missing | missing |         |   |
2015       | missing | missing | missing |         |   |
All        |         |         |         |         |   |
-------------------------------------------------------|

INFO:acro:records:add(): output_4


grant_type,G,N,R,R/G,All
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,9921906.0,0.0,8420372.0,11636000.0,8320154.5
2011,8502247.0,125663.226562,7689140.5,16047500.0,5310392.0
2012,11458580.0,131859.0625,6896304.0,16810000.0,5220580.5
2013,13557147.0,150488.453125,7088096.0,16765625.0,5578657.0
2014,13748147.0,135494.78125,8118565.0,17845750.0,6072600.0
2015,11133433.0,149143.625,10596385.0,18278624.0,6442131.0
All,11412787.0,136158.859375,8006360.5,16648273.0,5968295.5


In [13]:
utils.CHECK_MISSING_VALUES = False

### ACRO crosstab with negative values

In [14]:
negative = df.inc_grants.copy()
negative[0:10] = -10

safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc="mean")
safe_table

INFO:acro:get_summary(): review; negative values found
INFO:acro:outcome_df:
----------------------------------------|
grant_type |G |N         |R         |R/G|
year       |  |          |          |   |
----------------------------------------|
2010       |  |          | negative |   |
2011       |  | negative | negative |   |
2012       |  |          | negative |   |
2013       |  | negative | negative |   |
2014       |  | negative | negative |   |
2015       |  | negative | negative |   |
----------------------------------------|

INFO:acro:records:add(): output_5


grant_type,G,N,R,R/G
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,9921906.0,0.0,8280032.5,11636000.0
2011,8502247.0,123496.445312,7577703.5,16047500.0
2012,11458580.0,131859.0625,6796357.5,16810000.0
2013,13557147.0,147937.625,6988263.5,16765625.0
2014,13748147.0,133198.078125,7997392.5,17845750.0
2015,11133433.0,146572.015625,10388613.0,18278624.0


### ACRO pivot_table

In [15]:
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

INFO:acro:get_summary(): review; missing values found
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G           missing   | missing  |
N           missing   | missing  |
R           missing   | missing  |
R/G         missing   | missing  |
---------------------------------|

INFO:acro:records:add(): output_6


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,inc_grants,inc_grants
grant_type,Unnamed: 1_level_2,Unnamed: 2_level_2
G,11412790.0,22832200.0
N,134431.9,198873.7
R,8098502.0,32044950.0
R/G,16648270.0,15835320.0


### ACRO pivot_table with missing values

In [16]:
utils.CHECK_MISSING_VALUES = True

df.loc[0:10, "inc_grants"] = np.NaN

table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

INFO:acro:get_summary(): review; missing values found
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G           missing   | missing  |
N           missing   | missing  |
R           missing   | missing  |
R/G         missing   | missing  |
---------------------------------|

INFO:acro:records:add(): output_7


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,inc_grants,inc_grants
grant_type,Unnamed: 1_level_2,Unnamed: 2_level_2
G,11412790.0,22832200.0
N,136470.0,199933.5
R,8006360.0,32282160.0
R/G,16648270.0,15835320.0


In [17]:
utils.CHECK_MISSING_VALUES = False

### ACRO pivot_table with negative values

In [18]:
df.loc[0:10, "inc_grants"] = -10

table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

INFO:acro:get_summary(): review; negative values found
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G                     |          |
N           negative  | negative |
R           negative  | negative |
R/G                   |          |
---------------------------------|

INFO:acro:records:add(): output_8


Unnamed: 0_level_0,mean,std
Unnamed: 0_level_1,inc_grants,inc_grants
grant_type,Unnamed: 1_level_2,Unnamed: 2_level_2
G,11412790.0,22832200.0
N,134180.0,199019.6
R,7882231.0,32045580.0
R/G,16648270.0,15835320.0


### ACRO OLS

In [19]:
new_df = df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
new_df = new_df.dropna()

y = new_df["inc_activity"]
x = new_df[["inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

results = acro.ols(y, x)
results.summary()

INFO:acro:ols() outcome: pass; dof=807.0 >= 10


INFO:acro:records:add(): output_9


0,1,2,3
Dep. Variable:,inc_activity,R-squared:,0.894
Model:,OLS,Adj. R-squared:,0.893
Method:,Least Squares,F-statistic:,2261.0
Date:,"Tue, 26 Sep 2023",Prob (F-statistic):,0.0
Time:,16:44:14,Log-Likelihood:,-14495.0
No. Observations:,811,AIC:,29000.0
Df Residuals:,807,BIC:,29020.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.01e+05,5.33e+05,0.565,0.572,-7.45e+05,1.35e+06
inc_grants,-0.8846,0.025,-35.956,0.000,-0.933,-0.836
inc_donations,-0.6647,0.016,-40.721,0.000,-0.697,-0.633
total_costs,0.8313,0.011,78.674,0.000,0.811,0.852

0,1,2,3
Omnibus:,1339.956,Durbin-Watson:,1.414
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1253317.706
Skew:,9.899,Prob(JB):,0.0
Kurtosis:,194.566,Cond. No.,105000000.0


### ACRO OLSR

In [20]:
results = acro.olsr(
    formula="inc_activity ~ inc_grants + inc_donations + total_costs", data=new_df
)
results.summary()

INFO:acro:olsr() outcome: pass; dof=807.0 >= 10


INFO:acro:records:add(): output_10


0,1,2,3
Dep. Variable:,inc_activity,R-squared:,0.894
Model:,OLS,Adj. R-squared:,0.893
Method:,Least Squares,F-statistic:,2261.0
Date:,"Tue, 26 Sep 2023",Prob (F-statistic):,0.0
Time:,16:44:14,Log-Likelihood:,-14495.0
No. Observations:,811,AIC:,29000.0
Df Residuals:,807,BIC:,29020.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3.01e+05,5.33e+05,0.565,0.572,-7.45e+05,1.35e+06
inc_grants,-0.8846,0.025,-35.956,0.000,-0.933,-0.836
inc_donations,-0.6647,0.016,-40.721,0.000,-0.697,-0.633
total_costs,0.8313,0.011,78.674,0.000,0.811,0.852

0,1,2,3
Omnibus:,1339.956,Durbin-Watson:,1.414
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1253317.706
Skew:,9.899,Prob(JB):,0.0
Kurtosis:,194.566,Cond. No.,105000000.0


### ACRO Probit

In [21]:
new_df = df[["survivor", "inc_activity", "inc_grants", "inc_donations", "total_costs"]]
new_df = new_df.dropna()

y = new_df["survivor"].astype("category").cat.codes  # numeric
y.name = "survivor"
x = new_df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

results = acro.probit(y, x)
results.summary()

INFO:acro:probit() outcome: pass; dof=806.0 >= 10
INFO:acro:records:add(): output_11


Optimization terminated successfully.
         Current function value: 0.493791
         Iterations 10


0,1,2,3
Dep. Variable:,survivor,No. Observations:,811.0
Model:,Probit,Df Residuals:,806.0
Method:,MLE,Df Model:,4.0
Date:,"Tue, 26 Sep 2023",Pseudo R-squ.:,0.214
Time:,16:44:15,Log-Likelihood:,-400.46
converged:,True,LL-Null:,-509.5
Covariance Type:,nonrobust,LLR p-value:,4.875e-46

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0474,0.057,0.838,0.402,-0.063,0.158
inc_activity,1.836e-07,5.16e-08,3.559,0.000,8.25e-08,2.85e-07
inc_grants,8.576e-08,3.9e-08,2.197,0.028,9.25e-09,1.62e-07
inc_donations,2.406e-07,4.54e-08,5.297,0.000,1.52e-07,3.3e-07
total_costs,-8.644e-08,3.68e-08,-2.351,0.019,-1.59e-07,-1.44e-08


### ACRO Logit

In [22]:
results = acro.logit(y, x)
results.summary()

INFO:acro:logit() outcome: pass; dof=806.0 >= 10
INFO:acro:records:add(): output_12


Optimization terminated successfully.
         Current function value: 0.490836
         Iterations 12


0,1,2,3
Dep. Variable:,survivor,No. Observations:,811.0
Model:,Logit,Df Residuals:,806.0
Method:,MLE,Df Model:,4.0
Date:,"Tue, 26 Sep 2023",Pseudo R-squ.:,0.2187
Time:,16:44:15,Log-Likelihood:,-398.07
converged:,True,LL-Null:,-509.5
Covariance Type:,nonrobust,LLR p-value:,4.532e-47

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0512,0.091,0.561,0.575,-0.128,0.230
inc_activity,2.981e-07,8.95e-08,3.330,0.001,1.23e-07,4.74e-07
inc_grants,1.351e-07,6.67e-08,2.026,0.043,4.39e-09,2.66e-07
inc_donations,5.123e-07,1.04e-07,4.927,0.000,3.08e-07,7.16e-07
total_costs,-1.442e-07,6.26e-08,-2.304,0.021,-2.67e-07,-2.15e-08


### List current ACRO outputs

In [23]:
results_str = acro.print_outputs()

uid: output_0
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 6, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [], 'nk-rule': []}}
command: safe_table = acro.crosstab(df.year, df.grant_type)
summary: fail; threshold: 6 cells may need suppressing; 
outcome: grant_type   G   N   R          R/G
year                               
2010        ok  ok  ok  threshold; 
2011        ok  ok  ok  threshold; 
2012        ok  ok  ok  threshold; 
2013        ok  ok  ok  threshold; 
2014        ok  ok  ok  threshold; 
2015        ok  ok  ok  threshold; 
output: [grant_type   G   N   R  R/G
year                       
2010        15  59  71    8
2011        15  59  71    8
2012        15  59  71    8
2013        15  59  71    8
2014        15  59  71    8
2015        15  59  71    8]
timestamp: 2023-09-26T16:43:37

### Remove some ACRO outputs before finalising

In [24]:
acro.remove_output("output_1")
acro.remove_output("output_4")

INFO:acro:records:remove(): output_1 removed
INFO:acro:records:remove(): output_4 removed


### Rename ACRO outputs before finalising

In [25]:
acro.rename_output("output_2", "pivot_table")

INFO:acro:records:rename_output(): output_2 renamed to pivot_table


### Add a comment to output

In [26]:
acro.add_comments("output_0", "This is a cross table between year and grant_type")
acro.add_comments("output_0", "6 cells were suppressed in this table")

INFO:acro:records:a comment was added to output_0
INFO:acro:records:a comment was added to output_0


### Add an unsupported output to the list of outputs

In [27]:
acro.custom_output(
    "XandY.jpeg", "This output is an image showing the relationship between X and Y"
)

INFO:acro:records:add_custom(): output_13


### Request an exception for some of the outputs

In [28]:
acro.add_exception("output_0", "I really need this.")
acro.add_exception("output_3", "This one is safe. Trust me, I'm a professor.")
acro.add_exception("output_5", "It's not disclosive, I promise.")
acro.add_exception("output_6", "I need this one too")

INFO:acro:records:exception request was added to output_0
INFO:acro:records:exception request was added to output_3
INFO:acro:records:exception request was added to output_5
INFO:acro:records:exception request was added to output_6


### Finalise ACRO

In [29]:
SAVE_PATH = "ACRO_RES"

# output = acro.finalise(SAVE_PATH, "xlsx")
output = acro.finalise(SAVE_PATH, "json")

INFO:acro:records:
uid: output_7
status: review
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}
command: table = acro.pivot_table(
summary: review; missing values found
outcome:                  mean        std
           inc_grants inc_grants
grant_type                      
G             missing    missing
N             missing    missing
R             missing    missing
R/G           missing    missing
output: [                    mean           std
              inc_grants    inc_grants
grant_type                            
G           1.141279e+07  2.283220e+07
N           1.364700e+05  1.999335e+05
R           8.006360e+06  3.228216e+07
R/G         1.664827e+07  1.583532e+07]
timestamp: 2023-09-26T16:44:14.514747
comm

### List files generated

In [30]:
files = []
for name in os.listdir(SAVE_PATH):
    if os.path.isfile(os.path.join(SAVE_PATH, name)):
        files.append(name)
files.sort()
for f in files:
    print(f)

XandY.jpeg
config.json
output_0_0.csv
output_10_0.csv
output_10_1.csv
output_10_2.csv
output_11_0.csv
output_11_1.csv
output_12_0.csv
output_12_1.csv
output_3_0.csv
output_5_0.csv
output_6_0.csv
output_7_0.csv
output_8_0.csv
output_9_0.csv
output_9_1.csv
output_9_2.csv
pivot_table_0.csv
results.json


### Checksums

In [31]:
files = []
checksum_dir = os.path.join(SAVE_PATH, "checksums")
for name in os.listdir(checksum_dir):
    if os.path.isfile(os.path.join(checksum_dir, name)):
        files.append(name)
files.sort()
for f in files:
    print(f)

XandY.jpeg.txt
output_0_0.csv.txt
output_10_0.csv.txt
output_10_1.csv.txt
output_10_2.csv.txt
output_11_0.csv.txt
output_11_1.csv.txt
output_12_0.csv.txt
output_12_1.csv.txt
output_3_0.csv.txt
output_5_0.csv.txt
output_6_0.csv.txt
output_7_0.csv.txt
output_8_0.csv.txt
output_9_0.csv.txt
output_9_1.csv.txt
output_9_2.csv.txt
pivot_table_0.csv.txt
results.json.txt


### Inspect metadata

In [32]:
results_json = os.path.normpath(f"{SAVE_PATH}/results.json")
with open(results_json, "r") as f:
    print(f.read())

{
    "version": "0.4.3",
    "results": {
        "output_0": {
            "uid": "output_0",
            "status": "fail",
            "type": "table",
            "properties": {
                "method": "crosstab"
            },
            "files": [
                {
                    "name": "output_0_0.csv",
                    "sdc": {
                        "summary": {
                            "suppressed": false,
                            "negative": 0,
                            "missing": 0,
                            "threshold": 6,
                            "p-ratio": 0,
                            "nk-rule": 0
                        },
                        "cells": {
                            "negative": [],
                            "missing": [],
                            "threshold": [
                                [
                                    0,
                                    3
                                ],
             