# ACRO Tests

In [1]:
import os
import sys
import pandas as pd
import numpy as np

In [2]:
# Put the parent of the current working directory first on the import path;
# presumably this lets the `from acro import ...` in the next cell resolve
# against the local checkout rather than an installed copy.
sys.path.insert(0, os.path.abspath(os.pardir))

In [3]:
from acro import ACRO, add_constant, utils

### Instantiate ACRO

In [4]:
# Create the ACRO session object. Every later cell calls its methods
# (crosstab, pivot_table, ols, ..., finalise) through this `acro` name.
acro = ACRO()

INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False}


### Load test data

In [5]:
# Load the OpenML dataset with data_id=26 as pandas objects.
# Alternative local source (disabled):
#   df = pd.read_stata(os.path.join("../data", "test_data.dta"))
from sklearn.datasets import fetch_openml

data = fetch_openml(data_id=26, as_frame=True)

# Build the working frame as a new object instead of adding a column to
# `data.data` in place, so the fetched bunch is left untouched and re-running
# this cell always starts from the same state.
df = data.data.assign(recommend=data.target)

# Last expression of the cell: rendered with the notebook's rich display.
df.describe()

       parents has_nurs      form children     housing     finance   social  \
count    12960    12960     12960    12960       12960       12960    12960   
unique       3        5         4        4           3           2        3   
top      usual   proper  complete        1  convenient  convenient  nonprob   
freq      4320     2592      3240     3240        4320        6480     4320   

             health  recommend  
count         12960      12960  
unique            3          5  
top     recommended  not_recom  
freq           4320       4320  


### Convert 'more than 3' children to a random integer between 4 and 9

In [6]:
# Show the raw categories before any conversion.
print(df["children"].unique())

# Map the open-ended "more" category to "4" and make the column numeric.
# The result is assigned back to the column rather than calling
# `replace(..., inplace=True)` on `df["children"]`: that form operates on a
# temporary Series and does not update `df` under pandas Copy-on-Write.
# `astype(str)` turns the categorical values into plain strings first so the
# replacement acts on values rather than on category labels.
df["children"] = pd.to_numeric(df["children"].astype(str).replace({"more": "4"}))
print(f"values before apply transforamtion: {df['children'].unique()}")

# Replace every value outside {1, 2, 3} with a random integer in 4..9
# (the upper bound 10 is exclusive). A seeded generator keeps the notebook
# reproducible under Restart & Run All, and drawing all values in a single
# call replaces the row-by-row `apply`.
rng = np.random.default_rng(42)
more_than_three = ~df["children"].isin([1, 2, 3])
df.loc[more_than_three, "children"] = rng.integers(
    4, 10, size=int(more_than_three.sum())
)
print(f"values after apply transformation: {df['children'].unique()}")

df.describe()

['1', '2', '3', 'more']
Categories (4, object): ['1', '2', '3', 'more']
values before apply transforamtion: [1 2 3 4]
values after apply transformation: [1 2 3 9 5 8 7 4 6]


Unnamed: 0,children
count,12960.0
mean,3.124306
std,2.243437
min,1.0
25%,1.75
50%,2.5
75%,3.25
max,9.0


### Pandas crosstab

In [7]:
# Baseline contingency table built with plain pandas: rows are the
# `recommend` values, columns are the `parents` values.
table = pd.crosstab(index=df["recommend"], columns=df["parents"])
table

parents,usual,pretentious,great_pret
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440,1440,1440
recommend,2,0,0
very_recom,196,132,0
priority,1924,1484,858
spec_prior,758,1264,2022


### ACRO crosstab

In [8]:
# The same cross-tabulation routed through `acro.crosstab`; the value it
# returns is displayed as the cell output.
safe_table = acro.crosstab(df["recommend"], df["parents"])
safe_table

INFO:acro:outcome_df:
parents           usual  pretentious   great_pret
recommend                                        
not_recom            ok           ok           ok
recommend   threshold;   threshold;   threshold; 
very_recom           ok           ok  threshold; 
priority             ok           ok           ok
spec_prior           ok           ok           ok
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; 
INFO:acro:add_output(): output_0_2023-04-26-21301610


parents,usual,pretentious,great_pret
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,1440.0,1440.0,1440.0
recommend,,,
very_recom,196.0,132.0,
priority,1924.0,1484.0,858.0
spec_prior,758.0,1264.0,2022.0


### ACRO crosstab with aggregation function

In [9]:
# Cross-tabulate `recommend` by `parents`, aggregating the numeric `children`
# column with its mean in every cell.
safe_table = acro.crosstab(
    df["recommend"],
    df["parents"],
    values=df["children"],
    aggfunc="mean",
)
safe_table

INFO:acro:outcome_df:
parents                             usual                    pretentious  \
recommend                                                                  
not_recom                              ok                             ok   
recommend   threshold; p-ratio; nk-rule;   threshold; p-ratio; nk-rule;    
very_recom                             ok                             ok   
priority                               ok                             ok   
spec_prior                             ok                             ok   

parents                        great_pret  
recommend                                  
not_recom                              ok  
recommend   threshold; p-ratio; nk-rule;   
very_recom  threshold; p-ratio; nk-rule;   
priority                               ok  
spec_prior                             ok  
INFO:acro:get_summary(): fail; threshold: 4 cells suppressed; p-ratio: 4 cells suppressed; nk-rule: 4 cells suppressed; 
INFO:acro:add_ou

parents,usual,pretentious,great_pret
recommend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_recom,3.128472,3.119444,3.075694
recommend,,,
very_recom,2.260204,2.166667,
priority,3.136694,3.030997,2.611888
spec_prior,3.377309,3.321203,3.363996


### ACRO crosstab with missing values

In [None]:
# Set the module-level flag on `acro.utils` for this example (presumably it
# makes ACRO check for missing values -- confirm in acro.utils).
utils.CHECK_MISSING_VALUES = True

# Work on a float copy of the numeric column so NaN can be stored without
# touching `df`. The previous `df.children()` tried to *call* the Series and
# raised TypeError, and the `np.NaN` alias no longer exists in NumPy 2.
missing = df["children"].astype(float)
missing.iloc[0:10] = np.nan

# Cross-tabulate over columns that exist in the loaded frame
# (`recommend` x `parents`); `year` and `grant_type` are not columns of it.
safe_table = acro.crosstab(
    df["recommend"], df["parents"], values=missing, aggfunc="mean"
)
safe_table

In [None]:
# Reset the flag that the previous cell set to True so later cells run with
# it switched off again.
utils.CHECK_MISSING_VALUES = False

### ACRO crosstab with negative values

In [None]:
# Copy the numeric column and overwrite its first ten entries with a negative
# value; `df` itself is left unchanged. The previous version read
# `df.inc_grants`, which is not a column of the loaded frame.
negative = df["children"].copy()
negative.iloc[0:10] = -10

# Cross-tabulate over columns that exist in the loaded frame
# (`recommend` x `parents`) instead of the absent `year` / `grant_type`.
safe_table = acro.crosstab(
    df["recommend"], df["parents"], values=negative, aggfunc="mean"
)
safe_table

### ACRO pivot_table

In [None]:
# Mean and standard deviation of `children` for each `parents` group.
# The previous `grant_type` / `inc_grants` names are not columns of the
# loaded frame, so the call could not succeed.
table = acro.pivot_table(
    df, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

### ACRO pivot_table with missing values

In [None]:
# Set the module-level flag on `acro.utils` for this example (presumably it
# makes ACRO check for missing values -- confirm in acro.utils).
utils.CHECK_MISSING_VALUES = True

# Inject NaN into a copy so `df` stays intact for the cells that follow.
# `children` is cast to float first because NaN cannot be stored in an
# integer column. `.loc[0:10]` is label-based and inclusive (rows 0..10).
# The previous version wrote to `inc_grants`, which is not a column of the
# loaded frame, and used the `np.NaN` alias removed in NumPy 2.
df_missing = df.copy()
df_missing["children"] = df_missing["children"].astype(float)
df_missing.loc[0:10, "children"] = np.nan

table = acro.pivot_table(
    df_missing, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

In [None]:
# Reset the flag that the previous cell set to True so later cells run with
# it switched off again.
utils.CHECK_MISSING_VALUES = False

### ACRO pivot_table with negative values

In [None]:
# Inject negative values into a copy so `df` stays intact for the cells that
# follow. `.loc[0:10]` is label-based and inclusive (rows 0..10).
# The previous version wrote to `inc_grants`, which is not a column of the
# loaded frame, and then pivoted on the equally absent `grant_type`.
df_negative = df.copy()
df_negative.loc[0:10, "children"] = -10

table = acro.pivot_table(
    df_negative, index=["parents"], values=["children"], aggfunc=["mean", "std"]
)
table

### ACRO OLS

In [None]:
# NOTE(review): "inc_activity", "inc_donations" and "total_costs" do not
# appear among the columns printed by `describe()` for the frame loaded above,
# so this selection raises KeyError on that frame. Presumably the names belong
# to the commented-out Stata dataset -- TODO port to the current columns.
new_df = df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
# Keep only complete rows for the model fit.
new_df = new_df.dropna()

# Response variable and regressors; `add_constant` comes from the `acro`
# package (presumably it adds an intercept column -- confirm).
y = new_df["inc_activity"]
x = new_df[["inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

# Fit through ACRO's `ols` and display the summary of the returned result.
results = acro.ols(y, x)
results.summary()

### ACRO OLSR

In [None]:
# Formula-based version of the OLS cell: same response and regressors,
# evaluated against `new_df` as built in that cell.
# NOTE(review): the formula names the same columns that are missing from the
# frame loaded above -- TODO port together with the OLS cell.
results = acro.olsr(
    formula="inc_activity ~ inc_grants + inc_donations + total_costs", data=new_df
)
results.summary()

### ACRO Probit

In [None]:
# NOTE(review): "survivor" and the other names selected here do not appear
# among the columns printed by `describe()` for the frame loaded above, so
# this raises KeyError on that frame -- TODO port to the current columns.
new_df = df[["survivor", "inc_activity", "inc_grants", "inc_donations", "total_costs"]]
# Keep only complete rows for the model fit.
new_df = new_df.dropna()

# Encode the response as integer category codes and set the Series name
# explicitly on the result.
y = new_df["survivor"].astype("category").cat.codes  # numeric
y.name = "survivor"
x = new_df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

# Fit through ACRO's `probit` and display the summary of the returned result.
results = acro.probit(y, x)
results.summary()

### ACRO Logit

In [None]:
# Reuses the `y` and `x` built in the Probit cell, so that cell must have run
# first.
results = acro.logit(y, x)
results.summary()

### List current ACRO outputs

In [None]:
# Print the outputs currently held by the ACRO session.
acro.print_outputs()

### Remove some ACRO outputs before finalising

In [None]:
# Take one snapshot of the output names before removing anything, so both
# positions (1 and 4) refer to the same, unmodified ordering.
output_keys = list(acro.results.keys())
output_1 = output_keys[1]
output_4 = output_keys[4]

for stale_output in (output_1, output_4):
    acro.remove_output(stale_output)

### Rename ACRO outputs before finalising

In [None]:
# Rename the output currently at position 2 of `acro.results` to "pivot_table".
output_to_rename = list(acro.results.keys())[2]
acro.rename_output(output_to_rename, "pivot_table")

### Add a comment to output

In [None]:
# Attach two comments to the first output in `acro.results`.
# The comment text is corrected to describe the table the notebook produces
# first: the cross table of `recommend` by `parents`, for which the ACRO log
# reports 4 suppressed cells. The earlier text named "year and grant_type"
# and "6 cells", and misspelled "suppressed".
first_output = list(acro.results.keys())[0]
acro.add_comments(
    first_output, "This is a cross table between recommend and parents"
)
acro.add_comments(first_output, "4 cells were suppressed in this table")

### Add an unsupported output to the list of outputs

In [None]:
# Register a file by name together with a free-text description of it.
# NOTE(review): presumably "XandY.jfif" must exist relative to the working
# directory when the session is finalised -- confirm against acro.
acro.custom_output(
    "XandY.jfif", "This output is an image showing the relationship between X and Y"
)

### Finalise ACRO

In [None]:
# Alternative target (disabled); presumably selects an Excel export instead:
# output = acro.finalise("test_results.xlsx")
# Finalise the session with "test_results.json" as the target and keep the
# returned value in `output`.
output = acro.finalise("test_results.json")