# ACRO Tests

In [None]:
import os
import sys
import pandas as pd
import numpy as np

In [None]:
# Development mode: put the parent directory at the front of the import path
# so the local acro checkout is importable when the package is not installed.
# Comment this line out if acro is installed.
sys.path.insert(0, os.path.abspath(".."))

In [None]:
from acro import ACRO, acro_tables, add_constant, utils

### Instantiate ACRO

In [None]:
# Create the ACRO object shared by every cell below; suppression starts off.
acro = ACRO(suppress=False)

### Load test data

In [None]:
# Read the Stata test dataset from the ../data directory (relative to the
# working directory) and preview its first rows.
path = os.path.join("../data", "test_data.dta")
df = pd.read_stata(path)
df.head()

### Pandas crosstab

In [None]:
# Baseline without ACRO: plain pandas frequency table of year (rows) against
# the survivor / grant_type column hierarchy.
table = pd.crosstab(df.year, [df.survivor, df.grant_type])
table

### ACRO crosstab

In [None]:
# The same cross-tabulation as the pandas cell, requested through the ACRO object.
safe_table = acro.crosstab(df.year, [df.survivor, df.grant_type])
safe_table

### Same table with the column hierarchy reversed, to make sure spaces in variable names are dealt with properly

In [None]:
# Column hierarchy swapped: grant_type is now the outer level, survivor the inner.
safe_table = acro.crosstab(df.year, [df.grant_type, df.survivor])
safe_table

### checking for testing purposes

In [None]:
# Keep only rows whose charity value starts with "W" and whose year is before
# 2012, then cross-tabulate year against survivor on that subset.
mydata = df[(df["charity"].str[0] == "W") & (df["year"] < 2012)]
acro.crosstab(mydata.year, mydata.survivor)

### ACRO crosstab with suppression

In [None]:
# Switch suppression on for this table.
acro.suppress = True

# Mean of inc_grants for each year / grant_type combination.
safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc="mean")
safe_table

### ACRO crosstab with suppression and totals

In [None]:
# NOTE(review): the section heading says "with suppression", but suppression is
# switched OFF here (and reset to False again in the next cell) — confirm
# whether this was meant to be True.
acro.suppress = False
# Count of inc_grants per year and (grant_type, survivor), with margins (totals).
table = acro.crosstab(
    df.year,
    [df.grant_type, df.survivor],
    values=df.inc_grants,
    aggfunc="count",
    margins=True,
)
print(table)

In [None]:
# Ensure suppression is off for the cells that follow.
acro.suppress = False

### ACRO crosstab with aggregation function

In [None]:
# Mean of inc_grants for each year / survivor combination.
safe_table = acro.crosstab(df.year, df.survivor, values=df.inc_grants, aggfunc="mean")
safe_table

### ACRO crosstab with multiple aggregation functions and totals

In [None]:
# Two aggregations at once (mean and standard deviation) of inc_grants per
# year / grant_type combination, with margins (totals) requested.
safe_table = acro.crosstab(
    df.year, df.grant_type, values=df.inc_grants, aggfunc=["mean", "std"], margins=True
)
safe_table

### ACRO crosstab with missing values
- This is an example of a crosstab where there are missing values that have not been filled or dealt with in the data.
- Note that you need to set the CHECK_MISSING_VALUES variable in the acro `utils` module to True (`utils.CHECK_MISSING_VALUES = True`), then run the crosstab command.
- In this example, ten values in the column inc_grants were set to NaN to represent missing data.
- In this version of acro, checking the disclosiveness of an output with missing values is not supported.
- The status of the command will be "review" to indicate that the output needs to be checked by the output checker to review if the output is disclosive or not.
- In the outcome_df each cell with missing value/values will be shown as missing.
- The output hist will not be suppressed even if the suppress=True.

In [None]:
# Turn on the missing-value check in acro's utils module for this example.
utils.CHECK_MISSING_VALUES = True

# Work on a copy so df itself is untouched, then blank out the first ten values.
missing = df.inc_grants.copy()
# Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
missing[0:10] = np.nan

# Mean of the partially-missing values per year / grant_type, with margins.
safe_table = acro.crosstab(
    df.year, df.grant_type, values=missing, aggfunc="mean", margins=True
)
safe_table

In [None]:
# Switch the missing-value check back off for the cells that follow.
utils.CHECK_MISSING_VALUES = False

### ACRO crosstab with negative values

In [None]:
# Work on a copy so df itself is untouched; overwrite the first ten values with -10.
negative = df.inc_grants.copy()
negative[0:10] = -10

# Mean of the partly-negative values per year / grant_type combination.
safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc="mean")
safe_table

### ACRO pivot_table

In [None]:
# Sum of inc_grants with grant_type as rows and year as columns, margins requested.
table = acro.pivot_table(
    df,
    index=["grant_type"],
    columns=["year"],
    values=["inc_grants"],
    margins=True,
    aggfunc="sum",
)
table

In [None]:
# Mean and standard deviation of inc_grants for each grant_type.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

### ACRO pivot_table with missing values

In [None]:
# Turn on the missing-value check in acro's utils module for this example.
utils.CHECK_MISSING_VALUES = True

# Unlike the crosstab example, this writes into df itself, so later cells see
# the modified column. .loc slices by label and includes the end label (0..10).
# Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
df.loc[0:10, "inc_grants"] = np.nan

# Mean and standard deviation of inc_grants for each grant_type.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

In [None]:
# Switch the missing-value check back off for the cells that follow.
utils.CHECK_MISSING_VALUES = False

### ACRO pivot_table with negative values

In [None]:
# Overwrite inc_grants in df itself for row labels 0 through 10 (.loc includes
# the end label) with -10; every later cell that reads df sees these values.
df.loc[0:10, "inc_grants"] = -10

# Mean and standard deviation of inc_grants for each grant_type.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table

### ACRO OLS

In [None]:
# Regression inputs: keep the four modelling columns and discard every row
# that has a missing value in any of them.
new_df = df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]].dropna()

# Dependent variable, and the explanatory columns passed through add_constant.
y = new_df["inc_activity"]
x = add_constant(new_df[["inc_grants", "inc_donations", "total_costs"]])

results = acro.ols(y, x)
results.summary()

### ACRO OLSR

In [None]:
# Formula-string interface using the same dependent and explanatory variables
# as the OLS cell; reuses new_df built there.
results = acro.olsr(
    formula="inc_activity ~ inc_grants + inc_donations + total_costs", data=new_df
)
results.summary()

### ACRO Probit

In [None]:
# Model inputs: the outcome plus four explanatory columns, with every row that
# has a missing value in any of them discarded.
new_df = df[
    ["survivor", "inc_activity", "inc_grants", "inc_donations", "total_costs"]
].dropna()

# Encode survivor as numeric category codes, keeping "survivor" as the name.
y = new_df["survivor"].astype("category").cat.codes.rename("survivor")
# Explanatory columns passed through add_constant.
x = add_constant(
    new_df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
)

results = acro.probit(y, x)
results.summary()

### ACRO Logit

In [None]:
# Logit model on the same y and x that were built in the Probit cell.
results = acro.logit(y, x)
results.summary()

### ACRO Histogram without suppression

In [None]:
# Histogram of inc_grants; suppression is off at this point (reset earlier).
# Prints whatever acro.hist returns.
hist = acro.hist(df, "inc_grants")
print(hist)

### ACRO Histogram with suppression

In [None]:
# Same histogram request, this time with suppression switched on.
acro.suppress = True
hist = acro.hist(df, "inc_grants")

In [None]:
# Switch suppression back off after the histogram example.
acro.suppress = False

### List current ACRO outputs

In [None]:
# List the outputs currently held by the ACRO object; the return value is kept
# in results_str.
results_str = acro.print_outputs()

### Remove some ACRO outputs before finalising

In [None]:
# Drop the outputs named output_1 and output_4 before finalising.
acro.remove_output("output_1")
acro.remove_output("output_4")

### Rename ACRO outputs before finalising

In [None]:
# Give output_2 the more descriptive name "pivot_table".
acro.rename_output("output_2", "pivot_table")

### Add a comment to output

In [None]:
# Attach two free-text comments to output_0.
acro.add_comments("output_0", "This is a cross table between year and grant_type")
acro.add_comments("output_0", "6 cells were suppressed in this table")

### Add an unsupported output to the list of outputs

In [None]:
# Register a file (here an image, XandY.jpeg) as a custom output, together
# with a description of what it shows.
acro.custom_output(
    "XandY.jpeg", "This output is an image showing the relationship between X and Y"
)

### Request an exception for some of the outputs

In [None]:
# Attach an exception request, with a free-text justification, to each of
# these outputs.
acro.add_exception("output_0", "I really need this.")
acro.add_exception("output_3", "This one is safe. Trust me, I'm a professor.")
acro.add_exception("output_5", "It's not disclosive, I promise.")
acro.add_exception("output_6", "I need this one too")

### Finalise ACRO

In [None]:
# Directory passed to finalise; the listing cells below read it back.
SAVE_PATH = "ACRO_RES"

# To request the "xlsx" format instead of "json", use:
# output = acro.finalise(SAVE_PATH, "xlsx")
output = acro.finalise(SAVE_PATH, "json")

### List files generated

In [None]:
# Print, in sorted order, the name of every regular file directly inside
# SAVE_PATH (sub-directories are skipped).
files = sorted(
    entry
    for entry in os.listdir(SAVE_PATH)
    if os.path.isfile(os.path.join(SAVE_PATH, entry))
)
for f in files:
    print(f)

### Checksums

In [None]:
# Print, in sorted order, the name of every regular file directly inside the
# "checksums" sub-directory of SAVE_PATH.
checksum_dir = os.path.join(SAVE_PATH, "checksums")
files = sorted(
    entry
    for entry in os.listdir(checksum_dir)
    if os.path.isfile(os.path.join(checksum_dir, entry))
)
for f in files:
    print(f)