In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [3]:
cur_folder_name = os.getcwd().split('/')[-1]
if cur_folder_name != "Virny":
    os.chdir("../..")

print('Current location: ', os.getcwd())

Current location:  /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny


# Multiple Models Interface Usage

In [4]:
import os
import pandas as pd

from virny.datasets import ACSIncomeDataset
from virny.custom_classes.metrics_composer import MetricsComposer
from virny.custom_classes.metrics_interactive_visualizer import MetricsInteractiveVisualizer

In [5]:
data_loader = ACSIncomeDataset(state=['GA'], year=2018, with_nulls=False, subsample_size=15_000, subsample_seed=42)
sensitive_attributes_dct = {'SEX': '2', 'RAC1P': ['2', '3', '4', '5', '6', '7', '8', '9'], 'SEX&RAC1P': None}

In [6]:
ROOT_DIR = os.path.join('docs', 'examples')
subgroup_metrics_df = pd.read_csv(os.path.join(ROOT_DIR, 'income_subgroup_metrics.csv'), header=0)
subgroup_metrics_df['Model_Name'] = (subgroup_metrics_df['Model_Name'] + '__alpha=' +
                                     subgroup_metrics_df['Intervention_Param'].astype(str))

In [7]:
model_names = subgroup_metrics_df['Model_Name'].unique()
models_metrics_dct = dict()
for model_name in model_names:
    models_metrics_dct[model_name] = subgroup_metrics_df[subgroup_metrics_df['Model_Name'] == model_name]

metrics_composer = MetricsComposer(models_metrics_dct, sensitive_attributes_dct)
models_composed_metrics_df = metrics_composer.compose_metrics()
models_composed_metrics_df.head()

Unnamed: 0,Metric,SEX,RAC1P,SEX&RAC1P,Model_Name
0,Accuracy_Parity,0.047756,0.074977,0.065217,LGBMClassifier__alpha=0.7
1,Aleatoric_Uncertainty_Parity,-0.039005,-0.011947,-0.009222,LGBMClassifier__alpha=0.7
2,Aleatoric_Uncertainty_Ratio,0.935159,0.979638,0.98422,LGBMClassifier__alpha=0.7
3,Equalized_Odds_FNR,0.030793,-0.110745,-0.052498,LGBMClassifier__alpha=0.7
4,Equalized_Odds_FPR,-0.021317,0.000952,-0.007008,LGBMClassifier__alpha=0.7


In [8]:
models_metrics_dct = dict()
for model_name in subgroup_metrics_df['Model_Name'].unique():
    models_metrics_dct[model_name] = subgroup_metrics_df[subgroup_metrics_df['Model_Name'] == model_name]

In [9]:
models_metrics_dct.keys()

dict_keys(['LGBMClassifier__alpha=0.7', 'LGBMClassifier__alpha=0.0', 'LGBMClassifier__alpha=0.4', 'LogisticRegression__alpha=0.0', 'LogisticRegression__alpha=0.7', 'LogisticRegression__alpha=0.4', 'MLPClassifier__alpha=0.0', 'MLPClassifier__alpha=0.7', 'MLPClassifier__alpha=0.4', 'RandomForestClassifier__alpha=0.4', 'RandomForestClassifier__alpha=0.7', 'RandomForestClassifier__alpha=0.0'])

## Metrics Visualization and Reporting

In [73]:
visualizer = MetricsInteractiveVisualizer(data_loader.X_data, data_loader.y_data,
                                          models_metrics_dct, models_composed_metrics_df,
                                          sensitive_attributes_dct=sensitive_attributes_dct)

In [None]:
visualizer.start_web_app()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Traceback (most recent call last):
  File "/Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny/virny_env/lib/python3.9/site-packages/gradio/routes.py", line 538, in predict
    output = await route_utils.call_process_api(
  File "/Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny/virny_env/lib/python3.9/site-packages/gradio/route_utils.py", line 217, in call_process_api
    output = await app.get_blocks().process_api(
  File "/Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny/virny_env/lib/python3.9/site-packages/gradio/blocks.py", line 1553, in process_api
    result = await self.call_function(
  File "/Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny/virny_env/lib/python3.9/site-packages/gradio/blocks.py", line 1191, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny/virny_env/lib/python3.9/site-packages/anyio/to_thread.py", li

In [12]:
visualizer.stop_web_app()

Closing server running on port: 7860


In [63]:
def rank_with_tolerance(pd_series: pd.Series, tolerance: float = 0.01, method: str = 'dense'):
    """
    Rank a pandas series with defined tolerance.
    Ref: https://stackoverflow.com/questions/72956450/pandas-ranking-with-tolerance

    Parameters
    ----------
    pd_series
        A pandas series to rank
    tolerance
        A float value for ranking
    method
        Ranking methods for numpy.rank()

    Returns
    -------
    A pandas series with dense ranks for the input pd series.

    """
    tolerance += 1e-10 # Add 0.0000000001 for correct comparison of float numbers
    vals = pd.Series(pd_series.unique()).sort_values()
    vals.index = vals
    print('vals1 -- ', vals)
    vals = vals.mask(vals - vals.shift(1) < tolerance, vals.shift(1))
    print('vals2 -- ', vals)

    return pd_series.map(vals).fillna(pd_series).rank(method=method)

In [70]:
import pandas as pd

# df = pd.Series([0.002, 0.003, 0.002, 0.005])
df = pd.Series([0.002102,0.003088,0.002214,0.004906])
rank_with_tolerance(df, tolerance=0.005)

vals1 --  0.002102    0.002102
0.002214    0.002214
0.003088    0.003088
0.004906    0.004906
dtype: float64
vals2 --  0.002102    0.002102
0.002214    0.002102
0.003088    0.002214
0.004906    0.003088
dtype: float64


0    1.0
1    2.0
2    1.0
3    3.0
dtype: float64