# Аудит и сравнение моделей

In [4]:
from typing import Callable
from warnings import catch_warnings, filterwarnings

from aif360.sklearn.metrics import difference, ratio
from aif360.sklearn.metrics import average_odds_difference, equal_opportunity_difference, statistical_parity_difference, disparate_impact_ratio, consistency_score, generalized_entropy_index, generalized_entropy_error, between_group_generalized_entropy_error, theil_index, coefficient_of_variation

from numbers import Number
import numpy as np
import pandas as pd
import sklearn.metrics as sk_metric
from typing import List, Tuple, Type, Union

import matplotlib

# Container types accepted for labels and predictions. A plain Union is used
# because Type[...] would describe the classes themselves rather than instances.
ArrayLike = Union[List, Tuple, np.ndarray, pd.Series, pd.DataFrame]

# Select matplotlib's 'TkAgg' backend for this session.
matplotlib.use('TkAgg')

#### Базовые метрики, отсутствующие в библиотеках

In [2]:
def binary_prediction_results(y_true: ArrayLike, y_pred: ArrayLike):
    """Count the confusion-matrix outcomes for labels 0 and 1.

    Returns:
        dict with the keys "TP", "FP", "TN" and "FN".
    """
    # labels=[0, 1] pins the matrix to 2x2; its rows unpack as (TN, FP), (FN, TP).
    matrix = sk_metric.confusion_matrix(y_true, y_pred, labels=[0, 1])
    (tn, fp), (fn, tp) = matrix
    return {"TP": tp, "FP": fp, "TN": tn, "FN": fn}

def epsilon():
    """Return the machine epsilon of 64-bit floats."""
    float64_info = np.finfo(np.float64)
    return float64_info.eps

def check_result(
    res: Number,
    metric_name: str,
    custom_lower: Number = None,
    custom_upper: Number = None,
):
    """Return `res` if it lies strictly inside the expected range of a metric.

    Args:
        res: metric value to validate; NaN is passed through unchecked.
        metric_name: name used in the error message.
        custom_lower: exclusive lower bound; defaults to 0 minus 100 machine epsilons.
        custom_upper: exclusive upper bound; defaults to 1 plus 100 machine epsilons.

    Raises:
        ValueError: if `res` is not NaN and falls outside the bounds.
    """
    if np.isnan(res):
        return res

    lower = custom_lower
    if lower is None:
        lower = 0 - 100 * epsilon()
    upper = custom_upper
    if upper is None:
        upper = 1 + 100 * epsilon()

    if lower < res < upper:
        return res
    raise ValueError(f"{metric_name} result out of range ({res})")

def ratio(numerator: Number, denominator: Number):
    """Divide `numerator` by `denominator`, avoiding division by zero.

    A zero denominator is replaced by machine epsilon.

    NOTE(review): this definition reuses the name `ratio` that the file also
    imports from aif360.sklearn.metrics, so it replaces that import for any
    code that runs after it.
    """
    divisor = epsilon() if denominator == 0 else denominator
    return numerator / divisor


""" Метрики """

def accuracy(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the share of samples whose prediction matches the true label.

    Args:
        y_true: true target values.
        y_pred: predicted target values.

    Raises:
        ValueError: if the result falls outside the range accepted by check_result.
    """
    rprt = binary_prediction_results(y_true, y_pred)
    # len() gives the number of samples for lists and tuples as well as for
    # arrays and pandas objects, whereas .shape only exists on the latter.
    res = ratio(rprt["TP"] + rprt["TN"], len(y_true))
    return check_result(res, "Accuracy")

def balanced_accuracy(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the mean of the true positive rate and the true negative rate."""
    sensitivity = true_positive_rate(y_true, y_pred)
    specificity = true_negative_rate(y_true, y_pred)
    return check_result(ratio(sensitivity + specificity, 2), "Balanced Accuracy")

def false_negative_rate(y_true: ArrayLike, y_pred: ArrayLike):
    """Return FN / (FN + TP)."""
    counts = binary_prediction_results(y_true, y_pred)
    missed, found = counts["FN"], counts["TP"]
    return check_result(ratio(missed, missed + found), "FNR")

def false_positive_rate(y_true: ArrayLike, y_pred: ArrayLike):
    """Return FP / (FP + TN).

    Raises:
        ValueError: if the result falls outside the range accepted by check_result.
    """
    rprt = binary_prediction_results(y_true, y_pred)
    res = ratio(rprt["FP"], rprt["FP"] + rprt["TN"])
    # The metric name appears in the error message; it previously carried a stray "}".
    return check_result(res, "FPR")

def f1_score(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the F1 score: 2 * (precision * recall) / (precision + recall)."""
    ppv = precision(y_true, y_pred)
    recall = true_positive_rate(y_true, y_pred)
    half_f1 = ratio(ppv * recall, ppv + recall)
    return check_result(2 * half_f1, "F1 Score")

def negative_predictive_value(y_true: ArrayLike, y_pred: ArrayLike):
    """Return TN / (TN + FN).

    Raises:
        ValueError: if the result falls outside the range accepted by check_result.
    """
    rprt = binary_prediction_results(y_true, y_pred)
    res = ratio(rprt["TN"], rprt["TN"] + rprt["FN"])
    # Validate the result like every other rate in this file does.
    return check_result(res, "NPV")

def roc_auc_score(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the area under the ROC curve.

    Falls back to 0 when scikit-learn raises ValueError for the inputs.
    """
    metric_name = "ROC AUC Score"
    try:
        area = sk_metric.roc_auc_score(y_true, y_pred)
    except ValueError:
        return check_result(0, metric_name)
    return check_result(area, metric_name)

def pr_auc_score(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the area under the precision-recall curve.

    Returns NaN when scikit-learn raises ValueError for the inputs.
    """
    try:
        prc, rec, _ = sk_metric.precision_recall_curve(y_true, y_pred)
        # auc(x, y) needs a monotonic x: recall is the x axis and precision
        # the y axis. With the arguments swapped, a non-monotonic precision
        # raised ValueError and the score silently became NaN.
        res = sk_metric.auc(rec, prc)
    except ValueError:
        res = np.nan
    return check_result(res, "PR AUC Score")

def precision(y_true: ArrayLike, y_pred: ArrayLike):
    """Return the positive predictive value, TP / (TP + FP)."""
    counts = binary_prediction_results(y_true, y_pred)
    hits, false_alarms = counts["TP"], counts["FP"]
    return check_result(ratio(hits, hits + false_alarms), "Precision")

def true_negative_rate(y_true: ArrayLike, y_pred: ArrayLike):
    """Return TN / (FP + TN)."""
    counts = binary_prediction_results(y_true, y_pred)
    rejected, false_alarms = counts["TN"], counts["FP"]
    return check_result(ratio(rejected, false_alarms + rejected), "TNR")

def true_positive_rate(y_true: ArrayLike, y_pred: ArrayLike):
    """Return TP / (FN + TP)."""
    counts = binary_prediction_results(y_true, y_pred)
    hits, missed = counts["TP"], counts["FN"]
    return check_result(ratio(hits, missed + hits), "TPR")

def __manage_undefined_ratios(func: Callable):
    """Decorate a ratio function so an undefined ratio yields NaN instead of 0.0.

    The wrapped function is expected to signal an undefined ratio with a
    warning whose text starts with "The ratio is ill-defined". When such a
    warning is recorded the wrapper returns NaN; otherwise it returns the
    function's own result. Warnings with any other text are re-issued to the
    caller rather than being swallowed or turned into NaN.

    Args:
        func: the ratio function to wrap.

    Returns:
        The wrapping function, carrying the name and docstring of `func`.
    """
    import functools
    import warnings

    undefined_prefix = "The ratio is ill-defined"

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with catch_warnings(record=True) as caught:
            # Record every warning: an "ignore" filter for the ill-defined
            # message would hide exactly the warning that must be detected.
            warnings.simplefilter("always")
            res = func(*args, **kwargs)

        undefined = False
        for item in caught:
            if str(item.message).startswith(undefined_prefix):
                undefined = True
            else:
                # Pass unrelated warnings on with their original location.
                warnings.warn_explicit(
                    item.message, item.category, item.filename, item.lineno
                )
        return np.nan if undefined else res

    return wrapper

    #y_true (pd.Series): true target values
    #y_pred (pd.Series): predicted target values
    #pa_name (str): name of the protected attribute
    #priv_grp (int, optional): privileged group

@__manage_undefined_ratios
def ppv_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group ratio of positive predictive values.

    Args:
        y_true: true target values.
        y_pred: predicted target values.
        pa_name: name of the protected attribute.
        priv_grp: value identifying the privileged group.
    """
    # The module-level name `ratio` is rebound in this file to the two-number
    # division helper, so aif360's group-wise ratio is imported under an alias.
    from aif360.sklearn.metrics import ratio as group_ratio

    return group_ratio(precision, y_true, y_pred, prot_attr=pa_name, priv_group=priv_grp)

@__manage_undefined_ratios
def tpr_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group ratio of true positive rates.

    Args:
        y_true: true target values.
        y_pred: predicted target values.
        pa_name: name of the protected attribute.
        priv_grp: value identifying the privileged group.
    """
    # The module-level name `ratio` is rebound in this file to the two-number
    # division helper, so aif360's group-wise ratio is imported under an alias.
    from aif360.sklearn.metrics import ratio as group_ratio

    return group_ratio(true_positive_rate, y_true, y_pred, prot_attr=pa_name, priv_group=priv_grp)

@__manage_undefined_ratios
def fpr_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group ratio of false positive rates.

    Args:
        y_true: true target values.
        y_pred: predicted target values.
        pa_name: name of the protected attribute.
        priv_grp: value identifying the privileged group.
    """
    # The module-level name `ratio` is rebound in this file to the two-number
    # division helper, so aif360's group-wise ratio is imported under an alias.
    from aif360.sklearn.metrics import ratio as group_ratio

    return group_ratio(false_positive_rate, y_true, y_pred, prot_attr=pa_name, priv_group=priv_grp)

@__manage_undefined_ratios
def tnr_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group ratio of true negative rates.

    Args:
        y_true: true target values.
        y_pred: predicted target values.
        pa_name: name of the protected attribute.
        priv_grp: value identifying the privileged group.
    """
    # The module-level name `ratio` is rebound in this file to the two-number
    # division helper, so aif360's group-wise ratio is imported under an alias.
    from aif360.sklearn.metrics import ratio as group_ratio

    return group_ratio(true_negative_rate, y_true, y_pred, prot_attr=pa_name, priv_group=priv_grp)

@__manage_undefined_ratios
def fnr_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group ratio of false negative rates.

    Args:
        y_true: true target values.
        y_pred: predicted target values.
        pa_name: name of the protected attribute.
        priv_grp: value identifying the privileged group.
    """
    # The module-level name `ratio` is rebound in this file to the two-number
    # division helper, so aif360's group-wise ratio is imported under an alias.
    from aif360.sklearn.metrics import ratio as group_ratio

    return group_ratio(false_negative_rate, y_true, y_pred, prot_attr=pa_name, priv_group=priv_grp)

def tpr_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in true positive rates."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(true_positive_rate, y_true, y_pred, **group_spec)

def fpr_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in false positive rates."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(false_positive_rate, y_true, y_pred, **group_spec)

def tnr_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in true negative rates."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(true_negative_rate, y_true, y_pred, **group_spec)

def fnr_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in false negative rates."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(false_negative_rate, y_true, y_pred, **group_spec)


    # Составные метрики (предвзятость) #

def eq_odds_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return whichever between-group difference, FPR or TPR, is larger in magnitude.

    The TPR difference is returned unless the FPR difference is strictly
    larger in absolute value.
    """
    fpr_gap = fpr_diff(y_true, y_pred, pa_name=pa_name, priv_grp=priv_grp)
    tpr_gap = tpr_diff(y_true, y_pred, pa_name=pa_name, priv_grp=priv_grp)
    return fpr_gap if abs(fpr_gap) > abs(tpr_gap) else tpr_gap

def eq_odds_ratio(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return whichever between-group ratio, FPR or TPR, lies further from 1.

    Returns NaN if either ratio is NaN. Distances from 1 are compared after
    rounding to 6 decimals; the TPR ratio is returned unless the FPR ratio is
    strictly further away.
    """
    fpr_r = fpr_ratio(y_true, y_pred, pa_name=pa_name, priv_grp=priv_grp)
    tpr_r = tpr_ratio(y_true, y_pred, pa_name=pa_name, priv_grp=priv_grp)
    if any(np.isnan(value) for value in (fpr_r, tpr_r)):
        return np.nan

    fpr_distance = round(abs(fpr_r - 1), 6)
    tpr_distance = round(abs(tpr_r - 1), 6)
    return fpr_r if fpr_distance > tpr_distance else tpr_r

def ppv_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in positive predictive values."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(precision, y_true, y_pred, **group_spec)

def bal_diff(y_true: pd.Series, y_pred: pd.Series, pa_name: str, priv_grp: int = 1):
    """Return the between-group difference in balanced accuracy."""
    group_spec = {"prot_attr": pa_name, "priv_group": priv_grp}
    return difference(balanced_accuracy, y_true, y_pred, **group_spec)

#### Сводная таблица показателей
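    Args:
        X (pandas DataFrame): sample features
        prtc_attr (named array-like): values of the protected attribute (the protected attribute may also be present in X)
        y_true (pandas DataFrame): true target values
        y_pred (pandas DataFrame): predicted target values
        y_prob (pandas DataFrame, optional): predicted target probabilities for the samples. Defaults to None.
    Returns:
        group_fairness, individual_fairness and performance_measures each return a tuple (section name, dict mapping measure name to value); classification_fairness returns a pandas DataFrame of all measures.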

In [71]:
def group_fairness(X, prtc_attr, y_true, y_pred, y_prob=None, priv_grp=1):
    """Compute the group-fairness measures for one model's predictions.

    Args:
        X: sample features (not used by this function).
        prtc_attr: protected attribute specification, forwarded as `prot_attr`
            to the aif360 metrics and as `pa_name` to the local helpers.
        y_true: true target values.
        y_pred: predicted target values.
        y_prob: predicted probabilities (not used by this function).
        priv_grp: value identifying the privileged group.

    Returns:
        tuple: ('Group Fairness', dict mapping measure name to value).
    """
    pa_names = prtc_attr
    # A bare string names exactly one attribute; len() on it would count
    # characters and wrongly skip the single-attribute measures.
    single_attr = isinstance(pa_names, str) or not len(pa_names) > 1
    # Pass the privileged group to every metric, not only to the last two.
    group_spec = {"prot_attr": pa_names, "priv_group": priv_grp}

    gf_key = 'Group Fairness'
    gf_vals = {}

    gf_vals['Equal Opportunity Difference'] = equal_opportunity_difference(y_true, y_pred, **group_spec)
    if single_attr:
        # aif360 metrics take `prot_attr`; `sensitive_features` is not one of their keywords.
        # NOTE(review): the key is misspelled ("Statictical"); kept unchanged
        # so existing lookups by this label keep working.
        gf_vals['Statictical Parity Difference'] = statistical_parity_difference(y_true, y_pred, **group_spec)
    gf_vals['Average Odds Difference'] = average_odds_difference(y_true, y_pred, **group_spec)
    gf_vals['Disparate Impact Ratio'] = disparate_impact_ratio(y_true, y_pred, **group_spec)

    if single_attr:
        gf_vals['Equal Odds Difference'] = eq_odds_diff(y_true, y_pred, prtc_attr, priv_grp)
        gf_vals['Equal Odds Ratio'] = eq_odds_ratio(y_true, y_pred, prtc_attr, priv_grp)

    gf_vals['Positive Predictive Parity Difference'] = ppv_diff(y_true, y_pred, pa_names, priv_grp)
    gf_vals['Balanced Accuracy Difference'] = bal_diff(y_true, y_pred, pa_names, priv_grp)

    return (gf_key, gf_vals)


def individual_fairness(X, prtc_attr, y_true, y_pred):
    """Compute the individual-fairness measures for one model's predictions.

    Only the consistency score of `y_pred` over `X` is computed; `prtc_attr`
    and `y_true` are accepted but not used. The generalized-entropy, Theil
    index, coefficient-of-variation and entropy-error measures imported by
    the file are not computed here.

    Returns:
        tuple: ('Individual Fairness', dict mapping measure name to value).
    """
    measures = {'Consistency Score': consistency_score(X, y_pred)}
    return ('Individual Fairness', measures)


def performance_measures(y_true, y_pred):
    """Compute the model-performance measures for one model's predictions.

    NOTE(review): the file later imports `f1_score` from sklearn.metrics,
    which rebinds that name once the import has run.

    Returns:
        tuple: ('Model Performance', dict mapping measure name to value).
    """
    scorers = (
        ('Accuracy', accuracy),
        ('F1-Score', f1_score),
        ('FPR', false_positive_rate),
        ('TPR', true_positive_rate),
        ('Precision', precision),
    )
    scores = {label: scorer(y_true, y_pred) for label, scorer in scorers}
    return ('Model Performance', scores)

In [30]:
def format_fairtest_input(X, prtc_attr, y_true, y_pred, y_prob=None):
    """Validate the audit inputs and convert them to pandas DataFrames.

    The target frames are returned with the protected attribute(s) set as
    their index, and the protected attribute columns are cast to int. The
    caller's objects are not modified.

    Args:
        X: sample features (DataFrame, Series or ndarray).
        prtc_attr: protected attribute values (DataFrame, Series or ndarray).
        y_true: true target values, one column.
        y_pred: predicted target values, one column.
        y_prob: optional predicted probabilities, one column.

    Returns:
        tuple: (X, prtc_attr, y_true, y_pred, y_prob).

    Raises:
        TypeError: if an input has an unsupported type, or a target has more
            than one column.
        ValueError: if an input has fewer than two rows, or a protected
            attribute is not a two-valued binary/boolean column.
    """
    valid_data_types = (pd.DataFrame, pd.Series, np.ndarray)
    for data in [X, prtc_attr, y_true, y_pred]:
        if not isinstance(data, valid_data_types):
            raise TypeError("input data is invalid type")
        if not data.shape[0] > 1:
            raise ValueError("input data is too small to measure")
    if y_prob is not None and not isinstance(y_prob, valid_data_types):
        raise TypeError("y_prob is invalid type")

    # Format inputs to required datatypes
    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)
    if isinstance(prtc_attr, pd.Series):
        # to_frame() also handles an unnamed Series, which gets column label 0
        prtc_attr = prtc_attr.to_frame()
    elif isinstance(prtc_attr, np.ndarray):
        prtc_attr = pd.DataFrame(prtc_attr)
    # Convert Series targets as well as arrays: a Series has no shape[1],
    # so the one-column check below would otherwise raise IndexError.
    y_true, y_pred, y_prob = (
        pd.DataFrame(target) if isinstance(target, (np.ndarray, pd.Series)) else target
        for target in (y_true, y_pred, y_prob)
    )
    for data in [y_true, y_pred, y_prob]:
        if data is not None and data.shape[1] > 1:
            raise TypeError("targets and predictions must be 1-Dimensional")

    # Format and set sensitive attributes as index for y dataframes
    pa_name = prtc_attr.columns.tolist()
    # reset_index returns a new frame, so the caller's object stays untouched
    prtc_attr = prtc_attr.reset_index(drop=True)
    y_true = pd.concat([prtc_attr, y_true.reset_index(drop=True)], axis=1).set_index(pa_name)
    y_pred = pd.concat([prtc_attr, y_pred.reset_index(drop=True)], axis=1).set_index(pa_name)
    y_pred.columns = y_true.columns
    if y_prob is not None:
        y_prob = pd.concat([prtc_attr, y_prob.reset_index(drop=True)], axis=1).set_index(pa_name)
        y_prob.columns = y_true.columns

    # Ensure that protected attributes are integer-valued
    # NOTE(review): integer-named columns are renamed below, after the target
    # indexes were built from the old names, so the two can differ — confirm
    # this is intended.
    for c in prtc_attr.columns.tolist():
        as_int = prtc_attr[c].astype(int)
        binary = set(as_int) == set(prtc_attr[c])
        boolean = prtc_attr[c].dtype == bool
        two_valued = set(as_int) == {0, 1}
        # Valid means two-valued AND (integer-valued or boolean). Without the
        # parentheses, `not` bound only to two_valued and non-binary float
        # columns passed the check.
        if not (two_valued and (binary or boolean)):
            raise ValueError("prtc_attr must be binary or boolean and heterogeneous")
        prtc_attr[c] = as_int
        if isinstance(c, int):
            prtc_attr = prtc_attr.rename(columns={c: f"prtc_attribute_{c}"})

    return (X, prtc_attr, y_true, y_pred, y_prob)

In [69]:
def classification_fairness(X, prtc_attr, y_true, y_pred, y_prob=None,priv_grp=1):
    """Return a DataFrame of fairness and performance measures for a model.

    The result has one 'Value' column, indexed by (section, measure name),
    with values rounded to 4 decimals.

    NOTE: input formatting through format_fairtest_input and the
    binary-target check are currently disabled; inputs are passed to the
    measure functions as given.
    """
    sections = (
        group_fairness(X, prtc_attr, y_true, y_pred, y_prob, priv_grp),
        individual_fairness(X, prtc_attr, y_true, y_pred),
        performance_measures(y_true, y_pred),
    )
    measures = dict(sections)

    # Convert the nested dict of scores to a single-column table
    stacked = pd.DataFrame.from_dict(measures, orient="index").stack().to_frame()
    table = pd.DataFrame(stacked[0].values.tolist(), index=stacked.index)
    table.columns = ['Value']
    table['Value'] = table.loc[:, 'Value'].round(4)
    table.fillna("", inplace=True)
    return table

# Тестирование

In [5]:
import pandas as pd
import numpy as np
from fairml import audit_model
from fairml import plot_dependencies
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import tkinter
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [6]:
# Training data
df_train = pd.read_csv(
    "./input/Webpages_Classification_train_data.csv/Webpages_Classification_train_data.csv"
).drop(columns="Unnamed: 0")

# Test data
df_test = pd.read_csv(
    "./input/Webpages_Classification_test_data.csv/Webpages_Classification_test_data.csv"
).drop(columns="Unnamed: 0")

# Country code table
count = pd.read_csv('./input/tableconvert_csv_pkcsig.csv')

In [7]:
# Lookup from country name to its alpha-3 code
countries = dict(zip(count['Country'], count['Alpha-3 code']))

# Assign the replaced column back: replace(inplace=True) on a column selection
# is chained assignment, which pandas warns about and which does not update
# the frame under copy-on-write.
df_train['iso_3'] = df_train['geo_loc'].replace(countries)

df_train['https'] = df_train['https'].replace({'yes' : 'HTTPS', 'no' : 'HTTP'})

# Создание новых признаков
def count_special(string):
    """Count the characters that are not lowercase, uppercase, digits or spaces."""
    return sum(
        1
        for ch in string
        if not (ch.islower() or ch.isupper() or ch.isdigit()) and ch != ' '
    )

def network_type(ip):
    """Return (network part, class letter) for a dotted IPv4 address string.

    The class is chosen from the first octet: 0-127 gives "A" with one octet
    as the network part, 128-191 gives "B" with two octets, and anything else
    gives "C" with three octets.

    Raises:
        ValueError: if any dot-separated part is not an integer.
    """
    octets = ip.split(".")
    # Convert every part so that any malformed octet raises
    first_octet = [int(part) for part in octets][0]

    if 0 <= first_octet <= 127:
        prefix_len, net_class = 1, "A"
    elif 128 <= first_octet <= 191:
        prefix_len, net_class = 2, "B"
    else:
        prefix_len, net_class = 3, "C"
    return (".".join(octets[:prefix_len]), net_class)
    
# Add the network part and network class features (no temporary column needed)
df_train['net_part'], df_train['net_type'] = zip(*df_train['ip_add'].apply(network_type))

# Number of special characters in the content
df_train['special_char'] = df_train['content'].apply(count_special)

# Length of the content
df_train['content_len'] = df_train['content'].apply(len)

# Assign the replaced column back: replace(inplace=True) on a column selection
# is chained assignment and does not update the frame under copy-on-write.
df_train['label'] = df_train['label'].replace({'bad' : 'Malicious', 'good' : 'Benign'})

# Cast explicitly so the label stays an integer column even where replace()
# no longer downcasts the result automatically.
df_train['label'] = df_train['label'].replace({'Malicious' : 1, 'Benign' : 0}).astype(int)

# Categorical features to label-encode; the fitted encoders are kept per feature
ls = ['geo_loc', 'tld', 'who_is', 'https', 'net_type']
le_dict = {}

for feature in ls:
    le = LabelEncoder()
    le_dict[feature] = le
    df_train[feature] = le.fit_transform(df_train[feature])

# Final attributes that go into the training set.
# .copy() makes this an independent frame, so later column assignments do not
# act on a slice of the original frame.
df_train = df_train[['url_len', 'geo_loc', 'tld', 'who_is', 'https', 'js_len', 'js_obf_len', 'label', 'net_type', 'special_char', 'content_len']].copy()

# Fitted scalers, keyed by feature name
ss_dict = {}

# Standardize the two engineered numeric features and keep each fitted scaler
for feature in ['content_len', 'special_char']:
    ss = StandardScaler()
    # StandardScaler expects a 2-D array, hence the reshape to one column
    ss_fit = ss.fit(df_train[feature].values.reshape(-1, 1))
    ss_dict[feature] = ss_fit
    d = ss_fit.transform(df_train[feature].values.reshape(-1, 1))
    df_train[feature] = pd.DataFrame(d, index = df_train.index, columns = [feature])

KeyboardInterrupt: 

In [8]:
# Assign the replaced columns back: replace(inplace=True) on a column selection
# is chained assignment and does not update the frame under copy-on-write.
df_test['https'] = df_test['https'].replace({'yes' : 'HTTPS', 'no' : 'HTTP'})
df_test['label'] = df_test['label'].replace({'bad' : 'Malicious', 'good' : 'Benign'})

# Network part and network class features (no temporary column needed)
df_test['net_part'], df_test['net_type'] = zip(*df_test['ip_add'].apply(network_type))

# Number of special characters in the content
df_test['special_char'] = df_test['content'].apply(count_special)

# Length of the content
df_test['content_len'] = df_test['content'].apply(len)

# Encode the test categories with the encoders fitted on the training data.
# Refitting with fit_transform would derive the codes from the test set's own
# categories, so the same integer could mean different categories in the
# training and test data.
for feature in ls:
    le = le_dict[feature]
    # Categories never seen during fitting get -1 instead of raising in transform()
    df_test[feature] = (
        df_test[feature]
        .map(dict(zip(le.classes_, le.transform(le.classes_))))
        .fillna(-1)
        .astype(int)
    )

# Assign the replaced column back (chained replace(inplace=True) does not
# update the frame under copy-on-write) and cast explicitly to int.
df_test['label'] = df_test['label'].replace({'Malicious' : 1, 'Benign' : 0}).astype(int)

# Scale the test features with the scalers stored in ss_dict
for feature in ['content_len', 'special_char']:
    ss_fit = ss_dict[feature]
    d = ss_fit.transform(df_test[feature].values.reshape(-1, 1))
    df_test[feature] = d[:, 0]

# Keep the same attributes, in the same order, as the training set
df_test = df_test[['url_len', 'geo_loc', 'tld', 'who_is', 'https', 'js_len', 'js_obf_len', 'label', 'net_type', 'special_char', 'content_len']]

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [10]:
# Use the first 500000 training rows and all test rows
train = df_train.iloc[:500000]
test = df_test.iloc[:]

print(f"{len(train)} train examples")
print(f"{len(test)} test examples")

500000 train examples
361934 test examples


In [11]:
# Separate the target column from the features
X_train, y_train = train.drop(columns='label'), train['label']
X_test, y_test = test.drop(columns='label'), test['label']

In [13]:
# Baseline model: fit() returns the fitted estimator itself
clf = RandomForestClassifier().fit(X_train, y_train)
y_pred = clf.predict(X_test)


# Group definitions on the 'https' attribute
priv_groups = [dict(https=1)]
unpriv_groups = [dict(https=0)]
#dataset = pd.concat([X_test, y_test], axis=1)
#dataset = StandardDataset(dataset, label_name='label', favorable_classes=[0], protected_attribute_names=['https'], privileged_classes=[[1]])


In [22]:
# Preview the first rows of the test features
X_test.head()

Unnamed: 0,url_len,geo_loc,tld,who_is,https,js_len,js_obf_len,net_type,special_char,content_len
0,36,41,136,0,1,38.5,0.0,1,-0.281118,-0.303038
1,32,187,136,1,1,187.0,0.0,1,0.588966,1.828473
2,27,67,136,0,1,31.0,0.0,2,-1.129449,-0.911244
3,56,11,276,0,1,152.0,0.0,0,0.208304,-0.189581
4,40,41,136,0,1,150.0,0.0,2,0.1648,0.038264


In [2]:
from fairlearn.reductions import GridSearch, DemographicParity
from sklearn.ensemble import RandomForestClassifier

# Grid-search model constrained by demographic parity, built on a random forest
fairGridSearch = GridSearch(
    estimator=RandomForestClassifier(),
    constraints=DemographicParity(),
    grid_size=20,
)


In [3]:
# Fit the constrained model, using the 'https' column as the sensitive feature.
# NOTE(review): the recorded run of this cell failed with NameError because
# X_train was not defined in that session.
fairGridSearch.fit(X_train, y_train, sensitive_features=X_train['https'])

NameError: name 'X_train' is not defined