In [53]:
import json
import numpy as np
import matplotlib.pyplot as plt
import sklearn

from fairlearn.metrics import group_summary
from fairlearn.metrics import selection_rate_group_summary

from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

with open("augmented_data.json", "rb") as fp:
    data = json.load(fp)
    
corpus = list(map(lambda t: t["sample"]["text"] + " " + t["metadata"]["gender"], data))
y = list(map(lambda t: t["sample"]["intent"], data))

corpus_train, corpus_test, y_train, y_test = train_test_split(corpus, y, test_size=0.2, random_state=7)

vectorizer = TfidfVectorizer(min_df=10)

X_train = vectorizer.fit_transform(corpus_train)
X_test = vectorizer.transform(corpus_test)

model = sklearn.linear_model.LogisticRegression(C=0.1)
model.fit(X_train, y_train)




LogisticRegression(C=0.1)

In [54]:
to_unigram = list(map(lambda x: (" to" in x.lower()), corpus_test))
gender = list(map(lambda x: "male" if " male" in x.lower() else "female" , corpus_test))

y_pred = model.predict(X_test)
group_summary(accuracy_score, y_test, y_pred, sensitive_features=to_unigram)

{'overall': 0.905,
 'by_group': {False: 0.8761904761904762, True: 0.9368421052631579}}

In [55]:
group_summary(accuracy_score, y_test, y_pred, sensitive_features=gender)

{'overall': 0.905,
 'by_group': {'female': 0.900990099009901, 'male': 0.9090909090909091}}

In [56]:
from fairlearn.widget import FairlearnDashboard
FairlearnDashboard(sensitive_features=gender,
                       sensitive_feature_names=['gender'],
                       y_true=y_test,
                       y_pred={"initial model": y_pred})


FairlearnWidget(value={'true_y': ['GetDirections', 'ComparePlaces', 'GetPlaceDetails', 'RequestRide', 'Request…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x12c47d850>