In [82]:
import numpy as np
import pandas as pd
import aif360 as aif
from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from sklearn.impute import SimpleImputer
from aif360.sklearn import metrics
from scipy.stats import chisquare
from scipy.stats import f_oneway

In [27]:
df = pd.read_csv('resume_extracted_data.csv')

In [28]:
df.head()

Unnamed: 0,Name,Age,Religion,Job Title,Gender,Experience,Home City,UG Institute,PG Institute,PHD Institute
0,Akshatha,40.0,Hindu,Business Analytics,F,9.5,Bangalore,St.Agnes College,Mangalore University,
1,Vinod Kumar S,46.0,Hindu,Business Analytics,M,14.0,Bangalore,,,
2,Tushar Kumar Saxena,41.0,Hindu,Business Analytics,M,16.0,,DAV College,Birla Institute of Management Technology,
3,ANUJ MALHOTRA,38.0,Hindu,Business Analytics,M,17.0,New Delhi,Delhi University,IICT Lucknow,
4,SVM Ajay SriPennada,36.0,Hindu,Business Analytics,M,4.6,,"VIT, Vellore",Great lakes Institute of Management,


In [29]:
df.drop(columns=['PHD Institute','UG Institute','PG Institute','Home City'], inplace=True)

In [30]:
df.shape

(78, 6)

In [31]:
selected = np.random.randint(2, size=df.shape[0])


In [32]:
Selected = pd.DataFrame(selected, columns=['Selected'])

In [33]:
df['Selected'] = Selected

In [34]:
df.head()

Unnamed: 0,Name,Age,Religion,Job Title,Gender,Experience,Selected
0,Akshatha,40.0,Hindu,Business Analytics,F,9.5,0
1,Vinod Kumar S,46.0,Hindu,Business Analytics,M,14.0,1
2,Tushar Kumar Saxena,41.0,Hindu,Business Analytics,M,16.0,0
3,ANUJ MALHOTRA,38.0,Hindu,Business Analytics,M,17.0,0
4,SVM Ajay SriPennada,36.0,Hindu,Business Analytics,M,4.6,0


In [35]:
df.isna().sum()

Name          0
Age           2
Religion      1
Job Title     0
Gender        0
Experience    1
Selected      0
dtype: int64

In [42]:
df.dropna(inplace=True)

In [55]:
metrics.statistical_parity_difference(df['Selected'], prot_attr=df['Gender'], priv_group='M')

0.05714285714285716

In [57]:
metrics.statistical_parity_difference(df['Selected'], prot_attr=df['Religion'], priv_group='Hindu')

-0.13025210084033617

In [58]:
metrics.disparate_impact_ratio(df['Selected'], prot_attr=df['Gender'], priv_group='M')

1.105263157894737

In [59]:
metrics.disparate_impact_ratio(df['Selected'], prot_attr=df['Religion'], priv_group='Hindu')

0.7669172932330827

In [78]:
metrics.class_imbalance(y_true=df['Selected'], prot_attr=df['Gender'], priv_group='M')

-0.8666666666666667

In [77]:
metrics.class_imbalance(y_true=df['Selected'], prot_attr=df['Religion'], priv_group='Hindu')

-0.8133333333333334

In [70]:
metrics.kl_divergence(y_true=df['Selected'], prot_attr=df['Gender'], priv_group='M')

0.006711901983134094

In [62]:
aif.detectors.bias_scan(df.drop(columns=['Selected']), df['Selected'])

({'Name': ['ANUJ MALHOTRA',
   'ASHISH KUMAR KHAMARI',
   'Aabid A.S. Mulani',
   'Akshatha',
   'Anil Kumar',
   'Arunesh Sharma',
   'Charan Kumar Reddy',
   'Jayakkumar Krishnasamy',
   'Madan Mohan',
   'Mudassar L Shaikh\t',
   'NITIN CHANDORKAR',
   'Neelakanta R',
   'PRAHALADHA',
   'SVM Ajay SriPennada',
   'Sabeeh Ashhar ',
   'Sachindra D',
   'Sakthivadivel Ganesan',
   'Sandeep Cheedepudi',
   'Sankalp S. Rao',
   'Sathyandran S K',
   'Saurabh Suhasaria',
   'Thilagavathi Saravanan',
   'Tushar Kumar Saxena'],
  'Age': [32.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 45.0, 46.0],
  'Job Title': ['Android', 'Business Analytics', 'QA Testing', 'Sales', 'UI'],
  'Experience': [4.6,
   6.0,
   9.5,
   11.0,
   12.0,
   12.7,
   13.0,
   14.0,
   15.0,
   16.0,
   17.0,
   18.0,
   19.0,
   20.0,
   21.0],
  'Gender': ['F', 'M'],
  'Religion': ['Hindu', 'Muslim']},
 18.155)

In [80]:
def standard_deviation_ratio(scores, groups):
    std_dev_group_A = np.std(scores[groups[0]])
    std_dev_group_B = np.std(scores[groups[1]])
    return std_dev_group_A / std_dev_group_B

def standard_deviation_disparity(scores, groups):
    std_dev_group_A = np.std(scores[groups[0]])
    std_dev_group_B = np.std(scores[groups[1]])
    std_dev_overall = np.std(scores)
    return (std_dev_group_A - std_dev_group_B) / std_dev_overall

def equal_opportunity(true_positives, total_positives):
    return true_positives / total_positives


def chi2_test(observed, alpha=0.05):
    expected = [sum(observed) / len(observed)] * 6
    chi2, p = chisquare(observed, f_exp=expected)
    return p < alpha

def anova_bias(col1, col2):
    res = f_oneway(col1.tolist(),col2.tolist())
    print("H0 Accepted") if res.pvalue > 0.05 else print("H0 Rejected")
    
def demographic_parity(col1, col2,threshold):
    ratio1 = sum(col1) / len(col1)
    ratio2 = sum(col2) / len(col2)
    print("Bias exists") if(abs(ratio1-ratio2) > threshold) else print("No bias")