### Detect Bias in Data
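**Description**: Use statistical tests (a chi-squared test for categorical outcomes and a t-test for numerical ones) to detect bias in a dataset. Bias in the data can undermine the fairness of AI models trained on it.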

In [2]:
import pandas as pd
import numpy as np
from scipy import stats

# Create a synthetic loan dataset and then inject two known gender biases into it.
# The legacy global NumPy RNG is used, so the ORDER of the np.random calls below
# determines the generated values; keep that order when editing this cell.
np.random.seed(42)
data = {
    'age': np.random.randint(20, 60, 1000),
    'gender': np.random.choice(['Male', 'Female'], size=1000, p=[0.5, 0.5]),
    'income': np.random.normal(loc=50000, scale=10000, size=1000),
    'loan_approval': np.random.choice([0, 1], size=1000, p=[0.7, 0.3]),
}
df = pd.DataFrame(data)

# Introduce bias: lower loan approval for females.
# Build each mask once against the full frame. Chaining two boolean selections
# (df[mask_a][mask_b]) makes pandas re-align the second mask and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame index".
is_female = df['gender'] == 'Female'
female_indices = df[is_female].index
approved_female_indices = df[is_female & (df['loan_approval'] == 1)].index

# Flip as many approvals as 20% of all female rows, but never more than the
# number of approved female rows available: sampling without replacement
# raises ValueError when asked for more items than the pool holds.
num_females_to_change = min(int(len(female_indices) * 0.2), len(approved_female_indices))
rejected_females_indices = np.random.choice(
    approved_female_indices,
    size=num_females_to_change,
    replace=False,
)
df.loc[rejected_females_indices, 'loan_approval'] = 0

# Introduce bias: lower average income for females.
# Every female income is redrawn from a distribution with a lower mean (45000 vs 50000).
df.loc[is_female, 'income'] = np.random.normal(loc=45000, scale=8000, size=int(is_female.sum()))

# Detect bias using statistical tests.

# Bias 1: loan approval by gender (categorical vs. categorical) -> chi-squared test.
# The gender x loan_approval cross-tabulation is the input to the test.
contingency_table = pd.crosstab(df['gender'], df['loan_approval'])
chi2, p_val, dof, expected = stats.chi2_contingency(contingency_table)

# Pick the verdict line using a 0.05 significance threshold.
approval_verdict = (
    "SignificantassociationfoundbetweenGenderandLoanApproval(evidenceofbias)."
    if p_val < 0.05
    else "NosignificantassociationfoundbetweenGenderandLoanApproval."
)
print(
    f"Chi-squaredtestforLoanApprovalbyGender:",
    f"Chi2Statistic:{chi2:.4f}",
    f"P-value:{p_val:.4f}",
    approval_verdict,
    sep="\n",
)

# Show the observed approval rate per gender (mean of the 0/1 approval column)
# so the size of the gap is visible alongside the test result.
approval_rates = df.groupby('gender')['loan_approval'].mean()
print("LoanApprovalRatesbyGender:\n", approval_rates)

# Bias 2: income difference by gender (numerical vs. categorical) -> two-sample t-test.
male_income = df.loc[df['gender'] == 'Male', 'income']
female_income = df.loc[df['gender'] == 'Female', 'income']

# equal_var=False: do not assume both groups share the same variance.
t_stat, p_val_ttest = stats.ttest_ind(male_income, female_income, equal_var=False)

# Pick the verdict line using a 0.05 significance threshold.
income_verdict = (
    "SignificantdifferencenfoundinIncomebetweenGendergroups(evidenceofbias)."
    if p_val_ttest < 0.05
    else "NosignificantdifferencefoundinIncomebetweenGendergroups."
)
print(
    f"\nT-testforIncomedifferencebyGender:",
    f"T-Statistic:{t_stat:.4f}",
    f"P-value:{p_val_ttest:.4f}",
    income_verdict,
    sep="\n",
)

# Show the mean income per gender so the size of the gap is visible
# alongside the test result.
average_incomes = df.groupby('gender')['income'].mean()
print("AverageIncomesbyGender:\n", average_incomes)

Chi-squaredtestforLoanApprovalbyGender:
Chi2Statistic:100.2461
P-value:0.0000
SignificantassociationfoundbetweenGenderandLoanApproval(evidenceofbias).
LoanApprovalRatesbyGender:
 gender
Female    0.074364
Male      0.329243
Name: loan_approval, dtype: float64

T-testforIncomedifferencebyGender:
T-Statistic:9.3792
P-value:0.0000
SignificantdifferencenfoundinIncomebetweenGendergroups(evidenceofbias).
AverageIncomesbyGender:
 gender
Female    44577.797330
Male      50075.730503
Name: income, dtype: float64


  rejected_females_indices=np.random.choice(df[df['gender']=='Female'][df['loan_approval']==1].index,size=num_females_to_change,replace=False)
