In [5]:
!pip install fairlearn



# Import The Necessary Libarary

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
from fairlearn.reductions import GridSearch,DemographicParity

# #Synthetic Movie Recommendation Dataset

In [7]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, confusion_matrix
# from fairlearn.reductions import GridSearch, DemographicParity

# Sample data
data = {
    'User ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Age': [25, 30, 35, 22, 40, 25, 28, 32, 50, 27],
    'Gender': ['M', 'F', 'M', 'F', 'M', 'M', 'F', 'M', 'F', 'M'],
    'Liked Movie': [1, 0, 1, 0, 1, 1, 0, 1, 0, 1],
}

# Create DataFrame
df = pd.DataFrame(data)

# Data Preprocessing
X = df[['Age']]
y = df['Liked Movie']  # Fixed column name to match the DataFrame

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ensure sensitive features match the number of samples in X_train
sensitive_features_train = df.loc[X_train.index]['Gender']

# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Define a fairness constraint (Demographic Parity)
fairness_constraint = DemographicParity()

# Train a fair model using GridSearch with fairness constraints
fair_model = GridSearch(estimator=model, constraints=fairness_constraint, grid_size=2)
fair_model.fit(X_train, y_train, sensitive_features=sensitive_features_train)

# Evaluate the fair model
fair_y_pred = fair_model.predict(X_test)
print("Fair Model Accuracy:", accuracy_score(y_test, fair_y_pred))
print("Fair Model Confusion Matrix:\n", confusion_matrix(y_test, fair_y_pred))


Accuracy: 0.5
Confusion Matrix:
 [[1 1]
 [0 0]]
Fair Model Accuracy: 0.5
Fair Model Confusion Matrix:
 [[1 1]
 [0 0]]


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from fairlearn.reductions import GridSearch, EqualizedOdds
import numpy as np

# Sample data
data = {
    'User ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Age': [25, 30, 35, 22, 40, 25, 28, 32, 50, 27],
    'Gender': ['M', 'F', 'M', 'F', 'M', 'M', 'F', 'M', 'F', 'M'],
    'Liked Movie': [1, 0, 1, 0, 1, 1, 0, 1, 0, 1],
}

# Create DataFrame
df = pd.DataFrame(data)

# Data Preprocessing
X = df[['Age', 'Gender']]  # Include both Age and Gender as features
y = df['Liked Movie']

# One-hot encode categorical variable 'Gender'
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Age']),    # Scale Age
        ('cat', OneHotEncoder(), ['Gender'])   # One-hot encode Gender
    ]
)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train_transformed, y_train)

# Evaluate the model
y_pred = model.predict(X_test_transformed)
print("Improved Model Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Ensure sensitive features match the number of samples in X_train
sensitive_features_train = df.loc[X_train.index]['Gender']

# Define a fairness constraint (Equalized Odds - less strict than Demographic Parity)
fairness_constraint = EqualizedOdds()

# Train a fair model using GridSearch with fairness constraints (reduced grid size)
fair_model = GridSearch(estimator=model, constraints=fairness_constraint, grid_size=2)
fair_model.fit(X_train_transformed, y_train, sensitive_features=sensitive_features_train)

# Evaluate the fair model
fair_y_pred = fair_model.predict(X_test_transformed)
print("Fair Model Accuracy:", accuracy_score(y_test, fair_y_pred))
print("Fair Model Confusion Matrix:\n", confusion_matrix(y_test, fair_y_pred))




Improved Model Accuracy: 1.0
Confusion Matrix:
 [[2]]
Fair Model Accuracy: 1.0
Fair Model Confusion Matrix:
 [[2]]
