In [67]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from diffprivlib.models import LogisticRegression as DPLogisticRegression

In [68]:
# Fetch the Iris dataset once, then unpack the feature matrix and the class labels
iris = load_iris()
X, y = iris.data, iris.target

In [69]:
# Convert labels to binary (for simplicity): 1 where the original label is
# class 0, and 0 for every other class.
# The target is derived from iris.target rather than from y itself so that
# re-running this cell is idempotent; re-applying `y == 0` to the
# already-binary y would silently flip every label.
y = (iris.target == 0).astype(int)

In [70]:
# Hold out 30% of the samples for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [71]:
# Baseline: fit an ordinary (non-private) logistic regression on the training split
non_private_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Score the baseline on the held-out test split
y_pred = non_private_model.predict(X_test)
accuracy_non_private = accuracy_score(y_test, y_pred)
print(f'Accuracy of the non-private logistic regression model: {accuracy_non_private:.2f}')

Accuracy of the non-private logistic regression model: 1.00


In [91]:
# Train the differentially private logistic regression model using diffprivlib.
# The privacy budget is bound to a single name so that the value handed to the
# model and the value reported in the printed message cannot drift apart.
epsilon = 10.0

# data_norm is the L2-norm bound on each row that is passed to the model.
# NOTE(review): the features were standardized above, so some 4-feature rows
# may have a norm above 2.0 - confirm this bound is appropriate for the data.
dp_model = DPLogisticRegression(epsilon=epsilon, data_norm=2.0, max_iter=1000, random_state=42)
dp_model.fit(X_train, y_train)

# Evaluate on the held-out test split; `:g` renders 10.0 as "10"
y_pred_private = dp_model.predict(X_test)
accuracy_private = accuracy_score(y_test, y_pred_private)
print(f'Accuracy of the differentially private logistic regression model with epsilon {epsilon:g}: {accuracy_private:.2f}')

Accuracy of the differentially private logistic regression model with epsilon 10: 0.42


In [92]:
# Train the differentially private logistic regression model using diffprivlib,
# this time with a larger privacy budget (epsilon = 60).
# The budget is bound to a single name so that the value handed to the model
# and the value reported in the printed message cannot drift apart.
epsilon = 60.0

# data_norm is the L2-norm bound on each row that is passed to the model.
# NOTE(review): the features were standardized above, so some 4-feature rows
# may have a norm above 2.0 - confirm this bound is appropriate for the data.
dp_model = DPLogisticRegression(epsilon=epsilon, data_norm=2.0, max_iter=1000, random_state=42)
dp_model.fit(X_train, y_train)

# Evaluate on the held-out test split; `:g` renders 60.0 as "60"
y_pred_private = dp_model.predict(X_test)
accuracy_private = accuracy_score(y_test, y_pred_private)
print(f'Accuracy of the differentially private logistic regression model with epsilon {epsilon:g}: {accuracy_private:.2f}')

Accuracy of the differentially private logistic regression model with epsilon 60: 0.84


#### Summary
- The non-private model performs exceptionally well, with perfect accuracy (1.00) on the test set, but without any privacy guarantees.
- Introducing differential privacy with ϵ=10 significantly degrades model performance (test accuracy drops from 1.00 to 0.42) because of the noise that must be added to satisfy the privacy constraint.
- Increasing ϵ to 60 improves the model's accuracy considerably (to 0.84), demonstrating the trade-off between privacy and utility: a larger ϵ means less noise and higher accuracy, but a weaker privacy guarantee.