In [1]:
# Ethics & Optimization Analysis for ML Models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras import layers, models
import warnings
warnings.filterwarnings('ignore')

# Title banner for the whole report. Passing several arguments with sep="\n"
# writes each one on its own line, exactly like consecutive print calls.
print("=" * 80, "ETHICS & OPTIMIZATION ANALYSIS FOR ML MODELS", "=" * 80, sep="\n")

# ============================================================================
# PART 1: ETHICAL CONSIDERATIONS
# ============================================================================

print("\n1. ETHICAL CONSIDERATIONS", "-" * 50, sep="\n")

# Bias sources listed for the handwritten-digit model.
print(
    "\nA. POTENTIAL BIASES IN MNIST MODEL:",
    "   • Demographic bias: MNIST contains handwritten digits from specific",
    "     populations and may not generalize to different writing styles",
    "   • Cultural bias: Different cultures may write digits differently",
    "   • Age bias: Children vs adults have different handwriting patterns",
    "   • Quality bias: Dataset may favor certain image qualities/conditions",
    "   • Selection bias: Historical data collection methods may not be representative",
    sep="\n",
)

# Bias sources listed for the review-sentiment model.
print(
    "\nB. POTENTIAL BIASES IN AMAZON REVIEWS MODEL:",
    "   • Language bias: English-only reviews exclude non-English speakers",
    "   • Socioeconomic bias: Reviews may favor products accessible to certain income levels",
    "   • Platform bias: Amazon-specific user behavior patterns",
    "   • Temporal bias: Reviews from different time periods may have different patterns",
    "   • Product category bias: Some categories may have more polarized reviews",
    "   • Demographic bias: Age, gender, and cultural differences in review writing",
    sep="\n",
)

# Sample bias analysis for Amazon reviews
print("\nC. BIAS ANALYSIS EXAMPLE - Amazon Reviews:")

# Hand-written toy records; keyword order fixes the DataFrame column order
# (text, rating, price_range, demographic).
biased_reviews = [
    dict(text='This luxury product is amazing!', rating=5, price_range='high', demographic='affluent'),
    dict(text='Cheap quality, not worth it', rating=1, price_range='low', demographic='budget'),
    dict(text='Great value for money', rating=4, price_range='medium', demographic='middle'),
    dict(text='Premium features work perfectly', rating=5, price_range='high', demographic='affluent'),
    dict(text='Basic functionality is okay', rating=3, price_range='low', demographic='budget'),
]

bias_df = pd.DataFrame(biased_reviews)
print("\nBias Analysis by Price Range:")
# Mean / count / sample std of the rating per price bucket, rounded to 2 dp.
# A bucket with a single row has no sample std, so pandas reports NaN there.
bias_analysis = (
    bias_df.groupby('price_range')['rating']
    .agg(['mean', 'count', 'std'])
    .round(2)
)
print(bias_analysis)

# ============================================================================
# BIAS MITIGATION STRATEGIES
# ============================================================================

print("\n\nD. BIAS MITIGATION STRATEGIES:", "-" * 40, sep="\n")

# Each strategy family is written with one print call (one argument per line).
print(
    "\n1. DATA-LEVEL MITIGATION:",
    "   • Data Augmentation: Increase diversity in training data",
    "   • Balanced Sampling: Ensure equal representation across groups",
    "   • Synthetic Data Generation: Create balanced synthetic samples",
    "   • Multi-source Data Collection: Gather data from diverse sources",
    sep="\n",
)

print(
    "\n2. ALGORITHM-LEVEL MITIGATION:",
    "   • Fairness Constraints: Add fairness objectives to loss functions",
    "   • Adversarial Debiasing: Train models to be invariant to protected attributes",
    "   • Regularization: Penalize discriminatory patterns",
    sep="\n",
)

print(
    "\n3. POST-PROCESSING MITIGATION:",
    "   • Threshold Optimization: Adjust decision thresholds per group",
    "   • Calibration: Ensure equal calibration across groups",
    "   • Output Adjustment: Modify predictions to achieve fairness",
    sep="\n",
)

# Example: TensorFlow Fairness Indicators simulation
print(
    "\n4. TENSORFLOW FAIRNESS INDICATORS EXAMPLE:",
    "   (Simulated metrics for demonstration)",
    sep="\n",
)

# Hard-coded, illustrative per-group scores (not computed from any model).
# Each list is ordered like `groups`.
groups = ['Group A', 'Group B', 'Group C']
metrics = {
    'Accuracy': [0.85, 0.78, 0.82],
    'Precision': [0.83, 0.75, 0.80],
    'Recall': [0.87, 0.82, 0.85],
    'F1-Score': [0.85, 0.78, 0.82],
    'False Positive Rate': [0.12, 0.18, 0.15],
    'False Negative Rate': [0.13, 0.18, 0.15]
}

# Rows are groups, columns are metrics.
fairness_df = pd.DataFrame(metrics, index=groups)
print("\nFairness Metrics by Group:")
print(fairness_df.round(3))

# Spread between the best- and worst-scoring group for every metric.
print("\nFairness Gaps (Max - Min):")
for metric, scores in metrics.items():
    gap = max(scores) - min(scores)
    print(f"   {metric}: {gap:.3f}")

# ============================================================================
# PART 2: TROUBLESHOOTING CHALLENGE - BUGGY TENSORFLOW CODE
# ============================================================================

print("\n\n2. TROUBLESHOOTING CHALLENGE - BUGGY CODE", "-" * 50, sep="\n")

# The intentionally broken training script, kept as plain text so it can be
# displayed without ever being executed.
buggy_code = '''
# BUGGY CODE - DO NOT RUN
import tensorflow as tf
from tensorflow.keras import layers

# Bug 1: Wrong input shape
model = tf.keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(28, 28)),  # BUG: Should be (784,) for flattened
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Bug 2: Wrong loss function for multiclass
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # BUG: Should be 'sparse_categorical_crossentropy'
              metrics=['accuracy'])

# Bug 3: Dimension mismatch in data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train / 255.0  # BUG: Not flattened for Dense layers
x_test = x_test / 255.0

# Bug 4: Wrong batch dimension in fit
model.fit(x_train, y_train, epochs=5, batch_size=32)  # Will fail due to shape mismatch
'''

# Heading, underline and the listing itself, one per output line.
print("\nORIGINAL BUGGY CODE:", "-" * 20, buggy_code, sep="\n")

# Summary of the problems found in the listing above.
print(
    "\nIDENTIFIED BUGS:",
    "-" * 15,
    "1. Input shape mismatch: Dense layer expects flattened input (784,), not (28,28)",
    "2. Wrong loss function: binary_crossentropy for multiclass problem",
    "3. Data not flattened: Images need to be reshaped for Dense layers",
    "4. Potential validation data missing",
    "5. No error handling or data validation",
    sep="\n",
)

# Introduce the corrected implementation that follows in the file.
print(
    "\nFIXED CODE:",
    "-" * 10,
    "Creating and training corrected model...",
    sep="\n",
)

# Fixed implementation
def create_fixed_model():
    """Build and compile the corrected dense classifier for 28x28 inputs.

    The network takes (28, 28) inputs, flattens them, applies two ReLU dense
    layers (128 and 64 units), dropout with rate 0.2, and a 10-way softmax
    output layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    model = tf.keras.Sequential([
        # Declare the input shape with an explicit Input layer: passing
        # `input_shape=` to the first layer is discouraged by Keras 3 and
        # emits a UserWarning there.
        layers.Input(shape=(28, 28)),
        layers.Flatten(),  # FIX: Properly flatten input
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.2),  # IMPROVEMENT: Add dropout for regularization
        layers.Dense(10, activation='softmax')
    ])

    # FIX: sparse categorical cross-entropy is the multiclass loss that takes
    # integer class labels (rather than one-hot vectors).
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',  # FIXED
                  metrics=['accuracy'])

    return model

# Load and preprocess data correctly
def load_and_preprocess_data():
    """Load MNIST through Keras, rescale the images and print a shape report.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``. The image arrays are cast
        to float32 and divided by 255.0; the label arrays are returned as
        loaded.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    # Cast first so the division is carried out in float32.
    pixel_scale = 255.0
    x_train = x_train.astype('float32') / pixel_scale
    x_test = x_test.astype('float32') / pixel_scale

    # Sanity report: array shapes plus the smallest and largest training label.
    print(f"Training data shape: {x_train.shape}")
    print(f"Training labels shape: {y_train.shape}")
    print(f"Test data shape: {x_test.shape}")
    print(f"Test labels shape: {y_test.shape}")
    print(f"Label range: {y_train.min()} to {y_train.max()}")

    return (x_train, y_train), (x_test, y_test)

# Create and train the fixed model
try:
    # Build the network first, then fetch the data it will be trained on.
    model = create_fixed_model()
    (x_train, y_train), (x_test, y_test) = load_and_preprocess_data()

    print("\nModel Summary:")
    model.summary()

    print("\nTraining model...")
    # Short demo run: 3 epochs, with 10% of the training set held out for
    # validation. No callbacks are passed.
    fit_options = {
        'epochs': 3,
        'batch_size': 128,
        'validation_split': 0.1,
        'verbose': 1,
    }
    history = model.fit(x_train, y_train, **fit_options)

    # Score the trained model on the test split without progress output.
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
    print(
        f"\nTest accuracy: {test_accuracy:.4f}",
        f"Test loss: {test_loss:.4f}",
        sep="\n",
    )

except Exception as exc:
    # Best-effort demo: report the failure and let the rest of the script run.
    print(f"Error during model training: {exc}")

# ============================================================================
# ADDITIONAL DEBUGGING TECHNIQUES
# ============================================================================

print("\n\n3. DEBUGGING BEST PRACTICES:", "-" * 30, sep="\n")

# Checklist entries are numbered from 1 in the order given here.
debugging_practices = [
    f"{number}. {advice}"
    for number, advice in enumerate(
        (
            "Check data shapes at each step",
            "Validate input/output dimensions match model expectations",
            "Use appropriate loss functions for the problem type",
            "Add data type and range validation",
            "Include proper error handling and logging",
            "Use callbacks for monitoring (EarlyStopping, ModelCheckpoint)",
            "Visualize data and predictions to catch issues",
            "Start with simple models and gradually increase complexity",
            "Use TensorBoard for training visualization",
            "Test with small datasets first",
        ),
        start=1,
    )
]

# One indented line per checklist entry.
for practice in debugging_practices:
    print(f"   {practice}")

# ============================================================================
# SPACY RULE-BASED BIAS MITIGATION
# ============================================================================

print("\n\n4. SPACY RULE-BASED BIAS MITIGATION:", "-" * 40, sep="\n")

# Every group below is one print call; sep="\n" places each argument on its
# own output line.
print(
    "\nStrategies for reducing bias in NLP models:",
    "1. Diverse Training Data:",
    "   • Include reviews from different demographics",
    "   • Balance across product categories and price ranges",
    "   • Include multiple languages and dialects",
    sep="\n",
)

print(
    "\n2. Bias-Aware Rule Creation:",
    "   • Avoid rules that favor specific groups",
    "   • Test rules across different populations",
    "   • Use inclusive language patterns",
    sep="\n",
)

print(
    "\n3. Evaluation Across Subgroups:",
    "   • Measure performance on different demographic groups",
    "   • Check for disparate impact",
    "   • Monitor for systematic errors",
    sep="\n",
)

# Heading for the rule-based sentiment example that follows.
print("\n4. Example: Bias-Aware Sentiment Rules")

class BiasAwareSentimentAnalyzer:
    """Small lexicon-based sentiment scorer illustrating bias-aware rules.

    A text's score is the sum of:

    * a fixed adjustment for each phrase in ``cultural_adjustments`` that
      occurs in the text (applied once per phrase, however often it occurs);
    * +1 for every remaining word in ``positive_words`` and -1 for every
      remaining word in ``negative_words``.

    Words that are part of a matched phrase are not scored again on their
    own, so "not bad" contributes its phrase adjustment only and is not
    cancelled by the -1 for "bad".
    """

    # Stripped from both ends of each whitespace-separated token so that
    # "great!" or "okay," still match their lexicon entries.
    _EDGE_PUNCTUATION = ".,!?;:\"'()[]{}"

    def __init__(self):
        """Set up the word lexicons and the phrase-level adjustments."""
        # Inclusive positive words across cultures
        self.positive_words = {
            'excellent', 'good', 'great', 'amazing', 'wonderful',
            'satisfactory', 'decent', 'adequate', 'acceptable', 'fine'
        }

        # Avoid culturally biased negative terms
        self.negative_words = {
            'poor', 'bad', 'disappointing', 'inadequate', 'unsatisfactory',
            'defective', 'broken', 'faulty', 'substandard'
        }

        # Cultural sensitivity adjustments
        self.cultural_adjustments = {
            'not bad': 0.3,  # Some cultures use double negatives positively
            'could be better': -0.2,  # Indirect criticism
            'okay': 0.1  # Neutral but slightly positive in some contexts
        }

    def analyze_with_bias_awareness(self, text):
        """Score ``text`` and classify it as positive, negative or neutral.

        This is a simplified example; a full implementation would include
        cultural context awareness.

        Args:
            text: The string to analyze. Matching is case-insensitive and
                ignores punctuation at the edges of each word.

        Returns:
            A dict with ``'score'`` (the numeric total), ``'sentiment'``
            (``'positive'`` if the score is above zero, ``'negative'`` if
            below, otherwise ``'neutral'``) and ``'bias_adjusted'`` (always
            ``True``).
        """
        # Lower-case, split on whitespace, and trim edge punctuation; tokens
        # that were nothing but punctuation are dropped.
        tokens = [
            token
            for token in (
                raw.strip(self._EDGE_PUNCTUATION) for raw in text.lower().split()
            )
            if token
        ]

        score = 0
        consumed = set()  # indices of tokens already accounted for by a phrase

        # Phrase pass: match each phrase as a run of whole tokens.
        for phrase, adjustment in self.cultural_adjustments.items():
            phrase_tokens = phrase.split()
            width = len(phrase_tokens)
            hits = [
                start
                for start in range(len(tokens) - width + 1)
                if tokens[start:start + width] == phrase_tokens
            ]
            if hits:
                score += adjustment
                for start in hits:
                    consumed.update(range(start, start + width))

        # Word pass: only tokens that were not part of a matched phrase vote.
        for index, token in enumerate(tokens):
            if index in consumed:
                continue
            if token in self.positive_words:
                score += 1
            elif token in self.negative_words:
                score -= 1

        if score > 0:
            sentiment = 'positive'
        elif score < 0:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return {
            'score': score,
            'sentiment': sentiment,
            'bias_adjusted': True
        }

# Demonstrate bias-aware analysis
# Run the analyzer over a few sample phrases and show each verdict.
test_phrases = [
    "The product is not bad",
    "It's okay, could be better",
    "Excellent quality and great value"
]
bias_aware_analyzer = BiasAwareSentimentAnalyzer()

print("\nBias-Aware Sentiment Analysis Examples:")
for phrase in test_phrases:
    result = bias_aware_analyzer.analyze_with_bias_awareness(phrase)
    print(f"   '{phrase}' → {result['sentiment']} (score: {result['score']})")

# Closing banner; the leading "\n" leaves a blank line before the first rule.
print("\n" + "=" * 80, "ETHICS & OPTIMIZATION ANALYSIS COMPLETE", "=" * 80, sep="\n")

ETHICS & OPTIMIZATION ANALYSIS FOR ML MODELS

1. ETHICAL CONSIDERATIONS
--------------------------------------------------

A. POTENTIAL BIASES IN MNIST MODEL:
   • Demographic bias: MNIST contains handwritten digits from specific
     populations and may not generalize to different writing styles
   • Cultural bias: Different cultures may write digits differently
   • Age bias: Children vs adults have different handwriting patterns
   • Quality bias: Dataset may favor certain image qualities/conditions
   • Selection bias: Historical data collection methods may not be representative

B. POTENTIAL BIASES IN AMAZON REVIEWS MODEL:
   • Language bias: English-only reviews exclude non-English speakers
   • Socioeconomic bias: Reviews may favor products accessible to certain income levels
   • Platform bias: Amazon-specific user behavior patterns
   • Temporal bias: Reviews from different time periods may have different patterns
   • Product category bias: Some categories may have more polarized reviews


Training model...
Epoch 1/3
422/422 ━━━━━━━━━━━━━━━━━━━━ 6s 9ms/step - accuracy: 0.7890 - loss: 0.7157 - val_accuracy: 0.9585 - val_loss: 0.1458
Epoch 2/3
422/422 ━━━━━━━━━━━━━━━━━━━━ 4s 6ms/step - accuracy: 0.9423 - loss: 0.1944 - val_accuracy: 0.9712 - val_loss: 0.1029
Epoch 3/3
422/422 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - accuracy: 0.9612 - loss: 0.1321 - val_accuracy: 0.9742 - val_loss: 0.0904

Test accuracy: 0.9692
Test loss: 0.0998


3. DEBUGGING BEST PRACTICES:
------------------------------
   1. Check data shapes at each step
   2. Validate input/output dimensions match model expectations
   3. Use appropriate loss functions for the problem type
   4. Add data type and range validation
   5. Include proper error handling and logging
   6. Use callbacks for monitoring (EarlyStopping, ModelCheckpoint)
   7. Visualize data and predictions to catch issues
   8. Start with simple models and gradually increase complexity