# Model Explainability

In [None]:
import numpy as np
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer

# Load the tokenizer and the TensorFlow encoder from the same checkpoint name.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# output_attentions=True asks the model to include attention weights in its outputs.
model = TFAutoModel.from_pretrained(model_name, output_attentions=True)

# Single prompt whose attention pattern is visualised in the next cell.
text = "Write a product description for an eco-friendly kitchenware product focusing on brand ethics."
inputs = tokenizer(text, return_tensors='tf')
outputs = model(inputs)

# Take the last entry of the last output field and convert it to a NumPy array.
# NOTE(review): presumably outputs[-1] is the per-layer attentions tuple and [-1]
# selects the final layer, giving shape (batch, heads, seq, seq) - confirm against
# the transformers model-output documentation.
attention = outputs[-1][-1].numpy()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Rebuild the matrix from `outputs` instead of re-slicing `attention` in place, so
# that re-running this cell is idempotent: a second in-place squeeze(axis=0) would
# hit a non-singleton axis and fail, and a second trim would drop more positions.
attention = outputs[-1][-1].numpy().squeeze(axis=0)
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"].numpy()[0])

# Drop the first and last positions from both the labels and the matrix
# (presumably the special [CLS] / [SEP] tokens added by the tokenizer - confirm).
tokens = tokens[1:-1]
attention = attention[:, 1:-1, 1:-1]

# Annotated heatmap of the first slice along axis 0 of the trimmed matrix
# (assumed to be attention head 0 - TODO confirm the axis meaning).
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    attention[0],
    annot=True,
    ax=ax,
    cmap="viridis",
    xticklabels=tokens,
    yticklabels=tokens,
    fmt='.2f',
    annot_kws={"size": 8},
)
ax.set_title('Attention Weights for Marketing Prompt')
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(fontsize=10)
plt.show()

# Data Anonymization

In [None]:
import pandas as pd
import hashlib

# Small synthetic customer table used by the anonymization cells below.
# Each row is (first_name, last_name, age, income, purchase_amount).
customer_data = pd.DataFrame(
    [
        ('Alice', 'Smith', 25, 20000, 100),
        ('Bob', 'Jones', 30, 35000, 150),
        ('Charlie', 'Brown', 22, 27000, 200),
        ('David', 'Johnson', 40, 50000, 250),
        ('Eve', 'Davis', 35, 45000, 220),
        ('Frank', 'Wilson', 28, 30000, 140),
        ('Grace', 'Moore', 26, 32000, 180),
        ('Hannah', 'Taylor', 33, 38000, 160),
        ('Ivy', 'Anderson', 29, 31000, 190),
        ('Jack', 'Thomas', 37, 47000, 230),
    ],
    columns=['first_name', 'last_name', 'age', 'income', 'purchase_amount'],
)

def pseudonymize_id(first_name, last_name):
    """Return a deterministic SHA-256 pseudonym for a customer's name.

    The two name parts are joined with the ASCII unit separator (0x1F) before
    hashing, so the boundary between them is part of the hashed input. Plain
    concatenation would map ("Ann", "aSmith") and ("Anna", "Smith") to the same
    identifier.

    Args:
        first_name: Customer's given name (str).
        last_name: Customer's family name (str).

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.

    Note:
        SECURITY: the hash is unsalted and unkeyed, so anyone holding a list of
        candidate names can recompute the identifiers and re-identify customers.
        Use a keyed hash (e.g. ``hmac`` with a secret key) if the identifiers
        must resist that kind of lookup.
    """
    joined = "\x1f".join((first_name, last_name))
    return hashlib.sha256(joined.encode()).hexdigest()
# Derive one pseudonymous ID per row from the name columns.
name_pairs = zip(customer_data['first_name'], customer_data['last_name'])
customer_data['customer_id'] = [
    pseudonymize_id(first, last) for first, last in name_pairs
]

# The anonymized view keeps the ID and the numeric columns but not the names.
anonymized_data = customer_data.drop(columns=['first_name', 'last_name'])
display(anonymized_data)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns that are rescaled before noise is added.
numeric_columns = ['age', 'income', 'purchase_amount']

# NOTE: the scaled values are written back into anonymized_data itself, so every
# later use of anonymized_data sees the min-max scaled columns, not the raw ones.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(anonymized_data[numeric_columns])
anonymized_data[numeric_columns] = scaled_values

# Privacy parameter handed to add_noise; the noise scale is divided by it.
epsilon = 5.0

def add_noise(data, epsilon, rng=None):
    """Add zero-mean Laplace noise to ``data``.

    The noise scale is ``(max(data) - min(data)) / epsilon``, where the range is
    taken over the supplied values themselves. A constant input therefore has
    zero range and receives no noise.

    Args:
        data: Numeric array-like. Objects exposing ``.shape`` (e.g. ndarrays)
            are used as-is; plain sequences are converted to a float ndarray.
        epsilon: Positive privacy parameter; smaller values give larger noise.
        rng: Optional ``numpy.random.Generator`` for reproducible noise. When
            omitted, the global ``np.random`` state is used as before.

    Returns:
        ``data + noise`` with the same shape as ``data``. Empty input is
        returned as an empty copy.

    Raises:
        ValueError: If ``epsilon`` is not strictly positive (including NaN).
    """
    # `not epsilon > 0` also rejects NaN, which would otherwise poison the noise.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be a positive number, got {epsilon!r}")

    # Plain lists/tuples have no .shape; array-likes pass through untouched.
    values = data if hasattr(data, "shape") else np.asarray(data, dtype=float)

    # max/min are undefined for empty input; there is nothing to perturb anyway.
    if np.size(values) == 0:
        return values.copy()

    sensitivity = np.max(values) - np.min(values)
    sample_laplace = np.random.laplace if rng is None else rng.laplace
    noise = sample_laplace(0, sensitivity / epsilon, values.shape)
    return values + noise

# Columns that receive noise and are then mapped back to their original units.
noise_columns = ['age', 'income', 'purchase_amount']

# Work on a copy so anonymized_data itself is left as it was.
noisy_data = anonymized_data.copy()
scaled_features = anonymized_data[noise_columns].values
noisy_data[noise_columns] = add_noise(scaled_features, epsilon)

# Undo the min-max scaling so the noisy values are back on the original scale.
noisy_data[noise_columns] = scaler.inverse_transform(noisy_data[noise_columns])

print("Noisy Data: \n", noisy_data)

# Mean purchase amount computed from the noisy values.
average_purchase = noisy_data['purchase_amount'].mean()

In [None]:
def _plot_scatter_with_trend(axis, frame, color, title):
    """Draw an age-vs-purchase scatter and its least-squares line on one axes."""
    ages = np.asarray(frame['age'], dtype=float)
    purchases = np.asarray(frame['purchase_amount'], dtype=float)
    axis.scatter(ages, purchases, color=color)

    # A degree-1 fit is only determined with at least two distinct x values;
    # with fewer, show the points alone instead of failing or fitting garbage.
    if np.unique(ages).size >= 2:
        trend = np.poly1d(np.polyfit(ages, purchases, 1))
        ordered_ages = np.sort(ages)
        axis.plot(ordered_ages, trend(ordered_ages), "r--")

    axis.set_title(title)
    axis.set_xlabel('Age')
    axis.set_ylabel('Purchase Amount')


def plot_data_with_trend_lines(original_data, anonymized_data, noisy_data):
    """Plot age vs. purchase amount for three datasets side by side.

    Each panel shows a scatter of ``'age'`` against ``'purchase_amount'`` plus a
    dashed red linear trend line fitted with ``np.polyfit``.

    Args:
        original_data: Table with ``'age'`` and ``'purchase_amount'`` columns;
            drawn in blue in the left panel.
        anonymized_data: Same columns; drawn in green in the middle panel.
        noisy_data: Same columns; drawn in red in the right panel.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    panels = (
        (original_data, 'blue', 'Original Data'),
        (anonymized_data, 'green', 'Anonymized Data'),
        (noisy_data, 'red', 'Noisy Data with Differential Privacy'),
    )

    _, axes = plt.subplots(1, 3, figsize=(18, 6))
    for axis, (frame, color, title) in zip(axes, panels):
        _plot_scatter_with_trend(axis, frame, color, title)

    plt.tight_layout()
    plt.show()

# Compare the three stages side by side. Note that anonymized_data holds the
# min-max scaled columns at this point, so its panel is on a 0-1 scale.
plot_data_with_trend_lines(
    original_data=customer_data,
    anonymized_data=anonymized_data,
    noisy_data=noisy_data,
)
