# In [2]:
import matplotlib.pyplot as plt
import numpy as np

# Data from Appendix Tables
# k-Anonymity, training split. Every list below holds one entry per k in
# k_values, in the same order.
k_values = [3, 5, 9, 12, 15]
k_train_scores = [
    0,
    0.00015047389999978122,
    0.0020564779000000755,
    0.06139338910000025,
    0.06600792490000007,
]
k_train_se_longitude = [
    100,
    99.98901,
    99.85265,
    95.32506,
    94.9949,
]
k_train_se_latitude = [
    100,
    99.99189,
    99.85422,
    94.23451,
    93.81451,
]

# k-Anonymity, validation split (same ordering as k_values).
k_val_scores = [
    0.9702970296999998,
    0.9927992798999998,
    1.0000000000000004,
    1.0000000000000004,
    1.0000000000000004,
]
k_val_se_longitude = [
    3.64259,
    1.08587,
    0,
    0,
    0,
]
k_val_se_latitude = [
    3.26768,
    0.74771,
    0,
    0,
    0,
]

# Differential Privacy, training split. Every list below holds one entry per
# epsilon in epsilon_values, in the same order.
epsilon_values = [0.1, 1, 1.25, 1.5, 2]
dp_train_scores = [
    -332.28333333333336,
    -272.0153032099319,
    -245.9048500931458,
    -220.7239213984879,
    -169.3861045923802,
]
dp_train_se_longitude = [
    0,
    0.70825,
    2.34343,
    2.37257,
    1.02144,
]
dp_train_se_latitude = [
    0,
    0.52956,
    1.78966,
    1.81213,
    0.80232,
]

# Differential Privacy, validation split (same ordering as epsilon_values).
dp_val_scores = [
    -18.516666666666666,
    -15.219178082191782,
    -13.8875,
    -12.48314606741573,
    -9.495726495726496,
]
dp_val_se_longitude = [0, 0, 0, 0, 0]
dp_val_se_latitude = [0, 0, 0, 0, 0]

# Plotting function for k-Anonymity
def plot_k_anonymity(data_type, k_vals, scores, se_long, se_lat, filename):
    """Save a dual-axis line chart of k-anonymity results to ``filename``.

    The left y-axis carries the two squared-error series (longitude as a
    solid blue line, latitude as a solid green line). A twinned right y-axis
    carries the score as a red dashed line. All three series are plotted
    against ``k_vals``.

    Args:
        data_type: Text inserted into the title, e.g. ``'Training'``.
        k_vals: x-axis values (the k settings).
        scores: Score series, plotted against ``k_vals``.
        se_long: Longitude squared-error series, plotted against ``k_vals``.
        se_lat: Latitude squared-error series, plotted against ``k_vals``.
        filename: Destination handed directly to ``Figure.savefig``.

    Returns:
        None. The chart is written to ``filename``.

    Raises:
        Any exception raised by matplotlib while plotting or saving is
        propagated; the figure is closed first so it does not stay open.
    """
    fig, ax1 = plt.subplots(figsize=(10, 6))
    try:
        # Plot Squared Error on primary y-axis
        ax1.plot(k_vals, se_long, 'b-', label='Squared Error (Longitude)')
        ax1.plot(k_vals, se_lat, 'g-', label='Squared Error (Latitude)')
        ax1.set_xlabel('k Value')
        ax1.set_ylabel('Squared Error (%)', color='b')
        ax1.tick_params(axis='y', labelcolor='b')
        ax1.legend(loc='upper left')
        ax1.grid(True)

        # Plot Score on secondary y-axis
        ax2 = ax1.twinx()
        ax2.plot(k_vals, scores, 'r--', label='Score')
        ax2.set_ylabel('Score', color='r')
        ax2.tick_params(axis='y', labelcolor='r')
        ax2.legend(loc='upper right')

        # Address the figure and axes explicitly instead of relying on
        # pyplot's implicit "current" figure/axes.
        ax1.set_title(f'k-Anonymity: {data_type} Data')
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise it stays registered with pyplot and leaks memory.
        plt.close(fig)

# Plotting function for Differential Privacy
def plot_diff_privacy(data_type, eps_vals, scores, se_long, se_lat, filename):
    """Save a dual-axis line chart of differential-privacy results.

    The left y-axis carries the two squared-error series (longitude as a
    solid blue line, latitude as a solid green line). A twinned right y-axis
    carries the score as a red dashed line. All three series are plotted
    against ``eps_vals``.

    Args:
        data_type: Text inserted into the title, e.g. ``'Training'``.
        eps_vals: x-axis values (the epsilon settings).
        scores: Score series, plotted against ``eps_vals``.
        se_long: Longitude squared-error series, plotted against ``eps_vals``.
        se_lat: Latitude squared-error series, plotted against ``eps_vals``.
        filename: Destination handed directly to ``Figure.savefig``.

    Returns:
        None. The chart is written to ``filename``.

    Raises:
        Any exception raised by matplotlib while plotting or saving is
        propagated; the figure is closed first so it does not stay open.
    """
    fig, ax1 = plt.subplots(figsize=(10, 6))
    try:
        # Plot Squared Error on primary y-axis
        ax1.plot(eps_vals, se_long, 'b-', label='Squared Error (Longitude)')
        ax1.plot(eps_vals, se_lat, 'g-', label='Squared Error (Latitude)')
        ax1.set_xlabel('Epsilon Value')
        ax1.set_ylabel('Squared Error (%)', color='b')
        ax1.tick_params(axis='y', labelcolor='b')
        ax1.legend(loc='upper left')
        ax1.grid(True)

        # Plot Score on secondary y-axis
        ax2 = ax1.twinx()
        ax2.plot(eps_vals, scores, 'r--', label='Score')
        ax2.set_ylabel('Score', color='r')
        ax2.tick_params(axis='y', labelcolor='r')
        ax2.legend(loc='upper right')

        # Address the figure and axes explicitly instead of relying on
        # pyplot's implicit "current" figure/axes.
        ax1.set_title(f'Differential Privacy: {data_type} Data')
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise it stays registered with pyplot and leaks memory.
        plt.close(fig)

# Generate plots
# One PNG per (privacy method, data split) pair. Arguments are passed by
# keyword so each data series is visibly matched to its parameter.
plot_k_anonymity(
    data_type='Training',
    k_vals=k_values,
    scores=k_train_scores,
    se_long=k_train_se_longitude,
    se_lat=k_train_se_latitude,
    filename='k_anonymity_training.png',
)
plot_k_anonymity(
    data_type='Validation',
    k_vals=k_values,
    scores=k_val_scores,
    se_long=k_val_se_longitude,
    se_lat=k_val_se_latitude,
    filename='k_anonymity_validation.png',
)
plot_diff_privacy(
    data_type='Training',
    eps_vals=epsilon_values,
    scores=dp_train_scores,
    se_long=dp_train_se_longitude,
    se_lat=dp_train_se_latitude,
    filename='diff_privacy_training.png',
)
plot_diff_privacy(
    data_type='Validation',
    eps_vals=epsilon_values,
    scores=dp_val_scores,
    se_long=dp_val_se_longitude,
    se_lat=dp_val_se_latitude,
    filename='diff_privacy_validation.png',
)