In [None]:
import psycopg2
import math
import numpy as np
import pandas as pd
from typing import List, Literal, Dict, Any, Tuple, Optional
from decimal import Decimal, ROUND_HALF_UP
from datetime import datetime
import os
import scipy.optimize as opt
import math,random
import lp,nt,R2T
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
#compute relative absolute error
def error(true,noise) -> float:
    """Return the relative absolute error of ``noise`` with respect to ``true``.

    Both values are coerced to ``float`` first so that a ``Decimal`` ground truth
    can be compared with a float estimate without raising ``TypeError``.
    The denominator is ``abs(true)`` so the result is non-negative even when the
    true value is negative.

    Raises:
        ZeroDivisionError: if ``true`` is zero (the relative error is undefined).
    """
    true_val = float(true)
    noise_val = float(noise)
    return abs(noise_val - true_val) / abs(true_val)

In [None]:
#compute a trimmed mean of the error list to reduce the impact of extreme outliers.
def filterError(errors: list[float],n) -> float:
    """Return the trimmed mean of ``errors``.

    The values are sorted, the ``n`` smallest and the ``n`` largest are dropped,
    and the remaining values are averaged.

    Args:
        errors: error values to aggregate.
        n: number of values to drop from each end; ``0`` means a plain mean.

    Raises:
        ValueError: if ``n`` is negative or trimming leaves no values to average.
    """
    if n < 0:
        raise ValueError(f'n must be non-negative, got {n}')
    sorted_err = sorted(errors)
    # Explicit end index: a slice written as [n:-n] is empty when n == 0.
    chosen_err = sorted_err[n:len(sorted_err) - n]
    if not chosen_err:
        raise ValueError(
            f'cannot trim {n} values from each end of {len(sorted_err)} errors'
        )
    return sum(chosen_err) / len(chosen_err)

In [None]:
def test(epsilons,rounds,agg_type,true):
    noi_r2t_dic = {}#stores raw outputs for each epsilon from r2t
    noi_lp_dic = {}#stores raw outputs for each epsilon from LP baseline
    noi_nt_dic = {}#stores raw outputs for each epsilon from NT baseline

    err_r2t_dic = {}#stores aggregated error for each epsilon from r2t
    err_lp_dic = {}#stores aggregated error for each epsilon from LP baseline
    err_nt_dic = {}#stores aggregated error for each epsilon from NT baseline
    #iterate each epsilon
    for epsilon in epsilons:
        #store raw outputs for each method of the current epsilon
        r2t_li = []
        lp_li = []
        nt_li = []
        #store relative errors for each method of the current epsilon
        error_r2t = []
        error_lp = []
        error_nt = []
        
        for i in range(rounds):
            if agg_type == 'sum':
                #for r2t
                r2t_result = R2T.r2t_sum(contrib_list, 
                             epsilon=epsilon, 
                             gs_q=gs_q, beta=beta)
                #for lp baseline
                lp_result = lp.lp_baseline_sum(contrib_list,
                                epsilon=epsilon, 
                                gs_q=gs_q,   
                                return_details=True)
                #for nt baseline
                nt_result = nt.NT_sum(contrib_list, 
                              epsilon, 
                              gs_q)
                
            elif agg_type == 'count':
                #for r2t
                r2t_result = R2T.r2t_count(contrib_list, 
                             epsilon=epsilon, 
                             gs_q=gs_q, beta=beta)
                #for lp baseline
                lp_result = lp.lp_baseline_count(contrib_list,
                                epsilon=epsilon,  
                                gs_q=gs_q,  
                                return_details=True)
                #for nt baseline
                nt_result = nt.NT_sum(contrib_list, 
                              epsilon, 
                              gs_q)

            r2t_li.append(r2t_result)
            err_r2t = error(true,r2t_result['noise_answer'])
            error_r2t.append(err_r2t)
            
            lp_li.append(lp_result)
            err_lp = error(true,lp_result['noise_answer'])
            error_lp.append(err_lp)
            
            nt_li.append(nt_result)
            err_nt = error(true,nt_result['noise_answer']) 
            error_nt.append(err_nt)
            
        #filter error
        avg_r2t = filterError(error_r2t,n)
        avg_lp = filterError(error_lp,n)
        avg_nt = filterError(error_nt,n)
        
        noi_r2t_dic[epsilon] = r2t_li
        noi_lp_dic[epsilon] = lp_li
        noi_nt_dic[epsilon] = nt_li

        err_r2t_dic[epsilon] = avg_r2t
        err_lp_dic[epsilon] = avg_lp
        err_nt_dic[epsilon] = avg_nt

        print(f'Done for Epsilon {epsilon}')
    return noi_r2t_dic,noi_lp_dic,noi_nt_dic,err_r2t_dic,err_lp_dic,err_nt_dic
            

In [None]:
#connect db
# Connection settings are taken from the standard libpq environment variables
# when they are set and otherwise fall back to the local defaults this notebook
# was written against.
# No password is passed and none should be stored in the notebook: libpq picks
# it up from PGPASSWORD or ~/.pgpass when the server requires one.
db = psycopg2.connect(
    host=os.environ.get("PGHOST", "localhost"),
    port=int(os.environ.get("PGPORT", "5432")),
    user=os.environ.get("PGUSER", "postgres"),
    database=os.environ.get("PGDATABASE", "tpch_db"),
)
tpch_cursor = db.cursor()

In [None]:
#experiment configuration shared by the cells below
rounds = 100        #repetitions per epsilon
n = 20              #values trimmed from each end of the error list by filterError
beta = 0.1          #passed as beta= to the R2T calls
gs_q = 1_000_000    #passed as gs_q to the SUM / COUNT mechanisms
#epsilon grid for the SUM and COUNT queries (each value doubles the previous one)
epsilons = [
    0.1, 0.2, 0.4, 0.8, 1.6,
    3.2, 6.4, 12.8, 25.6, 51.2,
]

In [None]:
#TPC-H-style queries as (sql, agg_type) pairs: Q3,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q14,Q20 are tagged 'sum', followed by Q16,Q13,Q1,Q4 tagged 'count'
sql_li = [
    #Q3 (sum operation)
    (
        '''
        SELECT 
            o.o_orderkey AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM customer c
        JOIN orders o   ON c.c_custkey = o.o_custkey
        JOIN lineitem l ON o.o_orderkey = l.l_orderkey
        WHERE c.c_mktsegment = 'BUILDING'
          AND o.o_orderdate < DATE '1995-03-15'
          AND l.l_shipdate  > DATE '1995-03-15'
        GROUP BY o.o_orderkey
        ORDER BY o.o_orderkey;
        ''',
        'sum'
    ),

    #Q5 (sum operation)
    (
        '''
        SELECT 
            n.n_name AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM customer c
        JOIN orders o      ON c.c_custkey = o.o_custkey
        JOIN lineitem l    ON l.l_orderkey = o.o_orderkey
        JOIN supplier s    ON l.l_suppkey = s.s_suppkey
        JOIN nation n      ON s.s_nationkey = n.n_nationkey
        JOIN region r      ON r.r_regionkey = n.n_regionkey
        WHERE r.r_name = 'ASIA'
          AND o.o_orderdate >= DATE '1995-03-15'
          AND o.o_orderdate <  DATE '1996-03-15'
        GROUP BY 
            n.n_name
        ORDER BY 
            contrib DESC;
        ''',
        'sum'
    ),

    #Q6 (sum operation)
    (
        '''
        SELECT 
            l.l_orderkey AS entity_id,
            SUM(l.l_extendedprice * l.l_discount) AS contrib
        FROM 
            lineitem l
        WHERE
            l.l_shipdate >= DATE '1994-01-01'
            AND l.l_shipdate <  DATE '1995-01-01'
            AND l.l_discount BETWEEN 0.05 - 0.01 AND 0.05 + 0.01
            AND l.l_quantity < 25
        GROUP BY 
            l.l_orderkey
        ORDER BY 
            l.l_orderkey; 
        ''',
        'sum'
    ),

    #Q7 (sum operation)
    (
        '''
        SELECT 
            l.l_orderkey AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM 
            supplier s
        JOIN lineitem l    ON s.s_suppkey = l.l_suppkey
        JOIN orders o      ON o.o_orderkey = l.l_orderkey
        JOIN customer c    ON c.c_custkey = o.o_custkey
        JOIN nation n1     ON s.s_nationkey = n1.n_nationkey
        JOIN nation n2     ON c.c_nationkey = n2.n_nationkey
        WHERE 
            (
                (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY')
                OR 
                (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE')
            )
            AND l.l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
        GROUP BY 
            l.l_orderkey
        ORDER BY 
            l.l_orderkey;
        ''',
        'sum'
    ),

    #Q8 (sum operation)
    (
        '''
        SELECT 
            EXTRACT(YEAR FROM o.o_orderdate) AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM part p
        JOIN lineitem l  ON p.p_partkey = l.l_partkey
        JOIN supplier s  ON s.s_suppkey = l.l_suppkey
        JOIN orders o    ON o.o_orderkey = l.l_orderkey
        JOIN customer c  ON c.c_custkey = o.o_custkey
        JOIN nation n1   ON c.c_nationkey = n1.n_nationkey
        JOIN region r    ON n1.n_regionkey = r.r_regionkey
        JOIN nation n2   ON s.s_nationkey = n2.n_nationkey
        WHERE r.r_name = 'AMERICA'
          AND o.o_orderdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
          AND p.p_type = 'ECONOMY ANODIZED STEEL'
        GROUP BY entity_id
        ORDER BY entity_id;
        ''',
        'sum'
    ),

    #Q9 (sum operation)
    (
        '''
        SELECT
            SUM(
                l.l_extendedprice * (1 - l.l_discount) 
                - ps.ps_supplycost * l.l_quantity
            ) AS contrib
        FROM part p
        JOIN lineitem l  ON p.p_partkey = l.l_partkey
        JOIN supplier s  ON s.s_suppkey = l.l_suppkey
        JOIN partsupp ps ON ps.ps_suppkey = s.s_suppkey
                        AND ps.ps_partkey = p.p_partkey
        JOIN orders o    ON o.o_orderkey = l.l_orderkey
        JOIN nation n    ON s.s_nationkey = n.n_nationkey
        GROUP BY 
            n.n_name, EXTRACT(YEAR FROM o.o_orderdate)
        ORDER BY 
            n.n_name,
            EXTRACT(YEAR FROM o.o_orderdate) DESC;
        ''',
        'sum'
    ),

    #Q10 (sum operation)
    (
        '''
        SELECT 
            c.c_custkey AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM customer c
        JOIN orders o   ON c.c_custkey = o.o_custkey
        JOIN lineitem l ON o.o_orderkey = l.l_orderkey
        JOIN nation n   ON c.c_nationkey = n.n_nationkey
        WHERE o.o_orderdate >= DATE '1994-01-01'
          AND o.o_orderdate <  DATE '1994-04-01'
          AND l.l_returnflag = 'R'
        GROUP BY c.c_custkey
        ORDER BY c.c_custkey;
        ''',
        'sum'
    ),

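    #Q11 (sum operation) -- NOTE(review): this SQL is identical to the Q10 entry above; confirm whether a distinct Q11 query was intended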
    (
        '''
        SELECT 
            c.c_custkey AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM customer c
        JOIN orders o   ON c.c_custkey = o.o_custkey
        JOIN lineitem l ON o.o_orderkey = l.l_orderkey
        JOIN nation n   ON c.c_nationkey = n.n_nationkey
        WHERE o.o_orderdate >= DATE '1994-01-01'
          AND o.o_orderdate <  DATE '1994-04-01'
          AND l.l_returnflag = 'R'
        GROUP BY c.c_custkey
        ORDER BY c.c_custkey;
        ''',
        'sum'
    ),

    #Q12 (sum operation)
    (
        '''
        SELECT
            o.o_orderkey AS entity_id,
            SUM(
                CASE 
                    WHEN o.o_orderpriority IN ('1-URGENT', '2-HIGH')
                    THEN 1 ELSE 0 
                END
            ) AS contrib
        FROM orders o
        JOIN lineitem l ON o.o_orderkey = l.l_orderkey
        WHERE l.l_shipmode IN ('MAIL', 'SHIP')
          AND l.l_commitdate < l.l_receiptdate
          AND l.l_shipdate   < l.l_commitdate
          AND l.l_receiptdate >= DATE '1994-01-01'
          AND l.l_receiptdate <  DATE '1995-01-01'
        GROUP BY o.o_orderkey
        ORDER BY o.o_orderkey;
        ''',
        'sum'
    ),

    #Q14 (sum operation)
    (
        '''
        SELECT
            p.p_partkey AS entity_id,
            SUM(l.l_extendedprice * (1 - l.l_discount)) AS contrib
        FROM lineitem l
        JOIN part p ON l.l_partkey = p.p_partkey
        WHERE p.p_type LIKE 'PROMO%'
          AND l.l_shipdate >= DATE '1995-09-01'
          AND l.l_shipdate <  DATE '1995-10-01'
        GROUP BY p.p_partkey
        ORDER BY p.p_partkey;
        ''',
        'sum'
    ),

    #Q20 (sum operation; every qualifying partsupp row contributes a constant 1)
    (
        '''
        SELECT
            ps.ps_suppkey AS entity_id,
            1 AS contrib
        FROM partsupp ps
        JOIN part p ON ps.ps_partkey = p.p_partkey
        WHERE ps.ps_availqty > (
                SELECT 0.5 * SUM(l.l_quantity)
                FROM lineitem l
                WHERE l.l_partkey = ps.ps_partkey
                  AND l.l_suppkey = ps.ps_suppkey
                  AND l.l_shipdate >= DATE '1994-01-01'
                  AND l.l_shipdate <  DATE '1995-01-01'
            )
        ORDER BY ps.ps_suppkey;
        ''',
        'sum'
    ),

    #Q16 (count operation)
    (
        '''
        SELECT
            CONCAT(p.p_brand, ' / ', p.p_type, ' / ', p.p_size) AS entity_id,
            COUNT(DISTINCT ps.ps_suppkey) AS contrib
        FROM part p
        JOIN partsupp ps ON p.p_partkey = ps.ps_partkey
        WHERE p.p_brand <> 'Brand#45'
          AND p.p_type NOT LIKE 'ECONOMY%'
          AND p.p_size IN (49, 14, 23, 45, 19, 3, 36, 9)
          AND ps.ps_suppkey NOT IN (
              SELECT s.s_suppkey
              FROM supplier s
              WHERE s.s_comment LIKE '%Customer%Complaints%'
          )
        GROUP BY p.p_brand, p.p_type, p.p_size
        ORDER BY contrib DESC, p.p_brand, p.p_type, p.p_size;
        ''',
        'count'
    ),

    #Q13 (count operation)
    (
        '''
        SELECT
            c.c_custkey AS entity_id,
            COUNT(o.o_orderkey) AS contrib
        FROM customer c
        LEFT JOIN orders o
            ON c.c_custkey = o.o_custkey
           AND o.o_comment NOT LIKE '%special%packages%'
        GROUP BY c.c_custkey
        ORDER BY c.c_custkey;
        ''',
        'count'
    ),

    #Q1 (count operation)
    (
        '''
        SELECT
            l.l_returnflag || '-' || l.l_linestatus AS entity_id,
            COUNT(*) AS contrib
        FROM lineitem l
        WHERE l.l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
        GROUP BY l.l_returnflag, l.l_linestatus
        ORDER BY entity_id;
        ''',
        'count'
    ),

    #Q4 (count operation)
    (
        '''
        SELECT
            o.o_orderpriority AS entity_id,
            COUNT(*) AS contrib
        FROM orders o
        WHERE o.o_orderdate >= DATE '1995-01-01'
          AND o.o_orderdate <  DATE '1995-04-01'
          AND EXISTS (
              SELECT 1
              FROM lineitem l
              WHERE l.l_orderkey = o.o_orderkey
                AND l.l_commitdate < l.l_receiptdate
          )
        GROUP BY o.o_orderpriority
        ORDER BY o.o_orderpriority;
        ''',
        'count'
    )
    
]

In [None]:
#position in sql_li of the first query to evaluate: 11 skips the entries tagged
#'sum' and runs only the ones tagged 'count'; set it to 0 to run every query
start_index = 11
query_num = [3,5,6,7,8,9,10,11,12,14,20,16,13,1,4]
#query_num labels sql_li position by position; a length mismatch would mislabel the plots
assert len(query_num) == len(sql_li), 'query_num needs exactly one label per sql_li entry'

for index, sql in enumerate(sql_li[start_index:], start=start_index):
    sql_query, agg_type = sql
    df = pd.read_sql(sql_query,db)
    #contrib_list is read as a module-level name inside test()
    contrib_list = df['contrib'].tolist()
    true = float(np.sum(contrib_list))

    noi_r2t_dic,noi_lp_dic,noi_nt_dic,err_r2t_dic,err_lp_dic,err_nt_dic = test(epsilons,rounds,agg_type,true)
    print(f'Done for {index+1} ==========================')

    err_r2t = list(err_r2t_dic.values())
    err_lp = list(err_lp_dic.values())
    err_nt = list(err_nt_dic.values())

    fig = plt.figure(figsize=(10,5))

    plt.plot(epsilons,err_r2t,marker='o',linewidth=2,markersize=7,label='R2T')
    plt.plot(epsilons,err_lp,marker='s',linewidth=2,markersize=7,label='LP Baseline')
    plt.plot(epsilons,err_nt,marker='^',linewidth=2,markersize=7,label='NT Baseline')

    plt.xscale('log', base=2)
    plt.xlabel('ε (privacy budget)', fontsize=12)
    plt.ylabel('Relative Error', fontsize=12)
    plt.title(f'{agg_type} Aggregation – Error vs ε (R2T vs Baselines), Query {query_num[index]}', fontsize=14)

    plt.grid(True, linestyle='--', alpha=0.4)
    plt.xticks(epsilons, epsilons,rotation=45)
    plt.legend(fontsize=12)
    save_path = f'{agg_type} Aggregation – Error vs ε (R2T vs Baselines), Query {query_num[index]}2'
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()
    #release the figure so one figure per query does not pile up in memory
    plt.close(fig)

In [None]:
#AVG operation on Q1
#the average is obtained as SUM / COUNT, so one query is issued for each part

#epsilon grid for the average query
epsilons2 = [25.6,51.2,102.4,204.8,409.6,819.2,1638.4,3276.8]

sql_sum_query = '''
SELECT 
    CONCAT(l_returnflag, '_', l_linestatus) AS entity_id,
    SUM(l_quantity) AS contrib
FROM lineitem
WHERE l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY l_returnflag, l_linestatus
ORDER BY entity_id;
'''

sql_count_query = '''
SELECT
    CONCAT(l_returnflag, '_', l_linestatus) AS entity_id,
    COUNT(*) AS contrib
FROM
    lineitem
WHERE
    l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    entity_id;
'''

#per-group contributions of the SUM part and its exact total
df_sum = pd.read_sql(sql_sum_query,db)
contrib_list_sum = df_sum['contrib'].tolist()
true1 = float(np.sum(contrib_list_sum))

#per-group contributions of the COUNT part and its exact total
df_count = pd.read_sql(sql_count_query,db)
contrib_list_count = df_count['contrib'].tolist()
true2 = float(np.sum(contrib_list_count))

#exact average used as the reference value
true_avg = true1 / true2

#epsilons for which results were actually recorded (skipped ones are left out)
epsilon_avg = []

noi_r2t_dic = {}#store avg raw output for each epsilon from r2t
noi_lp_dic = {}#store avg raw output for each epsilon from lp baseline
noi_nt_dic = {}#store avg raw output for each epsilon from nt baseline

err_r2t_dic = {}#store aggregated error for each epsilon from r2t
err_lp_dic = {}#store aggregated error for each epsilon from lp baseline
err_nt_dic = {}#store aggregated error for each epsilon from nt baseline

for epsilon in epsilons2:
    #raw averages of the current epsilon, one entry per round
    r2t_li, lp_li, nt_li = [], [], []
    #relative errors of the current epsilon, one entry per round
    error_r2t, error_lp, error_nt = [], [], []
    #set when a noisy count of exactly 0 makes sum / count undefined
    zero_count = False

    for _ in range(rounds):
        #for r2t
        r2t_sum = R2T.r2t_sum(contrib_list_sum,
                              epsilon=epsilon,
                              gs_q=gs_q, beta=beta)
        r2t_count = R2T.r2t_count(contrib_list_count,
                                  epsilon=epsilon,
                                  gs_q=gs_q, beta=beta)

        #for lp
        lp_sum = lp.lp_baseline_sum(contrib_list_sum,
                                    epsilon=epsilon,
                                    gs_q=gs_q,
                                    return_details=True)
        lp_count = lp.lp_baseline_count(contrib_list_count,
                                        epsilon=epsilon,
                                        gs_q=gs_q,
                                        return_details=True)

        #for nt
        #NOTE(review): NT_sum is used for the count part as well, as before -- confirm this is intended
        nt_sum = nt.NT_sum(contrib_list_sum,
                           epsilon,
                           gs_q)
        nt_count = nt.NT_sum(contrib_list_count,
                             epsilon,
                             gs_q)

        #check the denominators before dividing; checking after the loop would
        #only look at the last round and would come too late for the divisions
        if any(res['noise_answer'] == 0 for res in (r2t_count, lp_count, nt_count)):
            zero_count = True
            break

        #compute raw output for average
        r2t_avg = float(r2t_sum['noise_answer'] / r2t_count['noise_answer'])
        lp_avg = float(lp_sum['noise_answer'] / lp_count['noise_answer'])
        nt_avg = float(nt_sum['noise_answer'] / nt_count['noise_answer'])

        r2t_li.append(r2t_avg)
        error_r2t.append(error(true_avg,r2t_avg))

        lp_li.append(lp_avg)
        error_lp.append(error(true_avg,lp_avg))

        nt_li.append(nt_avg)
        error_nt.append(error(true_avg,nt_avg))

    if zero_count:
        #this epsilon is left out of every result dict and of epsilon_avg
        print(f'Skipped {epsilon}: a noisy count was 0')
        continue

    #filter error
    avg_r2t = filterError(error_r2t,n)
    avg_lp = filterError(error_lp,n)
    avg_nt = filterError(error_nt,n)

    noi_r2t_dic[epsilon] = r2t_li
    noi_lp_dic[epsilon] = lp_li
    noi_nt_dic[epsilon] = nt_li

    err_r2t_dic[epsilon] = avg_r2t
    err_lp_dic[epsilon] = avg_lp
    err_nt_dic[epsilon] = avg_nt

    epsilon_avg.append(epsilon)
    print(f'Done for {epsilon}')

In [None]:
#flatten each per-epsilon error dict into a list of values (insertion order) for plotting
err_r2t, err_lp, err_nt = (
    list(err_dic.values())
    for err_dic in (err_r2t_dic, err_lp_dic, err_nt_dic)
)

In [None]:
#plotting
#the x values are epsilon_avg, i.e. the epsilons that actually produced results;
#an epsilon skipped by the experiment loop has no entry in the error lists, so
#plotting against the full epsilons2 grid would fail on mismatched lengths
plt.figure(figsize=(10,5))

plt.plot(epsilon_avg,err_r2t,marker='o',linewidth=2,markersize=7,label='R2T')
plt.plot(epsilon_avg,err_lp,marker='s',linewidth=2,markersize=7,label='LP Baseline')
plt.plot(epsilon_avg,err_nt,marker='^',linewidth=2,markersize=7,label='NT Baseline')

plt.xscale('log', base=2)
plt.xlabel('ε (privacy budget)', fontsize=12)
plt.ylabel('Relative Error', fontsize=12)
plt.title('avg Aggregation – Error vs ε (R2T vs Baselines), Query 1', fontsize=14)

plt.grid(True, linestyle='--', alpha=0.4)
plt.xticks(epsilon_avg, epsilon_avg,rotation=45)
plt.legend(fontsize=12)

save_path = 'avg Aggregation – Error vs ε (R2T vs Baselines), Query 1'
plt.savefig(save_path, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
#try MIN operation query Q2 (R2T only)
#define epsilon list for min operation query
epsilons3 = [0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,204.8,409.6,819.2,1638.4,3276.8]
sql_min_query = '''
SELECT
    MIN(ps.ps_supplycost) as contrib
FROM
    partsupp ps
    JOIN supplier s ON ps.ps_suppkey = s.s_suppkey
    JOIN nation n ON s.s_nationkey = n.n_nationkey
    JOIN region r ON n.n_regionkey = r.r_regionkey
    JOIN part p ON ps.ps_partkey = p.p_partkey  
WHERE
    r.r_name = 'ASIA'
GROUP BY
    s.s_suppkey
ORDER BY
    s.s_suppkey;
'''
df_min = pd.read_sql(sql_min_query,db)
contrib_min = df_min['contrib'].tolist()
true_min = min(contrib_min)

noi_r2t_dic = {}#raw R2T outputs for each epsilon
err_r2t_dic = {}#aggregated error for each epsilon

#draws that hit the true minimum exactly are rejected (as before), so the number
#of draws is capped: without a cap the loop never ends once every draw is exact
max_attempts = 1000 * rounds

for epsilon in epsilons3:
    r2t_li = []
    error_r2t = []
    attempts = 0
    #collect `rounds` accepted draws, the same sample size as the other experiments
    while len(r2t_li) < rounds:
        if attempts >= max_attempts:
            raise RuntimeError(
                f'epsilon={epsilon}: only {len(r2t_li)} of {rounds} non-exact MIN '
                f'results after {attempts} draws'
            )
        attempts += 1
        #NOTE(review): gs_q is fixed to 20 here instead of the module-level gs_q -- confirm
        r2t_min = R2T.r2t_min(contrib_min,
                              epsilon=epsilon,
                              gs_q=20, beta=beta)

        if r2t_min['noise_answer'] != true_min:
            r2t_li.append(r2t_min)
            err_r2t = error(true_min,r2t_min['noise_answer'])
            error_r2t.append(err_r2t)

    #filter error
    avg_r2t = filterError(error_r2t,n)
    noi_r2t_dic[epsilon] = r2t_li

    err_r2t_dic[epsilon] = avg_r2t

    print(f'Done for {epsilon}')


In [None]:
#R2T relative error per epsilon for the MIN query, drawn on a base-2 log x-axis
err_r2t = list(err_r2t_dic.values())

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(epsilons3, err_r2t, marker='o')

ax.set_xscale('log', base=2)
ax.set_xlabel('ε (privacy budget)')
ax.set_ylabel('Relative Error')
ax.set_title('MIN Aggregation – R2T Error vs ε Query 2')
ax.grid(True, linestyle='--', alpha=0.4)

#the file name has no extension, so matplotlib appends its default format
save_path = 'min Aggregation – Error vs ε (R2T vs Baselines), Query 2'
fig.savefig(save_path, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
#MAX operation, query Q15
#The SQL below is a bare string literal: it is not assigned to any name and is
#never sent to the database.
#NOTE(review): it reads from a `revenue` relation that is not defined anywhere in
#this notebook -- presumably kept only as a reference; confirm or delete.
'''
SELECT
    region AS entity_id,
    MAX(total_revenue) AS contrib
FROM
    revenue
GROUP BY
    region
ORDER BY
    region;
'''

In [None]:
#MAX operation (R2T only)
#define epsilon list for max operation query
epsilons3 = [0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,204.8,409.6,819.2,1638.4,3276.8]
sql_max_query = """
SELECT
    s.s_suppkey AS entity_id,
    MAX(ps.ps_supplycost) AS contrib
FROM
    partsupp ps
JOIN supplier s ON ps.ps_suppkey = s.s_suppkey
GROUP BY
    s.s_suppkey
ORDER BY
    s.s_suppkey;
"""
df_max = pd.read_sql(sql_max_query,db)
contrib_max = df_max['contrib'].tolist()
true_max = max(contrib_max)

noi_r2t_dic = {}#raw R2T outputs for each epsilon
err_r2t_dic = {}#aggregated error for each epsilon

#draws that hit the true maximum exactly are rejected (as before), so the number
#of draws is capped: without a cap the loop never ends once every draw is exact
max_attempts = 1000 * rounds

for epsilon in epsilons3:
    r2t_li = []
    error_r2t = []
    attempts = 0
    #collect `rounds` accepted draws, the same sample size as the other experiments
    while len(r2t_li) < rounds:
        if attempts >= max_attempts:
            raise RuntimeError(
                f'epsilon={epsilon}: only {len(r2t_li)} of {rounds} non-exact MAX '
                f'results after {attempts} draws'
            )
        attempts += 1
        #NOTE(review): gs_q is fixed to 20 here instead of the module-level gs_q -- confirm
        r2t_max = R2T.r2t_max(contrib_max,
                              epsilon=epsilon,
                              gs_q=20, beta=beta)

        if r2t_max['noise_answer'] != true_max:
            r2t_li.append(r2t_max)
            err_r2t = error(true_max,r2t_max['noise_answer'])
            error_r2t.append(err_r2t)

    #filter error
    avg_r2t = filterError(error_r2t,n)
    noi_r2t_dic[epsilon] = r2t_li

    err_r2t_dic[epsilon] = avg_r2t

    print(f'Done for {epsilon}')


In [None]:
#R2T relative error per epsilon for the MAX query, drawn on a base-2 log x-axis
err_r2t = list(err_r2t_dic.values())

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(epsilons3, err_r2t, marker='o')

ax.set_xscale('log', base=2)
ax.set_xlabel('ε (privacy budget)')
ax.set_ylabel('Relative Error')
ax.set_title('MAX Aggregation – R2T Error vs ε Query 15')
ax.grid(True, linestyle='--', alpha=0.4)

#the file name has no extension, so matplotlib appends its default format
save_path = 'max Aggregation – Error vs ε (R2T vs Baselines), Query 15'
fig.savefig(save_path, dpi=300, bbox_inches='tight')
plt.show()