In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Load the pre-processed CSV; its 'Unnamed: 0' column becomes the row index.
covid = pd.read_csv('covid_pre_proc.csv', index_col='Unnamed: 0')

In [3]:
# Display the loaded frame (bare last expression of the cell).
covid

Unnamed: 0,localCaso,sexoCaso,dataNascimento,idadeCaso,racaCor,resultadoFinalExame
0,SOBRAL,MASCULINO,14/08/2003,17.0,Parda,Negativo
1,PACAJUS,MASCULINO,07/11/1983,37.0,Parda,Negativo
2,FORTALEZA,MASCULINO,12/03/1992,28.0,Parda,Negativo
3,CAUCAIA,MASCULINO,06/03/1970,50.0,Sem Informacao,Negativo
4,FORTALEZA,FEMININO,10/01/1939,81.0,Parda,Negativo
...,...,...,...,...,...,...
825972,FORTALEZA,MASCULINO,08/08/1990,30.0,Parda,Positivo
825973,FORTALEZA,FEMININO,08/12/1982,37.0,Branca,Positivo
825974,FORTALEZA,FEMININO,02/12/1980,40.0,Parda,Negativo
825975,FORTALEZA,FEMININO,17/05/1988,32.0,Branca,Provável


**Q1:** Média da idade dos indivíduos representados no dataset.

In [4]:
def Q1(dataset):
    """Return the mean of the ``idadeCaso`` (age) column of ``dataset``."""
    ages = dataset['idadeCaso']
    return ages.mean()

In [5]:
def delta_Q1():
    """Return the sensitivity value used for Q1: the width of the assumed age range.

    Ages are assumed to lie between 0 and 120 years, so the result is 120.
    """
    # NOTE(review): this is the range of a single age value. For a mean over
    # n records the change caused by one record is presumably closer to
    # 120 / n -- confirm whether the tighter bound is intended.
    max_age, min_age = 120, 0
    return abs(max_age - min_age)

**Q2:**  Número de exames positivos (atributo *resultadoFinalExame*).

In [6]:
def Q2(dataset):
    """Count the rows whose ``resultadoFinalExame`` equals ``'Positivo'``.

    Returns a NumPy integer. When no row is positive the result is 0
    (indexing ``value_counts()`` by label raised ``KeyError`` in that case).
    Missing values never match and are therefore not counted.
    """
    is_positive = dataset['resultadoFinalExame'] == 'Positivo'
    return is_positive.sum()

In [7]:
def delta_Q2():
    """Return the sensitivity used for Q2 (a counting query): 1."""
    # A single record changes a count by at most one.
    sensitivity = 1
    return sensitivity

**Q3:** Total de exames realizados por município (atributo *municipioCaso*, presente como *localCaso* no dataset pré-processado).

In [8]:
def Q3(dataset):
    """Count the rows (exams) per municipality in the ``localCaso`` column.

    Returns a dict mapping each distinct ``localCaso`` value to the number of
    rows carrying it, with keys in sorted order. Rows whose ``localCaso`` is
    missing are not counted.
    """
    # One pass with value_counts() instead of re-filtering the whole column
    # once per municipality; sort_index() keeps the keys in sorted order, as
    # the previous np.unique-based loop produced them.
    per_county = dataset['localCaso'].value_counts().sort_index()
    return dict(zip(per_county.index, per_county.to_numpy()))

In [9]:
def delta_Q3():
    """Return the sensitivity used for Q3 (per-municipality counts): 1."""
    # Each row carries a single localCaso value, so one record changes a
    # single count by one.
    sensitivity = 1
    return sensitivity

In [10]:
# Type check on Q2's result: this evaluates to False because the count is not a
# built-in int (laplace_query tests for np.int64 instead).
type(Q2(covid)) == int

False

# Ruído de Laplace

In [11]:
def laplace_query(output, delta, eps):
    """Perturb a query result with Laplace noise of scale ``delta / eps``.

    Parameters
    ----------
    output : dict, int, NumPy integer or float
        True query result. A dict is treated as a mapping from label to count.
    delta : number
        Sensitivity of the query.
    eps : number
        Privacy budget (epsilon). For a dict the budget is split evenly
        between its entries, so each entry is perturbed with
        ``eps / len(output)``.

    Returns
    -------
    dict
        For a dict input: the same keys mapped to noisy integer counts
        (an empty dict is returned unchanged).
    int
        For an integer input: the noisy count rounded to the nearest integer.
    float
        For any other input: the noisy value.

    Notes
    -----
    A diagnostic line with the sensitivity and the epsilon actually used is
    printed for dict and non-integer outputs.
    """
    if isinstance(output, dict):
        if not output:
            # Nothing to perturb; also avoids dividing eps by zero below.
            return {}

        # Split the budget evenly across the entries of the dict.
        # NOTE(review): if every record contributes to exactly one entry, the
        # full eps could presumably be used per entry -- confirm the intent.
        eps = eps / len(output)
        labels = list(output.keys())
        counts = np.asarray(list(output.values()), dtype=float)
        print("DELTA: {}, Epsilon = {}".format(delta, eps))
        noise = stats.laplace.rvs(loc=0, scale=delta / eps, size=len(output))
        # Round to the nearest integer: truncating toward zero (astype(int) on
        # the raw floats) systematically shrinks the noisy counts.
        noisy_counts = np.rint(counts + noise).astype(int)

        return dict(zip(labels, noisy_counts))

    # Accept built-in ints and every NumPy integer width, not only np.int64,
    # so that all integer counts come back as integers.
    if isinstance(output, (int, np.integer)):
        noisy_count = output + stats.laplace.rvs(loc=0, scale=delta / eps)
        return int(np.rint(noisy_count))

    print("DELTA: {}, Epsilon = {}".format(delta, eps))
    return output + stats.laplace.rvs(loc=0, scale=delta / eps)

In [12]:
# Privacy budgets (epsilon values) to evaluate, and the queries to run under each.
epsilon = [0.1, 0.5, 1, 10]
queries = [Q1, Q2, Q3]

In [13]:
# For every query, compute the true answer once and perturb it under each
# epsilon. all_results[i] maps epsilon -> noisy answer for queries[i].
all_results = []
for q in queries:
    # Neither the true answer nor the sensitivity depends on eps, so compute
    # them once per query instead of once per epsilon.
    true_output = q(covid)
    # Look up the matching delta_<query name> function in the notebook namespace.
    sensibilidade = globals()['delta_{}'.format(q.__name__)]()
    outputs = {}
    for eps in epsilon:
        outputs[eps] = laplace_query(true_output, sensibilidade, eps)
    all_results.append(outputs)

DELTA: 120, Epsilon = 0.1
DELTA: 120, Epsilon = 0.5
DELTA: 120, Epsilon = 1
DELTA: 120, Epsilon = 10
DELTA: 1, Epsilon = 0.0005434782608695652
DELTA: 1, Epsilon = 0.002717391304347826
DELTA: 1, Epsilon = 0.005434782608695652
DELTA: 1, Epsilon = 0.05434782608695652


In [14]:
# Noisy results: one dict per query (in the order of `queries`), keyed by epsilon.
all_results

[{0.1: 1162.5180814333207,
  0.5: 204.94197252410163,
  1: -4.591811106861918,
  10: 43.6500154142042},
 {0.1: 247394, 0.5: 247397, 1: 247399, 10: 247399},
 {0.1: {'ABAIARA': 4288,
   'ACARAPE': 5494,
   'ACARAU': 7539,
   'ACOPIARA': 2033,
   'AIUABA': -99,
   'ALCANTARAS': 2240,
   'ALTANEIRA': 3090,
   'ALTO SANTO': 424,
   'AMONTADA': 3122,
   'ANTONINA DO NORTE': 116,
   'APUIARES': 2248,
   'AQUIRAZ': 1629,
   'ARACATI': 3168,
   'ARACOIABA': 544,
   'ARARENDA': 134,
   'ARARIPE': -1220,
   'ARATUBA': 631,
   'ARNEIROZ': 2573,
   'ASSARE': 3348,
   'AURORA': 2626,
   'BAIXIO': -1047,
   'BANABUIU': 1124,
   'BARBALHA': 6380,
   'BARREIRA': 460,
   'BARRO': 2042,
   'BARROQUINHA': -397,
   'BATURITÉ': 3858,
   'BEBERIBE': 11827,
   'BELA CRUZ': 6515,
   'BOA VIAGEM': 3797,
   'BREJO SANTO': 13295,
   'CAMOCIM': 8479,
   'CAMPOS SALES': 8230,
   'CANINDE': 4485,
   'CAPISTRANO': 437,
   'CARIDADE': 2201,
   'CARIRE': 1885,
   'CARIRIACU': 1604,
   'CARIUS': 4942,
   'CARNAUBAL': 12