# Noise
---

### Example of a function that generates noise based on a numerical distribution given as input

In [8]:
import numpy as np

def generate_noise_forDistribution(attributes):
    min_value = np.min(attributes)
    max_value = np.max(attributes)
    return  np.random.uniform(min_value, max_value, len(attributes))

# Create a NumPy array with 20 elements
attributes = np.random.rand(20)

# Print the array
print(attributes)



noise = generate_noise_forDistribution(attributes)

print(noise)


[0.99720882 0.68530062 0.93838928 0.60623254 0.42013797 0.81812804
 0.49300195 0.96166632 0.42521716 0.36710597 0.83331392 0.2502504
 0.32713284 0.73277016 0.41525158 0.87385638 0.09751032 0.45385017
 0.15903264 0.64882106]
[0.67539727 0.95843317 0.45453216 0.53984071 0.75468449 0.71286782
 0.82795079 0.89536395 0.10956692 0.71381695 0.62346129 0.59327498
 0.29029573 0.38790022 0.7188625  0.83243165 0.56342067 0.792359
 0.38453003 0.41799687]


### Example of a function that generates noise based on a categorical distribution given as input

In [9]:
attributes = np.array(['A121', 'A122', 'A121', 'A123'])


def generate_noise_forCategorical(attributes):
    unique_attributes = np.unique(attributes)
    return np.random.choice(unique_attributes, len(attributes))


random_noise = generate_noise_forCategorical(attributes)


print(f"Mapped vector based on unique attributes: {attributes}")
print(f"Random noise vector: {random_noise}")

Mapped vector based on unique attributes: ['A121' 'A122' 'A121' 'A123']
Random noise vector: ['A121' 'A123' 'A122' 'A123']


# Countermeasures and detection
---

### Example of detecting a change in an input numerical distribution
- The detection takes place through the comparison of the median first quartile and third quartile previously observed in the original dataset taking into account a delta of 10% (which can be changed)

In [39]:
def check_integrity_of_distribution(vector, median, first_quartile, third_quartile):
    median_range = np.median(vector) 
    first_quartile_range = np.percentile(vector, 25) 
    third_quartile_range = np.percentile(vector, 75) 
    median = median * 0.1
    first_quartile = first_quartile * 0.1
    third_quartile = third_quartile * 0.1
    
    # Verifica se la distribuzione è all'interno dei range specificati
    within_range = (
        (median >= median - median_range) and (median <= median + median_range) and
        (first_quartile >= first_quartile - first_quartile_range) and (first_quartile <= first_quartile + first_quartile_range) and
        (third_quartile >= third_quartile - third_quartile_range) and (third_quartile <= third_quartile + third_quartile_range)
    )

    return within_range

# Example
my_vector = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
noise_test = generate_noise_forDistribution(my_vector)
my_median = np.median(my_vector)
my_first_quartile = np.percentile(my_vector, 25)
my_third_quartile = np.percentile(my_vector, 75)

result = check_integrity_of_distribution(noise_test, my_median, my_first_quartile, my_third_quartile)


if result:
    print("The vector distribution is within a 10% range.")
else:
    print("The vector distribution is not within a 10% range.")


The vector distribution is within a 10% range.


### Example of detecting a change in an input categorical distribution
- The detection occurs through the comparison between the count of each category observed in the original dataset taking into account a delta of 10% (which can be changed)

In [40]:
def check_category_counts_within_range(attributes, values_count):
    unique_values, counts = np.unique(attributes, return_counts=True)
    my_value_counts = dict(zip(unique_values, counts))

    for key in set(my_value_counts.keys()) & set(values_count.keys()):
        lower_bound = values_count[key] - (values_count[key] * 0.1)
        upper_bound = values_count[key] + (values_count[key] * 0.1)

        if not (lower_bound <= my_value_counts[key] <= upper_bound):
            return False

    return True


# Example
my_vector = np.array(['A', 'B', 'A', 'C', 'B', 'A', 'A', 'C', 'C', 'B'])
unique_values, counts = np.unique(my_vector, return_counts=True)
my_value_counts = dict(zip(unique_values, counts))
result = check_category_counts_within_range(my_vector,my_value_counts)


if result:
    print("The count of each category is within a 10% range.")
else:
    print("The count of each category is not within a 10% range.")


The count of each category is within a 10% range.
