In [1]:
import os

import numpy as np

from net18.scenarios2 import get_data_by_scenario_and_case

In [2]:
def mse(array1, array2):
    """Return the mean of the element-wise squared differences of two arrays.

    The subtraction follows NumPy broadcasting rules and the mean is taken
    over every element of the result, so a single scalar is returned.
    """
    squared_error = (array1 - array2) ** 2
    return np.mean(squared_error)

def filter_similar_entries(data, target_array, threshold, metric='mse', labels=None):
    """
    Drops the rows of `data` that are too close to `target_array`.

    A row is kept when its distance to `target_array` is greater than or
    equal to `threshold`; rows with a smaller distance are discarded. The
    entries of `labels` at the same positions as the kept rows are returned
    alongside them.

    Parameters:
        data (np.ndarray): Rows to filter; iterated along the first axis.
        target_array (np.ndarray): The array every row is compared against.
        threshold (float): Minimum distance a row needs in order to be kept.
        metric (str): The distance to use ('mse', 'euclidean', or 'cosine').
        labels (np.ndarray, optional): Values aligned row-for-row with `data`.
            When omitted, the notebook-level global `alt_y` is used, which is
            what this function always did before the parameter existed.

    Returns:
        tuple[np.ndarray, np.ndarray]: The kept rows of `data` and the
        matching entries of `labels`. When nothing is kept, both arrays are
        empty along the first axis but retain their remaining dimensions.

    Raises:
        ValueError: If `metric` is not supported, or if `labels` and `data`
            have different lengths.
        NameError: If `labels` is omitted and no global `alt_y` is defined.
    """
    if metric == 'mse':
        dist_func = mse
    elif metric == 'euclidean':
        # Imported lazily so SciPy is only required for the metrics that use it.
        from scipy.spatial.distance import euclidean
        dist_func = euclidean
    elif metric == 'cosine':
        from scipy.spatial.distance import cosine
        dist_func = cosine
    else:
        raise ValueError("Unsupported metric. Use 'mse', 'euclidean', or 'cosine'.")

    if labels is None:
        # Legacy fallback: existing callers rely on the global `alt_y`.
        # Pass `labels` explicitly to avoid depending on notebook state.
        labels = alt_y

    data = np.asarray(data)
    labels = np.asarray(labels)
    if len(labels) != len(data):
        raise ValueError(
            f"labels has {len(labels)} entries but data has {len(data)} rows."
        )

    # One boolean per row: True when the row is far enough from the target.
    keep = np.array(
        [dist_func(entry, target_array) >= threshold for entry in data],
        dtype=bool,
    )

    # Boolean-mask indexing keeps rows and labels aligned and preserves the
    # trailing dimensions even when no row survives the filter.
    return data[keep], labels[keep]

In [3]:
# Fetch the data set for net 'net18v1'. The two leading 1s are presumably the
# scenario and case numbers (inferred from the function name) — confirm against
# net18.scenarios2.
s1_c1_data = get_data_by_scenario_and_case(1, 1, net_name='net18v1')

In [4]:
# Pull items 0..3 of the loaded data into individual names, in index order.
x, x_hat, y, y_hat = (s1_c1_data[position] for position in range(4))

# Display x as the cell output.
x

array([[ 0.01      ,  0.03      , -0.        ,  0.04      , -0.        ,
        -0.2       , -0.        ,  0.08      ,  0.08      , -0.        ,
         0.04      , -0.        , -0.5       , -0.        , -0.        ,
         0.01      ,  0.05      ,  0.01      ,  0.01      , -0.        ,
         0.02      , -0.        , -0.06      , -0.        ,  0.04      ,
         0.04      , -0.        ,  0.02      , -0.        , -0.2       ,
        -0.        , -0.        ,  0.01      ,  0.03      ,  1.01      ,
         1.02371975,  1.02655259,  1.02871535,  1.0234696 , -0.34755082,
        -0.44986314, -0.29107685,  0.04000132, -0.49179786,  0.06002274,
         0.01000065, -0.0631883 , -0.13655364, -0.09723596,  0.02000057,
        -0.19758448,  0.04000644,  0.01000017]])

In [5]:
# Load the alternative data set from disk. The two arrays are used row-for-row
# together: filter_similar_entries picks alt_y[i] for every kept row i of alt_x.
alt_x = np.load('../net18/simulations_net18/net_18_v1/measured_data_x_alt.npy')
alt_y = np.load('../net18/simulations_net18/net_18_v1/data_y_alt.npy')

In [6]:
# Show the dimensions of the loaded alternative input array.
alt_x.shape

(30001, 53)

In [7]:
# Show the dimensions of x; it is flattened with ravel() before being compared to the rows of alt_x.
x.shape

(1, 53)

In [13]:
# Distance metric and cut-off used to drop rows of alt_x that lie too close to x.
# An earlier configuration paired metric = 'euclidean' with threshold = 6.5.
threshold = 0.8  # Adjust as needed
metric = 'mse'

# Keep only the rows (and their row-aligned alt_y entries) whose distance to
# the flattened x is at least `threshold`.
filtered_measured_alt_x, filtered_alt_y = filter_similar_entries(alt_x, x.ravel(), threshold, metric)

# Persist the filtered arrays. The output directory is created first because
# np.save does not create missing parent directories.
output_dir = '../net18/simulations_net18/net_18_v1_filtered'
os.makedirs(output_dir, exist_ok=True)
np.save(f'{output_dir}/filtered_measured_alt_x.npy', filtered_measured_alt_x)
np.save(f'{output_dir}/filtered_alt_y.npy', filtered_alt_y)

# Report the sizes with distinct labels so the x and y shapes can be told apart.
print(f"Original data size: {alt_x.shape}")
print(f"Filtered x size: {filtered_measured_alt_x.shape}")
print(f"Filtered y size: {filtered_alt_y.shape}")


Original data size: (30001, 53)
Filtered data size: (9513, 53)
Filtered data size: (9513, 18)
