In [1]:
import numpy as np
import pandas as pd
from MetricMethod import *
from MetricMethod.Metrics import EuclideanMetric
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [2]:
# Load the Iris data set and split it into train and test parts.
# test_size=0.2 keeps 20% of the samples for testing; the fixed
# random_state makes the split identical on every run.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)
# Bare last expression: the notebook displays the training labels.
y_train

array([0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 1, 2,
       1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 2, 2,
       1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0, 1, 1, 2, 1, 2, 2, 1,
       0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 0, 2, 1, 2,
       1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 2, 1, 2,
       1, 1, 2, 2, 0, 1, 2, 0, 1, 2])

In [3]:
# Fit the KNN classifier (constructed with metric="cosine") on the training
# split, predict the held-out samples and print the resulting accuracy.
knn = KNN(metric="cosine")
knn.fit(X_train, y_train)
predict = knn.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=predict))

1.0


In [4]:
# Parzen-window classifier constructed with width=1 and method="kdtree".
# Fit on the training split, then print the test accuracy followed by the
# predicted labels themselves.
parzen_window_fixed = ParzenWindowFixedWidth(width=1, method="kdtree")
parzen_window_fixed.fit(X_train, y_train)
predict = parzen_window_fixed.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=predict))
print(predict)

1.0
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [5]:
# Parzen-window classifier constructed with countNeighbor=5 and
# method="kdtree". Fit on the training split, then print the test accuracy
# followed by the predicted labels themselves.
parzen_window_variable = ParzenWindowVariableWidth(countNeighbor=5, method="kdtree")
parzen_window_variable.fit(X_train, y_train)
predict = parzen_window_variable.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=predict))
print(predict)

1.0
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [9]:
import numpy as np

# Sample vectors, one per row
vectors = np.array([[1, 2, 3],
                    [4, 5, 6],
                    [7, 8, 9]])

# Pairwise Euclidean distances via broadcasting:
# entry (i, j) is the norm of vectors[i] - vectors[j]
pairwise_diff = vectors[:, np.newaxis, :] - vectors[np.newaxis, :, :]
distances = np.linalg.norm(pairwise_diff, axis=-1)

# Inclusive bounds of the distance range of interest
min_distance = 3
max_distance = 7

# Boolean matrix: True where the distance between two vectors lies in the range
in_range = (distances >= min_distance) & (distances <= max_distance)

# Map every vector index to the indexes of the vectors within range of it
result_dict = {row: np.where(in_range[row])[0] for row in range(len(vectors))}

print("Dictionary with indexes of included vectors in distance range:")
distances = np.array(distances)
print(distances)
print(result_dict)

Dictionary with indexes of included vectors in distance range:
[[ 0.          5.19615242 10.39230485]
 [ 5.19615242  0.          5.19615242]
 [10.39230485  5.19615242  0.        ]]
{0: array([1], dtype=int64), 1: array([0, 2], dtype=int64), 2: array([1], dtype=int64)}


In [5]:
import numpy as np
import pandas as pd
from abc import ABC, abstractmethod

class IMetric(ABC):
    """Abstract interface for distance metrics, created at most once per class.

    Every concrete subclass caches its own single instance: constructing the
    same subclass again returns the object that was created the first time.
    """

    # Cached singleton. Inside this class the name is mangled to
    # ``_IMetric__instance``; each concrete class stores its instance under
    # that name in its *own* namespace.
    __instance = None

    def __new__(cls, *args, **kwargs):
        # Look only in cls's own __dict__. A plain ``cls.__instance`` read
        # follows the MRO, so a subclass of an already-instantiated metric
        # would be handed its parent's cached object (of the wrong type).
        instance = cls.__dict__.get("_IMetric__instance")
        if instance is None:
            instance = super().__new__(cls)
            cls.__instance = instance

        return instance

    def __del__(self):
        # Clear the cache slot on the class that actually holds this object.
        # The slot on IMetric itself is never populated (IMetric is abstract),
        # so resetting it there had no effect.
        owner = type(self)
        if owner.__dict__.get("_IMetric__instance") is self:
            owner.__instance = None

    @abstractmethod
    def get_distances(self, data: pd.DataFrame, point: pd.Series) -> np.ndarray:
        """Compute the distance from ``point`` to every vector in ``data``.

        Args:
            data (pd.DataFrame): matrix of vectors, with each of which the distance to point will be calculated
            point (pd.Series): vector from which distances will be calculated

        Returns:
            np.ndarray: vector of distances, one per vector in ``data``
        """
        pass

    @abstractmethod
    def get_distance(self, data: pd.Series, point: pd.Series) -> float:
        """Compute the distance between two vectors.

        Args:
            data (pd.Series): vector1
            point (pd.Series): vector2

        Returns:
            float: distance between data and point
        """
        pass

class EuclideanMetric(IMetric):
    """Euclidean (L2) distance implementation of the IMetric interface."""

    def get_distances(self, data : pd.DataFrame, point : pd.Series) -> np.ndarray:
        """Return the Euclidean distance from ``point`` to each row of ``data``.

        Args:
            data (pd.DataFrame): matrix whose rows are the vectors to measure
            point (pd.Series): vector subtracted from every row

        Returns:
            np.ndarray: one distance per row (norm taken along axis 1)
        """
        row_offsets = data - point
        return np.linalg.norm(row_offsets, axis=1)

    def get_distance(self, data : pd.Series, point : pd.Series) -> float:
        """Return the Euclidean distance between two vectors.

        Args:
            data (pd.Series): vector1
            point (pd.Series): vector2

        Returns:
            float: norm of ``data - point`` taken along the last axis
        """
        offset = data - point
        return np.linalg.norm(offset, axis=-1)
    
# Construct the metric twice and print both object ids, one per line;
# equal ids mean both names refer to the same instance.
cl1 = EuclideanMetric()
cl2 = EuclideanMetric()
print(id(cl1), id(cl2), sep="\n")

2464121978384
2464121978384
