In [1]:
import numpy as np
import pandas as pd

In [2]:
def minkowski_distance(point1, point2, p):
    """
    Calculate the Minkowski distance between two points.

    The distance is (sum_i |point1_i - point2_i| ** p) ** (1 / p). When `p` is
    infinite the limiting form max_i |point1_i - point2_i| (Chebyshev distance)
    is returned.

    Parameters:
    point1 (array-like): First point; one-dimensional, real-valued.
    point2 (array-like): Second point; same shape as `point1`.
    p (int or float): The order of the norm. Must be > 0; NumPy integer and
        floating scalars are accepted, and `float("inf")` is allowed.

    Returns:
    float: The Minkowski distance between the two points (0.0 for two empty
        points).

    Raises:
    ValueError: If the points differ in shape or are not one-dimensional, or
        if `p` is not a positive number (including NaN and non-numeric values).
    """
    # Work in floating point: subtracting unsigned-integer arrays would wrap
    # around, and integer powers can overflow.
    point1 = np.asarray(point1, dtype=float)
    point2 = np.asarray(point2, dtype=float)
    if (point1.shape != point2.shape) or (point1.ndim != 1):
        raise ValueError("Points must have the same dimensions and be one-dimensional.")
    # Type check first so that a non-numeric `p` raises ValueError rather than a
    # TypeError from the comparison; `not p > 0` also rejects NaN.
    if not isinstance(p, (int, float, np.integer, np.floating)) or not p > 0:
        raise ValueError("Order 'p' must be a positive integer or float.")

    abs_diff = np.abs(point1 - point2)
    if np.isinf(p):
        # The general formula degenerates for p = inf (1 / p == 0), so use the limit.
        return abs_diff.max() if abs_diff.size else np.float64(0.0)
    return np.sum(abs_diff ** p) ** (1 / p)

In [10]:
def distances_from_point(data, point, p):
    """
    Calculate the Minkowski distances from a given point to all points in the dataset.

    The computation is vectorised over the rows of `data` instead of calling a
    Python function once per row.

    Parameters:
    data (array-like or DataFrame): Dataset containing multiple points, one per
        row. Anything that is not already a DataFrame is wrapped in one.
    point (array-like): The reference point; one-dimensional with one value per
        column of `data`.
    p (int or float): The order of the norm. Must be > 0; `float("inf")` gives
        the Chebyshev (maximum-coordinate) distance.

    Returns:
    pd.Series: Distance from the reference point to each row, indexed like the
        rows of `data` (an empty Series when `data` has no rows).

    Raises:
    ValueError: If `point` is not one-dimensional or does not match the number
        of columns in `data`, or if `p` is not a positive number.
    """
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)
    point = np.asarray(point, dtype=float)
    if point.ndim != 1 or point.shape[0] != data.shape[1]:
        raise ValueError("Reference point must be one-dimensional and match the number of features in the dataset.")
    # Type check first so a non-numeric `p` raises ValueError, not TypeError;
    # `not p > 0` also rejects NaN.
    if not isinstance(p, (int, float, np.integer, np.floating)) or not p > 0:
        raise ValueError("Order 'p' must be a positive integer or float.")

    # Broadcasting subtracts `point` from every row at once.
    abs_diff = np.abs(data.to_numpy(dtype=float) - point)
    if np.isinf(p):
        # max() over a zero-length axis is undefined, so handle "no features" explicitly.
        values = abs_diff.max(axis=1) if abs_diff.shape[1] else np.zeros(len(abs_diff))
    else:
        values = np.sum(abs_diff ** p, axis=1) ** (1 / p)
    # Keep the dataset's row index so callers can look labels up by it.
    return pd.Series(values, index=data.index, dtype=float)

In [28]:
from collections import Counter
def predict_the_label(distances,lables, k):
    """Return the majority label among the k nearest neighbours.

    Args:
        distances (pd.Series): distances from the query point to every other
            point. It is not modified; a sorted copy is used internally.
        lables (pd.Series): labels of those points, sharing the index of
            `distances`.
        k (int): number of nearest neighbours to consider; must be >= 1. If k
            exceeds the number of points, all points vote.

    Returns:
        The most common label among the k nearest neighbours. When several
        labels are equally common, the first maximum found by Counter wins.

    Raises:
        ValueError: if k is not a positive integer or `distances` is empty.
    """
    if not isinstance(k, (int, np.integer)) or k < 1:
        raise ValueError("k must be a positive integer.")
    if len(distances) == 0:
        raise ValueError("distances must not be empty.")

    # Sort a copy (not in place) so the caller's Series keeps its order; the
    # stable sort keeps equally distant points in their original order.
    ordered = distances.sort_values(kind="stable")
    nearest_labels = lables.loc[ordered.index[:k]]
    most_common = Counter(nearest_labels).most_common(1)
    return most_common[0][0]

In [4]:
# Scratch check: a bare function name displays its repr, confirming it is defined in the kernel.
distances_from_point

<function __main__.distances_from_point(data, point, p)>

In [5]:
# Scratch 2x3 array for checking what len(shape) reports on two-dimensional input.
a =np.array([[0, 1, 2],[3, 4, 5]])

In [6]:
# len(shape) is the number of dimensions (2 for `a`); the distance functions require it to be 1.
len(a.shape)

2

In [7]:
# Load scikit-learn's bundled Iris dataset and split it into a feature table
# and a label Series that share the same default integer index.
from sklearn.datasets import load_iris
data = load_iris()
# X: one column per feature, named after data.feature_names.
X = pd.DataFrame(data.data, columns=data.feature_names)
# y: the target values, positionally aligned with the rows of X.
y = pd.Series(data.target)

In [8]:
# Preview the feature table (pandas elides the middle rows of a long frame).
X

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [23]:
# Is the query point already an exact row of X? Compares every column with
# exact float equality, then asks whether any row matches on all of them.
X.eq([5.1, 3.5, 1.3, 0.2]).all(axis=1).any()


np.False_

In [None]:
# Distances (order p=2) from the query point to every row of X.
# Note: despite its name, `k` holds the distance Series, not a neighbour count.
# NOTE(review): this cell has no execution count, and the outputs displayed
# below show a distance of 0.0 for row 0 even though the check above reports
# that this query point is not a row of X. The outputs look stale; re-run the
# notebook top to bottom to confirm.
k = distances_from_point(X, [5.1, 3.5, 1.3, 0.2], 2)

In [13]:
# Inspect the distances in dataset row order.
k

0      0.000000
1      0.538516
2      0.509902
3      0.648074
4      0.141421
         ...   
145    4.654031
146    4.276681
147    4.459821
148    4.650806
149    4.140048
Length: 150, dtype: float64

In [14]:
# Order the distances ascending (nearest rows first). Rebinding to the sorted
# copy avoids mutating the Series in place, so nothing displayed by an earlier
# cell changes behind the reader's back.
k = k.sort_values()

In [24]:
# Inspect the distances again after sorting: smallest distances come first.
k

0      0.000000
17     0.100000
4      0.141421
39     0.141421
28     0.141421
         ...   
131    6.014150
105    6.095080
122    6.211280
117    6.240192
118    6.498461
Length: 150, dtype: float64

In [27]:
# Majority label among the 3 nearest rows (`k` is the distance Series; 3 is the neighbour count).
predict_the_label(k, y, 3)

0