In [None]:
import numpy as np
import pandas as pd
class KNN():
  """
  k-nearest-neighbours classifier: Euclidean distance plus majority vote.

  Attributes set by fit():
    data: numpy.ndarray, training datapoints, one per row.
    classes: numpy.ndarray, class label of each training datapoint.
  """

  def __init__(self, k):
    """
    Creates a new KNN object.

    Arguments:
      k: int, the number of nearest neighbours consulted per prediction.
    Raises:
      ValueError: if k is not a positive integer.
    """
    # Fail fast: a non-integer k breaks the neighbour selection later, and
    # k < 1 would leave predict() with no neighbours to vote.
    if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
      raise ValueError("k must be a positive integer")
    self.k = k

  def get_k_nearest_neighboors(self, datapoint):
    """
    Gets the k-nearest neighbours of a given datapoint.

    Arguments:
      datapoint: array-like, a row vector of features.
    Returns:
      indices: list, positions in self.data of the datapoints most similar to
               datapoint, closest first. Holds fewer than k entries when the
               training set has fewer than k rows.
    Raises:
      ValueError: if datapoint is a scalar instead of a vector.
    """
    query = np.asarray(datapoint)
    if query.ndim == 0:
      # A scalar would silently broadcast against every training row.
      raise ValueError(" Datatype not valid")

    # Distances go through calculate_distance row by row so that a subclass
    # overriding the metric is still honoured.
    distances = np.array([
      self.calculate_distance(query, np.asarray(row))
      for row in np.asarray(self.data)
    ])

    # A stable sort keeps equidistant neighbours in training order, which
    # makes the selection deterministic.
    order = np.argsort(distances, kind="stable")

    # Never ask for more neighbours than there are training rows.
    neighbours = min(self.k, len(order))
    return list(order[:neighbours])

  def calculate_distance(self, datapoint1, datapoint2):
    """
    Calculates the Euclidean distance between two datapoints.

    Arguments:
      datapoint1: numpy.ndarray, first datapoint. It's the row vector we want
                  to compare with the others.
      datapoint2: numpy.ndarray, second datapoint.
    Returns:
      Distance between the given datapoints.
    Raises:
      ValueError: if either argument is not a numpy.ndarray.
    """
    if not (isinstance(datapoint1, np.ndarray) and isinstance(datapoint2, np.ndarray)):
      raise ValueError(" Datatype not valid")
    return np.linalg.norm(np.subtract(datapoint2, datapoint1))

  def fit(self, main_matrix, y):  # main_matrix was previously named X
    """
    Train the model, i.e., store the training datapoints together with their
    corresponding classes.

    Arguments:
      main_matrix: array-like, matrix used to train the model, where each row
                   represents a datapoint.
      y: array-like, class of each row of main_matrix.
    Raises:
      ValueError: if main_matrix and y do not hold the same number of entries.
    No returns.
    """
    # Converting to arrays makes every later lookup positional; a pandas
    # Series with a shuffled index would otherwise be indexed by label.
    data = np.asarray(main_matrix)
    classes = np.asarray(y)
    if data.ndim < 1 or classes.ndim < 1 or len(data) != len(classes):
      raise ValueError("main_matrix and y must have one entry per datapoint")
    self.data = data
    self.classes = classes

  def predict(self, X):
    """
    Predicts the class for each datapoint in the matrix X.

    Arguments:
      X: array-like, matrix used to get predictions for each datapoint, where
         each row represents a datapoint.
    Returns:
      predictions: numpy.ndarray, class predicted for each datapoint in X.
    Raises:
      ValueError: if there are no training datapoints to vote.
    """
    labels = np.asarray(self.classes)
    preds = []
    for datapoint in np.asarray(X):
      indices = self.get_k_nearest_neighboors(datapoint)
      if not indices:
        raise ValueError("cannot predict without training datapoints")
      # Majority vote among the neighbours. np.unique handles any label type
      # (not only non-negative integers); its values come back sorted, so a
      # tie resolves to the smallest label.
      values, counts = np.unique(labels[indices], return_counts=True)
      preds.append(values[np.argmax(counts)])
    return np.array(preds)

In [None]:
# Scratch check: np.ndarray is itself a class, so type() of it evaluates to `type`.
type(np.ndarray) 

type

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every prediction/accuracy figure derived from
# it, is reproducible when the notebook is re-run from a fresh kernel.
SEED = 42

# Load the iris dataset: X holds the feature matrix, y the class of each row.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the datapoints for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)


In [None]:
# Fit a 5-neighbour classifier on the training split, then predict the class
# of every datapoint in the test split.
object_knn = KNN(k=5)
object_knn.fit(main_matrix=X_train, y=y_train)
prediction = object_knn.predict(X=X_test)
print(prediction)

[1 0 0 1 1 2 1 0 1 0 2 1 1 0 1 2 0 0 0 0 2 0 0 1 2 1 2 2 0 1 0 1 0 2 1 0 0
 1 1 2 1 0 1 0 2]


In [None]:
# Accuracy: fraction of test datapoints whose predicted class equals y_test.
np.mean(prediction == y_test)

0.9777777777777777

In [None]:
# A set literal: the duplicated 2 collapses, leaving {1, 2, 3, 4}.
matrix = {1, 2, 3, 4, 2}

# Look the key up on an explicitly defined mapping. Calling
# `dict.get("brand")` on the builtin type raises TypeError on a fresh kernel,
# and rebinding the name `dict` would shadow the builtin.
# NOTE(review): the value "Ford" is taken from this cell's recorded output;
# confirm it matches the mapping that was originally intended.
car = {"brand": "Ford"}
car.get("brand")

'Ford'

In [None]:
from sklearn.datasets import load_iris

# Feature matrix and class vector returned directly as a pair.
data, classes = load_iris(return_X_y=True)

# Show the shapes and a short preview instead of dumping the full arrays,
# which floods the notebook output.
print(data.shape, classes.shape)
print(data[:5])
print(classes[:5])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

In [None]:
# Positions of the five smallest values: order the positions by value and
# keep the first five.
l = np.array([1, 5, 0, 12, 52, 1, 4])
indices = np.argsort(l)
kIndices = indices[:5]
print(kIndices)


[2 0 5 6 1]


In [None]:
# Index `classes` with the integer positions held in `kIndices` to pull out
# the labels at those positions (both names come from earlier cells).
classes[kIndices]

array([0, 0, 0, 0, 0])

In [None]:
# Majority vote: count how often each label occurs among the selected
# entries and print the most frequent one.
a = classes[kIndices]
counts = np.bincount(a)
print(counts.argmax())

0


In [None]:
# Start from an empty list and add a single element to it.
c = list()
c.extend([12])
c

[12]

In [None]:
m = [
    [9, 8, 7],
    [5, 6, 4],
    [2, 5, 6],
]
# Extract the data by column: take the element at index 1 of every row.
[row[1] for row in m]

[8, 6, 5]

In [None]:
m = [[9, 8, 7], [5, 6, 4], [2, 5, 6]]
# Shallow copy of the outer list (the rows themselves are shared).
list(m)

[[9, 8, 7], [5, 6, 4], [2, 5, 6]]

In [None]:
# Element-wise difference array2 - array1 of two plain lists, as an ndarray.
array1 = [1, 2, 3]
array2 = [3, 2, 1]
array3 = np.asarray(array2) - np.asarray(array1)
array3

array([ 2,  0, -2])

In [None]:
# Class labels: 0 => low, 1 => medium, 2 => high

# Toy training matrix: one datapoint per row, three features each.
# NOTE: this rebinds X and classes, which earlier cells assigned from the
# iris dataset.
X = np.array([[1000, 100, 4], [10, 40, 1], [1200, 20, 2], [10, 10, 1]])

# Class of each row of X. The original per-row annotations were
# 4, 2, 0.2 and 10; what they measure is not shown in this notebook.
classes = [2, 0, 1, 0]

k = 3
new_datapoint = [90, 80, 3]

In [None]:


# Euclidean norm of (2, 0, -2) written out by hand: square root of the sum of
# the squared components.
a = np.sqrt(sum(component ** 2 for component in (2, 0, -2)))
a

2.8284271247461903

In [None]:
import numpy as np
# Euclidean norm of b computed with np.linalg.norm.
b = [2, 0, -2]
c = np.linalg.norm(np.asarray(b))
c

2.8284271247461903