In [None]:
import math

# Function to calculate the Euclidean distance between two points
def euclidean_distance(point1, point2):
  """Return the Euclidean distance between two indexable points.

  The coordinates are walked by index over ``len(point1)``, so ``point2``
  must have at least that many entries (an ``IndexError`` is raised
  otherwise) and any extra trailing entries in ``point2`` are not read.

  Args:
    point1: Sequence of numeric coordinates; its length drives the loop.
    point2: Sequence of numeric coordinates, indexed in parallel.

  Returns:
    The square root of the summed squared coordinate differences, as a float.
  """
  squared_sum = sum(
    (point1[axis] - point2[axis]) ** 2 for axis in range(len(point1))
  )
  return math.sqrt(squared_sum)

# Function to find the k nearest neighbors
def find_nearest_neighbors(training_data, test_instance, k):
  """Return the ``k`` training rows closest to ``test_instance``.

  Only the first four entries of each training row are used as coordinates
  when measuring distance; the complete row is what gets returned.

  Args:
    training_data: Iterable of rows whose first four entries are numeric.
    test_instance: Coordinates of the query point.
    k: Maximum number of rows to return.

  Returns:
    A list of at most ``k`` training rows ordered from nearest to farthest.
    Rows at equal distance keep their order from ``training_data`` because
    the sort is stable.
  """
  scored_rows = [
    (row, euclidean_distance(row[:4], test_instance))
    for row in training_data
  ]
  ranked = sorted(scored_rows, key=lambda pair: pair[1])
  return [row for row, _ in ranked[:k]]

# Function to classify a test instance based on the k nearest neighbors
def classify_instance(neighbors):
  """Return the majority label among ``neighbors``.

  The label of each neighbor is read from index 4 of its row.

  Args:
    neighbors: Non-empty sequence of rows carrying a hashable label at
      index 4.

  Returns:
    The label with the most votes. When several labels share the top
    count, the one that appeared first in ``neighbors`` wins.

  Raises:
    IndexError: If ``neighbors`` is empty.
  """
  tally = {}
  for row in neighbors:
    tally[row[4]] = tally.get(row[4], 0) + 1
  # Stable descending sort: among equal counts the first-seen label stays first.
  ranked_labels = sorted(tally, key=tally.get, reverse=True)
  return ranked_labels[0]

# Function to classify multiple test instances
def classify_instances(training_data, test_data, k):
  """Predict a label for every row of ``test_data``.

  Each test row is passed to ``find_nearest_neighbors`` together with
  ``training_data`` and ``k``, and the resulting neighbors are handed to
  ``classify_instance``.

  Args:
    training_data: Labelled rows forwarded to ``find_nearest_neighbors``.
    test_data: Iterable of query points.
    k: Number of neighbors requested for each query point.

  Returns:
    A list of predicted labels, one per test row, in input order.
  """
  return [
    classify_instance(find_nearest_neighbors(training_data, sample, k))
    for sample in test_data
  ]

def accuracy(actual_classes, predictions):
    """Return the percentage of positions where prediction and truth agree.

    The comparison runs over ``len(actual_classes)`` positions, so
    ``predictions`` must be at least that long; extra trailing predictions
    are not looked at.

    Args:
        actual_classes: Sequence of true labels.
        predictions: Sequence of predicted labels, aligned by index.

    Returns:
        A float between 0.0 and 100.0.

    Raises:
        ZeroDivisionError: If ``actual_classes`` is empty.
    """
    total = len(actual_classes)
    hits = sum(
        1 for idx in range(total) if actual_classes[idx] == predictions[idx]
    )
    return hits / float(total) * 100.0

In [None]:
%%time
# Example usage
# Each training row holds four numeric features followed by the class label
# at index 4 (the layout find_nearest_neighbors and classify_instance read).
training_data = [
    [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
    [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'],
    [6.2, 3.4, 5.4, 2.3, 'Iris-virginica'],
    [7.0, 3.2, 4.7, 1.4, 'Iris-versicolor'],
    [6.4, 3.2, 4.5, 1.5, 'Iris-versicolor'],
    [5.5, 2.3, 4.0, 1.3, 'Iris-versicolor'],
    [6.3, 2.9, 5.6, 1.8, 'Iris-virginica'],
    [6.5, 3.0, 5.2, 2.0, 'Iris-virginica'],
    [6.4, 2.8, 5.6, 2.2, 'Iris-virginica']
]

# Test rows carry only the four features; their labels are predicted below.
test_data = [
  [5.8, 2.7, 5.1, 1.9],
  [6.0, 3.0, 4.8, 1.8],
  [5.5, 2.4, 3.7, 1.0],
  [4.9, 3.1, 1.5, 0.1],
  [6.7, 3.1, 4.4, 1.4],
]

# Number of nearest training rows that vote on each prediction.
k = 3

# `predictions` is read again by the accuracy cell that follows.
predictions = classify_instances(training_data, test_data, k)
print(predictions)

['Iris-setosa', 'Iris-virginica', 'Iris-virginica', 'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor']
CPU times: user 1.41 ms, sys: 0 ns, total: 1.41 ms
Wall time: 2.06 ms


In [None]:
# Expected labels for the test rows, compared index by index with `predictions`.
actual_classes=['Iris-virginica', 'Iris-virginica', 'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor']
accuracy(actual_classes, predictions)

100.0

Nova versão com melhoria de velocidade

In [None]:
import numpy as np

# Function to calculate the Euclidean distance between two points
def euclidean_distance(point1, point2):
  """Return the Euclidean distance between two points.

  Both operands are converted to floating-point arrays before subtracting,
  so plain lists and tuples are accepted as well as NumPy arrays, and
  unsigned-integer inputs cannot wrap around during the subtraction.

  Args:
    point1: Array-like of numeric coordinates.
    point2: Array-like of numeric coordinates, broadcastable against
      ``point1``.

  Returns:
    The norm of ``point1 - point2`` as computed by ``np.linalg.norm``.
  """
  difference = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
  return np.linalg.norm(difference)

# Function to find the k nearest neighbors
def find_nearest_neighbors(training_data, test_instance, k):
  """Return the ``k`` rows of ``training_data`` closest to ``test_instance``.

  Distances are Euclidean and are computed on the first four columns of
  ``training_data`` only; the returned rows keep all of their columns.

  Args:
    training_data: 2-D NumPy array whose first four columns are features.
    test_instance: Array-like broadcastable against those four columns.
    k: Maximum number of rows to return.

  Returns:
    A 2-D array with at most ``k`` rows, ordered from nearest to farthest.
    Rows at equal distance keep their relative order from ``training_data``.
  """
  distances = np.linalg.norm(training_data[:, :4] - test_instance, axis=1)
  # Slice the index array before indexing the data so only k rows are copied
  # instead of a fully re-ordered copy of the training set. A stable sort
  # makes the order of equidistant rows deterministic.
  nearest_indices = np.argsort(distances, kind="stable")[:k]
  return training_data[nearest_indices]

# Function to classify a test instance based on the k nearest neighbors
def classify_instance(neighbors):
  """Return the most frequent class id among ``neighbors``.

  Column 4 of ``neighbors`` is cast to ``int`` and counted with
  ``np.bincount``; the index holding the largest count is the result.
  When several ids share the top count, the smallest id is returned,
  because ``argmax`` reports the first maximum.

  Args:
    neighbors: 2-D NumPy array with class ids in column 4.

  Returns:
    The winning class id as a NumPy integer.
  """
  label_column = neighbors[:, 4]
  votes_per_label = np.bincount(label_column.astype(int))
  return votes_per_label.argmax()

# Function to classify multiple test instances
def classify_instances(training_data, test_data, k):
  """Predict a class for every row of ``test_data``.

  Args:
    training_data: Labelled data forwarded to ``find_nearest_neighbors``.
    test_data: Iterable of query points.
    k: Number of neighbors requested for each query point.

  Returns:
    A list with one ``classify_instance`` result per test row, in input
    order.
  """
  def predict_one(sample):
    nearest = find_nearest_neighbors(training_data, sample, k)
    return classify_instance(nearest)

  return [predict_one(sample) for sample in test_data]

def accuracy(actual_classes, predictions):
    correct = 0
    for i in range(len(actual_classes)):
        if actual_classes[i] == predictions[i]:
            correct += 1
    return correct / float(len(actual_classes)) * 100.0

In [None]:
%%time
# Example usage
# Columns 0-3 are the numeric features; column 4 is the class id. The ids are
# stored as floats here because everything lives in one numeric array, and
# classify_instance casts them back to int.
training_data = np.array([
    [5.1, 3.5, 1.4, 0.2, 0],  # Setosa
    [4.9, 3.0, 1.4, 0.2, 0],  # Setosa
    [6.2, 3.4, 5.4, 2.3, 2],  # Virginica
    [7.0, 3.2, 4.7, 1.4, 1],  # Versicolour
    [6.4, 3.2, 4.5, 1.5, 1],  # Versicolour
    [5.5, 2.3, 4.0, 1.3, 1],  # Versicolour
    [6.3, 2.9, 5.6, 1.8, 2],  # Virginica
    [6.5, 3.0, 5.2, 2.0, 2],  # Virginica
    [6.4, 2.8, 5.6, 2.2, 2]   # Virginica
])
# where 0 is setosa, 1 is Versicolour, 2 is virginica
# Test rows carry only the four features; their class ids are predicted below.
test_data = np.array([
  [5.8, 2.7, 5.1, 1.9],
  [6.0, 3.0, 4.8, 1.8],
  [5.5, 2.4, 3.7, 1.0],
  [4.9, 3.1, 1.5, 0.1],
  [6.7, 3.1, 4.4, 1.4],
])

# Number of nearest training rows that vote on each prediction.
k = 3

# `predictions` is read again by the accuracy cell that follows.
predictions = classify_instances(training_data, test_data, k)
print(predictions)

[0, 2, 2, 1, 0, 1]
CPU times: user 1.42 ms, sys: 0 ns, total: 1.42 ms
Wall time: 1.43 ms


In [None]:
# Expected class ids for the test rows, written as floats; accuracy compares
# them with `==` against the integer ids in `predictions`, index by index.
actual_classes = [2.0, 2.0, 1.0, 0.0, 1.0]
accuracy(actual_classes, predictions)

100.0

Utilizei o `%%time` para verificar o tempo de execução: o primeiro algoritmo levou 2.06 ms e o segundo levou 1.43 ms.