<a href="https://colab.research.google.com/github/JeffersonEspinalA/Prueba/blob/main/ml_05_04_23_1_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from collections import Counter

class KNN:
    def __init__(self, k=3):
        self.k = k
    
    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
    
    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)
    
    def _predict(self, x):
        distances = [self._euclidean_distance(x, x_train) for x_train in self.X_train]
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        most_common_label = Counter(k_nearest_labels).most_common(1)[0][0]
        return most_common_label
    
    def _euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2)**2))

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

# Create a KNN classifier and fit it to the training data
knn = KNN(k=3)
knn.fit(X_train, y_train)

# Predict the class labels of the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# load the breast cancer dataset
data = load_breast_cancer()

# split the dataset into train and test sets with a 60-40 ratio
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.4, random_state=42)

print("Training set size:", len(X_train))
print("Test set size:", len(X_test))

Training set size: 341
Test set size: 228


In [4]:
type(X_train)

numpy.ndarray

In [5]:
print(X_train.shape)

(341, 30)


In [6]:
from sklearn.metrics import confusion_matrix

# assuming you have trained your KNN model and made predictions on a test set
y_true = y_test
knn = KNN(k=3)
knn.fit(X_train, y_train)

# Predict the class labels of the testing data
y_pred = knn.predict(X_test)

# calculate the confusion matrix
cm = confusion_matrix(y_true, y_pred)

print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[ 73   7]
 [  7 141]]


In [7]:
type(data)

sklearn.utils._bunch.Bunch

In [8]:
data.target_names

array(['malignant', 'benign'], dtype='<U9')

In [9]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

f1 = f1_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
acc = accuracy_score(y_true, y_pred)

print(f"F1: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"Accuracy: {accuracy}")

F1: 0.9527027027027027
Precision: 0.9527027027027027
Recall: 0.9527027027027027
Accuracy: 1.0
