In [1]:
import numpy as np

def my_train_test_split(X, y, train_fraction):
    """Split X and y into train and test parts by position (no shuffling).

    The first ``int(train_fraction * n_samples)`` rows become the training
    set and the remaining rows become the test set; row order is preserved.

    Parameters
    ----------
    X : array exposing ``.shape`` and slicing, samples along the first axis.
    y : sliceable sequence of targets with one entry per row of ``X``.
    train_fraction : float
        Fraction of the samples assigned to the training set, in [0, 1].

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside [0, 1] or ``X`` and ``y`` disagree
        on the number of samples.
    """
    # Written as "not (0 <= f <= 1)" so that NaN is rejected as well
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    num_samples = X.shape[0]
    if len(y) != num_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {num_samples} and {len(y)}"
        )

    # int() truncates, so a fractional sample count rounds towards the test set
    split = int(train_fraction * num_samples)

    return (X[:split], y[:split], X[split:], y[split:])

# Toy dataset: the last column is passed as the target, the rest as features
data = np.array([
    [1, 2, 0],
    [3, 4, 1],
    [5, 6, 1],
    [7, 8, 0],
    [9, 10, 1],
    [11, 12, 0],
])

# Ordered split: the leading 80% of the rows train, the trailing rows test
X_train, y_train, X_test, y_test = my_train_test_split(
    data[:, :-1],
    data[:, -1],
    train_fraction=0.8,
)

# Show every part under its heading (one item per line)
print(
    'X_train:', X_train,
    '\ny_train:', y_train,
    '\nX_test:', X_test,
    '\ny_test:', y_test,
    sep='\n',
)


X_train:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]

y_train:
[0 1 1 0]

X_test:
[[ 9 10]
 [11 12]]

y_test:
[1 0]


In [None]:
import numpy as np

data = np.array([[1, 2, 0], [3, 4, 1], [5, 6, 1], [7, 8, 0]])
num_samples = data.shape[0]

# Fixed seed so that re-running the cell reproduces the same shuffle
SEED = 42
rng = np.random.default_rng(SEED)

# Random ordering of the row indices (each index appears exactly once)
indices = rng.permutation(num_samples)

# Define the split fraction; int() truncates the training-set size
split_fraction = 0.8
split = int(split_fraction * num_samples)

# The first 'split' shuffled indices train, the rest test
train_indices = indices[:split]
test_indices = indices[split:]

# Last column is the target, the remaining columns are the features
X_train = data[train_indices, :-1]
y_train = data[train_indices, -1]

X_test = data[test_indices, :-1]
y_test = data[test_indices, -1]

print('X_train:')
print(X_train)
print('\ny_train:')
print(y_train)
print('\nX_test:')
print(X_test)
print('\ny_test:')
print(y_test)


X_train:
[[1 2]
 [3 4]
 [5 6]]

y_train:
[0 1 1]

X_test:
[[7 8]]

y_test:
[0]


In [None]:
import numpy as np

def my_random_train_test_split(X, y, train_fraction, random_state=None):
    """Randomly split X and y into train and test parts.

    The row indices are shuffled, the first ``int(train_fraction * n_samples)``
    shuffled indices select the training rows and the remaining indices
    select the test rows. ``X`` and ``y`` are indexed with the same index
    arrays, so rows and targets stay aligned.

    Parameters
    ----------
    X : numpy array, samples along the first axis.
    y : numpy array with one entry per row of ``X``.
    train_fraction : float
        Fraction of the samples assigned to the training set, in [0, 1].
    random_state : optional
        Seed (or anything else accepted by ``np.random.default_rng``) that
        makes the shuffle reproducible. When omitted, NumPy's global random
        state is used, so ``np.random.seed`` keeps governing the result.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside [0, 1] or ``X`` and ``y`` disagree
        on the number of samples.
    """
    # Written as "not (0 <= f <= 1)" so that NaN is rejected as well
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    num_samples = X.shape[0]
    if len(y) != num_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {num_samples} and {len(y)}"
        )

    # Both the np.random module and a Generator expose permutation()
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    indices = rng.permutation(num_samples)

    # int() truncates, so a fractional sample count rounds towards the test set
    split = int(train_fraction * num_samples)
    train_indices, test_indices = indices[:split], indices[split:]

    return (X[train_indices], y[train_indices], X[test_indices], y[test_indices])


In [None]:
# Toy dataset: the last column is passed as the target, the rest as features
data = np.array([
    [1, 2, 0],
    [3, 4, 1],
    [5, 6, 1],
    [7, 8, 0],
])

# Shuffled split with 80% of the rows requested for training
X_train, y_train, X_test, y_test = my_random_train_test_split(
    data[:, :-1],
    data[:, -1],
    train_fraction=0.8,
)

# Show every part under its heading (one item per line)
print(
    'X_train:', X_train,
    '\ny_train:', y_train,
    '\nX_test:', X_test,
    '\ny_test:', y_test,
    sep='\n',
)


X_train:
[[5 6]
 [3 4]
 [7 8]]

y_train:
[1 1 0]

X_test:
[[1 2]]

y_test:
[0]


In [None]:
import numpy as np

# Define the function my_random_train_test_split
def my_random_train_test_split(X, y, train_fraction, random_state=None):
    """Randomly split X and y into train and test parts.

    Note: a function with this name is also defined in an earlier cell of
    this notebook; whichever cell runs last wins, so keep the two
    definitions in sync or delete one of them.

    The row indices are shuffled, the first ``int(train_fraction * n_samples)``
    shuffled indices select the training rows and the remaining indices
    select the test rows. ``X`` and ``y`` are indexed with the same index
    arrays, so rows and targets stay aligned.

    Parameters
    ----------
    X : numpy array, samples along the first axis.
    y : numpy array with one entry per row of ``X``.
    train_fraction : float
        Fraction of the samples assigned to the training set, in [0, 1].
    random_state : optional
        Seed (or anything else accepted by ``np.random.default_rng``) that
        makes the shuffle reproducible. When omitted, NumPy's global random
        state is used, so ``np.random.seed`` keeps governing the result.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside [0, 1] or ``X`` and ``y`` disagree
        on the number of samples.
    """
    # Written as "not (0 <= f <= 1)" so that NaN is rejected as well
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    num_samples = X.shape[0]
    if len(y) != num_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {num_samples} and {len(y)}"
        )

    # Both the np.random module and a Generator expose permutation()
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    indices = rng.permutation(num_samples)

    # int() truncates, so a fractional sample count rounds towards the test set
    split = int(train_fraction * num_samples)
    train_indices, test_indices = indices[:split], indices[split:]

    return (X[train_indices], y[train_indices], X[test_indices], y[test_indices])

# Toy dataset: the last column is passed as the target, the rest as features
data = np.array([
    [1, 2, 0],
    [3, 4, 1],
    [5, 6, 1],
    [7, 8, 0],
])

# Shuffled split with 80% of the rows requested for training
X_train, y_train, X_test, y_test = my_random_train_test_split(
    data[:, :-1],
    data[:, -1],
    train_fraction=0.8,
)

# Show every part under its heading (one item per line)
print(
    'X_train:', X_train,
    '\ny_train:', y_train,
    '\nX_test:', X_test,
    '\ny_test:', y_test,
    sep='\n',
)


X_train:
[[5 6]
 [3 4]
 [1 2]]

y_train:
[1 1 0]

X_test:
[[7 8]]

y_test:
[0]


In [None]:
import numpy as np

class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    Parameters
    ----------
    n_neighbors : int, default 5
        Number of closest training samples that vote. If it exceeds the
        number of training samples, all training samples vote.
    weights : {'uniform', 'distance'}, default 'uniform'
        'uniform' gives every neighbour one vote; 'distance' weights each
        neighbour's vote by ``1 / (distance + 1e-5)``.
    """

    def __init__(self, n_neighbors=5, weights='uniform'):
        self.n_neighbors = n_neighbors
        self.weights = weights
        # Populated by fit(); None until then
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and labels (no other work is done).

        X is expected to be 2-D (samples x features) because predict()
        computes norms along axis 1; array-likes are converted to arrays.
        """
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X):
        """Predict a label for every row of X.

        Returns a float array of length ``X.shape[0]`` holding the winning
        label per sample. Ties go to the smallest label, because the
        candidate labels come sorted from ``np.unique`` and ``np.argmax``
        returns the first maximum.

        Raises
        ------
        ValueError
            If ``self.weights`` is neither 'uniform' nor 'distance'.
        """
        if self.weights not in ('uniform', 'distance'):
            raise ValueError(
                f"weights must be 'uniform' or 'distance', got {self.weights!r}"
            )

        X = np.asarray(X)
        y_pred = np.zeros(X.shape[0])

        # Sorted distinct training labels; loop-invariant, so computed once
        classes = np.unique(self.y_train)

        for i, x in enumerate(X):
            # Euclidean distance from this sample to every training sample
            distances = np.linalg.norm(self.X_train - x, axis=1)

            # Indices of the k nearest training samples
            k_indices = np.argsort(distances)[:self.n_neighbors]
            neighbor_labels = self.y_train[k_indices]

            if self.weights == 'uniform':
                # Plain majority vote among the neighbours
                labels, counts = np.unique(neighbor_labels, return_counts=True)
                y_pred[i] = labels[np.argmax(counts)]
            else:
                # Inverse-distance votes; the epsilon avoids division by zero
                # when a query coincides with a training sample
                inverse_distances = 1 / (distances[k_indices] + 1e-5)
                weighted_votes = np.array([
                    np.sum(inverse_distances[neighbor_labels == label])
                    for label in classes
                ])
                # Map the winning position back to its label; storing the
                # position itself is only correct for labels 0..C-1
                y_pred[i] = classes[np.argmax(weighted_votes)]

        return y_pred


In [None]:
# Example training set: four 2-D points with alternating 0/1 labels
X_train = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
y_train = np.array([0, 1, 0, 1])

# Build a classifier with the constructor defaults (5 neighbors, 'uniform'
# weights) and hand it the training data
knn = KNN()
knn.fit(X_train, y_train)


In [None]:
# Relies on the fitted `knn` instance created in the previous cell.

# Two example query points and the ground-truth labels to score against
X_test = np.array([[1, 2], [3, 9]])
y_true = np.array([0, 1])

# Predict, then score as the fraction of predictions matching y_true
y_pred = knn.predict(X_test)
accuracy = (y_pred == y_true).mean()

print(f"Predicted Labels: {y_pred}")
print(f"Accuracy = {accuracy}")


Predicted Labels: [0. 0.]
Accuracy = 0.5


In [None]:
from itertools import product
from sklearn.metrics import accuracy_score
import numpy as np

# Sample data (replace with your data)
X_train = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
y_train = np.array([0, 1, 0, 1])
X_test = np.array([[2, 2], [3, 3]])
y_true = np.array([0, 1])

# Hyperparameter values to try; every combination is evaluated.
# NOTE(review): only 4 training samples exist here, so n_neighbors of 5 and 7
# both end up using all 4 samples (KNN.predict slices argsort[:n_neighbors]).
param_grid = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance']
}

# Start below any reachable accuracy so the first combination is always
# recorded; starting at 0 with a strict '>' would leave best_params empty
# whenever every combination scores 0.
best_accuracy = float('-inf')
best_params = {}

# Iterate through all hyperparameter combinations
for params in product(*param_grid.values()):
    hyperparameters = dict(zip(param_grid.keys(), params))

    # Create and train a KNN model with the current hyperparameters
    knn = KNN(**hyperparameters)
    knn.fit(X_train, y_train)

    # Score the model on the held-out points
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_true, y_pred)

    # Strict '>' keeps the earliest combination when accuracies tie
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = hyperparameters

print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_accuracy}")


Best Hyperparameters: {'n_neighbors': 3, 'weights': 'uniform'}
Best Accuracy: 0.5


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from itertools import product

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter values to try; every combination is evaluated
param_grid = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance']
}

# Start below any reachable accuracy so the first combination is always
# recorded; starting at 0 with a strict '>' would leave best_params empty
# whenever every combination scores 0.
best_accuracy = float('-inf')
best_params = {}

# NOTE(review): the hyperparameters are selected on the same test split that
# the reported accuracy comes from, so "Best Accuracy" is an optimistic
# estimate; a separate validation split or cross-validation would avoid that.
for params in product(*param_grid.values()):
    hyperparameters = dict(zip(param_grid.keys(), params))

    # Create and train a KNN model with the current hyperparameters
    knn = KNN(**hyperparameters)
    knn.fit(X_train, y_train)

    # Score the model on the held-out samples
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Strict '>' keeps the earliest combination when accuracies tie
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = hyperparameters

print(f"Best Hyperparameters: {best_params}")
print(f"Best Accuracy: {best_accuracy}")


Best Hyperparameters: {'n_neighbors': 3, 'weights': 'uniform'}
Best Accuracy: 1.0
