K-Nearest Neighbors (KNN) Algorithm

In [2]:
from math import sqrt
from collections import Counter

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two coordinate sequences.

    Coordinates are paired positionally with ``zip``, so when the sequences
    differ in length the surplus coordinates of the longer one are ignored.

    Args:
        point1: Iterable of numeric coordinates.
        point2: Iterable of numeric coordinates.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    squared_diffs = [(a - b) ** 2 for a, b in zip(point1, point2)]
    return sqrt(sum(squared_diffs))

def knn_classifier(data_points, new_point, k=3):
    """Classify ``new_point`` by majority vote among its ``k`` nearest neighbours.

    Args:
        data_points: Iterable of labelled points. Each point is a sequence
            whose last element is the label and whose preceding elements are
            the coordinates.
        new_point: Coordinates of the point to classify (no label).
        k: Number of nearest neighbours that vote. If ``k`` exceeds the
            number of data points, every point votes.

    Returns:
        The most common label among the ``k`` nearest neighbours. When several
        labels tie, the one that appears first in nearest-first order wins.

    Raises:
        ValueError: If ``k`` is less than 1 or ``data_points`` is empty.
    """
    # A negative k would make distances[:k] silently drop the *farthest*
    # points instead of selecting the nearest ones, so reject it up front.
    if k < 1:
        raise ValueError("k must be a positive integer")

    distances = [
        (euclidean_distance(point[:-1], new_point), point[-1])
        for point in data_points
    ]
    if not distances:
        raise ValueError("data_points must contain at least one labelled point")

    # Sort on distance only: labels may not be mutually comparable, and the
    # stable sort keeps input order among equidistant points.
    distances.sort(key=lambda pair: pair[0])
    nearest_labels = [label for _, label in distances[:k]]
    return Counter(nearest_labels).most_common(1)[0][0]


Remove Outliers from Data

In [3]:
def remove_outliers(data, threshold=2):
    """Drop values that lie too far from the mean of ``data``.

    A value is kept when its absolute deviation from the mean is at most
    ``threshold`` population standard deviations.

    Args:
        data: Sized, re-iterable collection of numbers.
        threshold: Cut-off expressed in standard deviations. Defaults to 2.

    Returns:
        A new list with the retained values in their original order. Empty
        input yields an empty list.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(data) == 0:
        return []

    mean = sum(data) / len(data)
    # Population standard deviation (divides by N, not N - 1).
    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
    limit = threshold * std_dev
    return [x for x in data if abs(x - mean) <= limit]


Matrix Multiplication

In [None]:
def matrix_multiply(mat1, mat2):
    """Return the matrix product ``mat1 x mat2`` as a new list of lists.

    Both matrices are given as lists of rows. Dimensions are taken from the
    number of rows and the length of the first row of each matrix.

    Args:
        mat1: Left operand.
        mat2: Right operand.

    Returns:
        A list with one row per row of ``mat1`` and one column per column
        of ``mat2``.

    Raises:
        ValueError: If the column count of ``mat1`` differs from the row
            count of ``mat2``.
    """
    n_rows, n_inner = len(mat1), len(mat1[0])
    n_inner_right, n_cols = len(mat2), len(mat2[0])

    if n_inner != n_inner_right:
        raise ValueError("Incompatible matrices for multiplication")

    product = []
    for i in range(n_rows):
        out_row = []
        for j in range(n_cols):
            # Dot product of row i of mat1 with column j of mat2.
            cell = 0
            for k in range(n_inner):
                cell += mat1[i][k] * mat2[k][j]
            out_row.append(cell)
        product.append(out_row)
    return product


Cosine Similarity

In [None]:
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two numeric vectors.

    The dot product pairs components with ``zip`` (stopping at the shorter
    vector), while each magnitude is computed over the whole of its vector.

    Args:
        vec1: Sequence of numbers.
        vec2: Sequence of numbers.

    Returns:
        The dot product divided by the product of the magnitudes, or ``0``
        when either vector has zero magnitude.
    """
    dot = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = sum(a ** 2 for a in vec1) ** 0.5
    norm2 = sum(b ** 2 for b in vec2) ** 0.5

    if norm1 != 0 and norm2 != 0:
        return dot / (norm1 * norm2)
    # The angle is undefined for a zero vector; report no similarity.
    return 0


Min-Heap Using a Priority Queue

In [5]:
import heapq

class MinHeap:
    """Minimal min-heap wrapper around the ``heapq`` module.

    Items live in the plain list ``self.heap``, which ``heapq`` keeps in
    heap order so the smallest item is always at index 0.
    """

    def __init__(self):
        """Create an empty heap."""
        self.heap = []

    def insert(self, value):
        """Add ``value`` to the heap."""
        heapq.heappush(self.heap, value)

    def get_min(self):
        """Return the smallest item without removing it, or ``None`` if empty."""
        return self.heap[0] if self.heap else None

    def extract_min(self):
        """Remove and return the smallest item, or ``None`` if empty."""
        if not self.heap:
            return None
        return heapq.heappop(self.heap)


Support Vector Machine (SVM) Classifier

In [6]:
def svm_classifier(data_points, new_point):
    """Placeholder for a support vector machine classifier.

    No classification logic is implemented: both arguments are ignored and
    every call returns ``None``.
    """
    return None


Calculate the Z-Score of Data

In [7]:
def calculate_z_scores(data):
    """Return the z-score of every value in ``data``.

    Each z-score is ``(x - mean) / std_dev``, where ``std_dev`` is the
    population standard deviation (divides by N, not N - 1).

    Args:
        data: Sized, re-iterable collection of numbers.

    Returns:
        A list of z-scores in the same order as ``data``. Empty input yields
        an empty list. When the standard deviation is zero, every score is
        ``0.0``.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(data) == 0:
        return []

    mean = sum(data) / len(data)
    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5

    # With no spread there is nothing to scale by; dividing would raise
    # ZeroDivisionError, and no value deviates from the mean anyway.
    if std_dev == 0:
        return [0.0 for _ in data]

    return [(x - mean) / std_dev for x in data]


K-Means Clustering

In [8]:
from random import sample

def euclidean_distance(point1, point2):
    """Compute the Euclidean distance between ``point1`` and ``point2``.

    The two coordinate sequences are walked in lockstep with ``zip``; any
    coordinates beyond the length of the shorter sequence are not used.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    squared_gaps = ((p - q) ** 2 for p, q in zip(point1, point2))
    total = sum(squared_gaps)
    return sqrt(total)

def k_means_clustering(data_points, k, max_iterations=100):
    """Cluster ``data_points`` into ``k`` groups with Lloyd's k-means iteration.

    Initial centroids are ``k`` points drawn at random (without replacement)
    from ``data_points``. Each iteration assigns every point to its nearest
    centroid and moves each centroid to the mean of its assigned points. The
    loop stops when the centroids no longer change or after
    ``max_iterations`` rounds.

    Args:
        data_points: Sequence of points, each an iterable of numeric
            coordinates.
        k: Number of clusters.
        max_iterations: Upper bound on refinement rounds. Defaults to 100.

    Returns:
        A list of ``k`` centroids, each a tuple of coordinates.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is negative
            or larger than the number of points.
    """
    # Store centroids as tuples from the start so the convergence test below
    # compares like with like even when the input points are lists.
    centroids = [tuple(point) for point in sample(data_points, k)]

    for _ in range(max_iterations):
        clusters = [[] for _ in range(k)]
        for point in data_points:
            distances = [euclidean_distance(point, centroid) for centroid in centroids]
            clusters[distances.index(min(distances))].append(point)

        # A cluster can end up empty (e.g. duplicate points chosen as initial
        # centroids). Keep its previous centroid in that case; averaging zero
        # points would give an empty tuple, which sits at distance 0 from
        # every point and would capture the whole data set next round.
        new_centroids = [
            tuple(sum(axis) / len(axis) for axis in zip(*members)) if members else previous
            for members, previous in zip(clusters, centroids)
        ]

        if new_centroids == centroids:
            break
        centroids = new_centroids

    return centroids


Evaluate Classification Model Using F1 Score

In [9]:
def f1_score(true_labels, predicted_labels):
    """Compute the F1 score for binary labels, treating ``1`` as positive.

    Args:
        true_labels: Ground-truth labels (``0`` or ``1``).
        predicted_labels: Predicted labels (``0`` or ``1``), paired
            positionally with ``true_labels``.

    Returns:
        The harmonic mean of precision and recall, or ``0`` when both are
        zero. Precision and recall each fall back to ``0`` when their
        denominator is zero.
    """
    true_pos = sum(
        1 for actual, guess in zip(true_labels, predicted_labels)
        if actual == guess == 1
    )
    false_pos = sum(
        1 for actual, guess in zip(true_labels, predicted_labels)
        if actual == 0 and guess == 1
    )
    false_neg = sum(
        1 for actual, guess in zip(true_labels, predicted_labels)
        if actual == 1 and guess == 0
    )

    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg
    precision = 0 if predicted_pos == 0 else true_pos / predicted_pos
    recall = 0 if actual_pos == 0 else true_pos / actual_pos

    if precision + recall == 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)


Visualize Data Distribution Using a Histogram

In [10]:
def create_histogram(data, bins):
    """Count how many values of ``data`` fall into each of ``bins`` equal-width bins.

    The bins span ``min(data)`` to ``max(data)``. Each bin is labelled
    ``"<lower> - <upper>"`` with both edges formatted to two decimals, and
    the maximum value is counted in the last bin.

    Args:
        data: Sized, re-iterable collection of numbers.
        bins: Number of bins; must be at least 1.

    Returns:
        A dict mapping bin label to count, in ascending bin order. Bins whose
        labels are identical after rounding (including the case where all
        values are equal) are merged into a single entry.

    Raises:
        ValueError: If ``bins`` is less than 1 or ``data`` is empty.
    """
    if bins < 1:
        raise ValueError("bins must be a positive integer")
    if len(data) == 0:
        raise ValueError("data must not be empty")

    min_val, max_val = min(data), max(data)
    bin_size = (max_val - min_val) / bins

    # Build the labels once instead of re-listing the dict keys per value.
    labels = [
        f"{min_val + i * bin_size:.2f} - {min_val + (i + 1) * bin_size:.2f}"
        for i in range(bins)
    ]

    counts = [0] * bins
    for value in data:
        if bin_size == 0:
            # All values are identical; floor-dividing by zero would raise.
            index = 0
        else:
            # Clamp so that max_val lands in the last bin, not one past it.
            index = min(int((value - min_val) // bin_size), bins - 1)
        counts[index] += 1

    # Very narrow bins can format to the same label; add their counts
    # together rather than losing a key and mis-indexing.
    histogram = {}
    for label, count in zip(labels, counts):
        histogram[label] = histogram.get(label, 0) + count
    return histogram


Decision Tree Classifier

In [11]:
def decision_tree_classifier(data_points, new_point):
    """Placeholder for a decision tree classifier.

    No classification logic is implemented: both arguments are ignored and
    every call returns ``None``.
    """
    return None


Normalize Data Using Min-Max Scaling

In [12]:
def min_max_normalization(data):
    """Rescale ``data`` linearly so the minimum maps to 0 and the maximum to 1.

    Args:
        data: Sized, re-iterable collection of numbers.

    Returns:
        A list of scaled values in the same order as ``data``. Empty input
        yields an empty list. When every value is identical, every result
        is ``0.0``.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(data) == 0:
        return []

    min_val, max_val = min(data), max(data)
    span = max_val - min_val

    # A constant data set has no range to scale by; dividing would raise
    # ZeroDivisionError, so map every value to the lower bound instead.
    if span == 0:
        return [0.0 for _ in data]

    return [(x - min_val) / span for x in data]


Calculate Euclidean Distance Between Two Points

In [13]:
def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points.

    Each point is an iterable of numeric coordinates. The coordinates are
    matched up with ``zip``, so a longer point is truncated to the length of
    the shorter one.

    Returns:
        The square root of the summed squared per-coordinate differences.
    """
    coordinate_pairs = zip(point1, point2)
    sum_of_squares = sum((u - v) ** 2 for u, v in coordinate_pairs)
    return sqrt(sum_of_squares)
