In [1]:
from collections import Counter
from math import sqrt
from typing import List, Tuple

def knn_classifier(data_points: List[Tuple[float, float, str]],
                   new_point: Tuple[float, float],
                   k: int = 3) -> str:
    """Classify a 2-D point by majority vote among its k nearest neighbours.

    Args:
        data_points: Labelled training points as ``(x, y, label)`` tuples.
        new_point: The ``(x, y)`` point to classify.
        k: Number of nearest neighbours that vote. If ``k`` is larger than
            the number of training points, every training point votes.

    Returns:
        The most common label among the k nearest neighbours. When several
        labels are tied, the label encountered first in distance order wins.

    Raises:
        ValueError: If ``data_points`` is empty or ``k`` is not positive.
    """
    if not data_points:
        raise ValueError("data_points must contain at least one labelled point.")
    # A negative k would otherwise slice "all but the last |k|" neighbours
    # and silently return a vote over the wrong set of points.
    if k < 1:
        raise ValueError("k must be a positive integer.")

    target_x, target_y = new_point[0], new_point[1]

    # Euclidean distance from the query point to every labelled point.
    distances = [
        (sqrt((x - target_x) ** 2 + (y - target_y) ** 2), label)
        for x, y, label in data_points
    ]

    # Stable sort on distance only, so equidistant points keep input order.
    distances.sort(key=lambda pair: pair[0])

    votes = Counter(label for _, label in distances[:k])
    return votes.most_common(1)[0][0]


In [2]:
from typing import List
from statistics import mean, stdev

def remove_outliers(data: List[float], num_stdevs: float = 2.0) -> List[float]:
    """Drop values that lie too far from the mean of ``data``.

    Args:
        data: The values to filter.
        num_stdevs: How many sample standard deviations a value may deviate
            from the mean before it is treated as an outlier (default 2).

    Returns:
        A new list with the values whose absolute deviation from the mean is
        at most ``num_stdevs`` sample standard deviations, in input order.
        Inputs with fewer than two values are returned unchanged (as a copy),
        because a sample standard deviation cannot be computed for them.
    """
    # stdev() needs at least two observations; with fewer there is nothing
    # that could meaningfully be called an outlier.
    if len(data) < 2:
        return list(data)

    data_mean = mean(data)
    threshold = num_stdevs * stdev(data)

    return [x for x in data if abs(x - data_mean) <= threshold]


In [3]:
from typing import List

def matrix_multiply(mat1: List[List[int]], mat2: List[List[int]]) -> List[List[int]]:
    """Multiply two matrices given as lists of rows.

    Args:
        mat1: Left operand with shape ``(n, m)``.
        mat2: Right operand with shape ``(m, p)``.

    Returns:
        The ``(n, p)`` product as a new list of rows.

    Raises:
        ValueError: If either matrix has no rows, if the rows of a matrix
            differ in length, or if the column count of ``mat1`` does not
            equal the row count of ``mat2``.
    """
    if not mat1 or not mat2:
        raise ValueError("Matrices must have at least one row.")

    cols_mat1, cols_mat2 = len(mat1[0]), len(mat2[0])

    # Ragged input would otherwise be silently truncated or fail with an
    # IndexError in the middle of the computation.
    if any(len(row) != cols_mat1 for row in mat1) or \
            any(len(row) != cols_mat2 for row in mat2):
        raise ValueError("All rows of a matrix must have the same length.")

    # Ensure matrices are compatible for multiplication
    if cols_mat1 != len(mat2):
        raise ValueError("Incompatible matrices for multiplication.")

    # Transpose mat2 once so each output cell is a plain row-by-column dot product.
    columns = list(zip(*mat2))
    return [
        [sum(a * b for a, b in zip(row, column)) for column in columns]
        for row in mat1
    ]


In [4]:
from typing import List
from math import sqrt

def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """Return the cosine of the angle between two equal-length vectors.

    Args:
        vec1: First vector.
        vec2: Second vector; must have the same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)``, or ``0.0`` when either
        vector has zero magnitude (the similarity is undefined there).

    Raises:
        ValueError: If the vectors differ in length.
    """
    # zip() would silently truncate to the shorter vector while the
    # magnitudes below still use every component, giving a wrong result.
    if len(vec1) != len(vec2):
        raise ValueError("Vectors must have the same length.")

    dot_product = sum(x * y for x, y in zip(vec1, vec2))

    magnitude_vec1 = sqrt(sum(x**2 for x in vec1))
    magnitude_vec2 = sqrt(sum(y**2 for y in vec2))

    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0.0
    return dot_product / (magnitude_vec1 * magnitude_vec2)


In [5]:
import heapq

class MinHeap:
    """Small min-heap wrapper around the standard ``heapq`` functions."""

    def __init__(self):
        # Backing list; heapq keeps the smallest element at index 0.
        self.heap = []

    def insert(self, value: int) -> None:
        """Push ``value`` onto the heap."""
        heapq.heappush(self.heap, value)

    def get_min(self) -> int:
        """Return the smallest value without removing it, or ``None`` if empty."""
        if not self.heap:
            return None
        return self.heap[0]

    def extract_min(self) -> int:
        """Remove and return the smallest value, or ``None`` if empty."""
        if not self.heap:
            return None
        return heapq.heappop(self.heap)


In [6]:
from typing import List, Tuple

def svm_classifier(data_points: List[Tuple[float, float, str]], 
                   new_point: Tuple[float, float]) -> str:
    """Placeholder linear classifier; NOT a trained SVM.

    ``data_points`` is accepted for interface compatibility but is not read:
    the decision boundary is the fixed line ``x + y = 0``. A real
    implementation would fit the weights and bias from the training data
    (for example with scikit-learn).

    Returns:
        ``'class_1'`` when the decision value is strictly positive,
        otherwise ``'class_2'``.
    """
    # Fixed example parameters of the linear decision function.
    weight_x, weight_y = 1.0, 1.0
    bias = 0.0

    score = new_point[0] * weight_x + new_point[1] * weight_y + bias

    if score > 0:
        return 'class_1'
    return 'class_2'


In [7]:
from typing import List
from statistics import mean, stdev

def calculate_z_scores(data: List[float]) -> List[float]:
    """Standardise ``data`` to z-scores using the sample standard deviation.

    Args:
        data: The values to standardise; at least two are required.

    Returns:
        ``(x - mean) / stdev`` for every ``x`` in ``data``, in input order.
        If all values are identical (standard deviation of zero) every
        z-score is ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        statistics.StatisticsError: If ``data`` has fewer than two values.
    """
    data_mean = mean(data)
    data_stdev = stdev(data)

    # Constant data has no spread; every point sits exactly on the mean.
    if data_stdev == 0:
        return [0.0 for _ in data]

    return [(x - data_mean) / data_stdev for x in data]


In [8]:
from typing import List, Tuple
from random import sample
from math import sqrt

def euclidean_distance(p1: Tuple[float, float], p2: Tuple[float, float]) -> float:
    """Return the straight-line distance between two 2-D points."""
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return sqrt(delta_x**2 + delta_y**2)

def k_means_clustering(data_points: List[Tuple[float, float]], k: int,
                       max_iterations: int = 100) -> List[Tuple[float, float]]:
    """Cluster 2-D points with Lloyd's k-means algorithm.

    Initial centroids are drawn with ``random.sample``, so results vary
    between runs unless the caller seeds the ``random`` module first.

    Args:
        data_points: The ``(x, y)`` points to cluster.
        k: Number of clusters; must be between 1 and ``len(data_points)``.
        max_iterations: Upper bound on assignment/update rounds (default 100).

    Returns:
        The ``k`` final centroids as ``(x, y)`` tuples.

    Raises:
        ValueError: If ``k`` is outside ``1..len(data_points)``.
    """
    if not 1 <= k <= len(data_points):
        raise ValueError("k must be between 1 and the number of data points.")

    # Initialize centroids randomly from the data points
    centroids = sample(data_points, k)

    for _ in range(max_iterations):
        # Assign every point to its nearest centroid (first one wins on ties).
        clusters = [[] for _ in range(k)]
        for point in data_points:
            nearest = min(
                range(k),
                key=lambda idx: euclidean_distance(point, centroids[idx]),
            )
            clusters[nearest].append(point)

        # Move each centroid to the mean of its members; a centroid that
        # attracted no points stays where it is.
        updated = []
        for centroid, members in zip(centroids, clusters):
            if members:
                size = len(members)
                centroid = (
                    sum(p[0] for p in members) / size,
                    sum(p[1] for p in members) / size,
                )
            updated.append(centroid)

        # Assignments depend only on the centroids, so once they stop moving
        # further rounds cannot change the result.
        if updated == centroids:
            break
        centroids = updated

    return centroids


In [9]:
from typing import List

def f1_score(true_labels: List[int], predicted_labels: List[int]) -> float:
    """Compute the binary F1 score, treating label ``1`` as the positive class.

    Labels are paired positionally; pairing stops at the shorter list.

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when there are
        no true positives.
    """
    true_pos = false_pos = false_neg = 0

    # Tally the confusion-matrix cells that F1 needs in a single pass.
    for actual, predicted in zip(true_labels, predicted_labels):
        if predicted == 1:
            if actual == 1:
                true_pos += 1
            elif actual == 0:
                false_pos += 1
        elif predicted == 0 and actual == 1:
            false_neg += 1

    # Without true positives precision and recall are both zero (or undefined).
    if true_pos == 0:
        return 0.0

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)

    return 2 * (precision * recall) / (precision + recall)


In [10]:
from typing import List, Dict

def create_histogram(data: List[float], bins: int) -> Dict[str, int]:
    """Count how many values fall into each of ``bins`` equal-width intervals.

    Args:
        data: The values to bin; must be non-empty.
        bins: Number of equal-width bins spanning ``min(data)``..``max(data)``.

    Returns:
        A dict mapping ``"<lower>-<upper>"`` labels (edges rounded to two
        decimals) to counts. Bins are half-open ``[lower, upper)`` except the
        last one, which also includes its upper edge so the maximum value is
        counted. If every value is identical, a single zero-width bin holding
        all values is returned. Bins whose rounded labels coincide share one
        key, and the later bin's count overwrites the earlier one.

    Raises:
        ValueError: If ``bins`` is less than 1 or ``data`` is empty.
    """
    from bisect import bisect_right

    if bins < 1:
        raise ValueError("bins must be a positive integer.")

    min_val, max_val = min(data), max(data)
    bin_size = (max_val - min_val) / bins

    # Zero-width range: all bins would collapse to the same label, so report
    # one bin that contains every value.
    if bin_size == 0:
        edge = min_val + bin_size
        return {f"{round(edge, 2)}-{round(edge, 2)}": len(data)}

    edges = [min_val + i * bin_size for i in range(bins + 1)]
    counts = [0] * bins
    for x in data:
        # Index of the last edge <= x; clamp so max_val lands in the final
        # bin instead of falling off the end.
        position = min(bisect_right(edges, x) - 1, bins - 1)
        counts[position] += 1

    return {
        f"{round(edges[i], 2)}-{round(edges[i + 1], 2)}": counts[i]
        for i in range(bins)
    }


In [11]:
from typing import List, Tuple

def decision_tree_classifier(data_points: List[Tuple[List[float], str]], 
                             new_point: List[float]) -> str:
    """Placeholder classifier; NOT a real decision tree.

    Ignores ``new_point`` and simply returns the label of the first training
    sample. A real implementation would build and traverse a tree (for
    example with scikit-learn).
    """
    first_sample = data_points[0]
    return first_sample[1]


In [12]:
from typing import List

def min_max_normalization(data: List[float]) -> List[float]:
    """Rescale ``data`` linearly so its minimum maps to 0 and maximum to 1.

    Args:
        data: The values to rescale.

    Returns:
        A new list of ``(x - min) / (max - min)`` values in input order.
        An empty input yields an empty list. If all values are identical the
        range is zero, and every value maps to ``0.0`` rather than raising
        ``ZeroDivisionError``.
    """
    if not data:
        return []

    min_val, max_val = min(data), max(data)
    value_range = max_val - min_val

    # Constant data: nothing to scale, place every point at the lower bound.
    if value_range == 0:
        return [0.0 for _ in data]

    return [(x - min_val) / value_range for x in data]


In [13]:
from typing import List
from math import sqrt

def euclidean_distance(point1: List[float], point2: List[float]) -> float:
    """Return the Euclidean distance between two points of any dimension.

    Coordinates are paired positionally; pairing stops at the shorter point.

    NOTE(review): an earlier cell defines a 2-D function with this same name;
    once this cell runs, this definition replaces it.
    """
    squared_total = 0
    for a, b in zip(point1, point2):
        squared_total += (a - b)**2
    return sqrt(squared_total)
