# 1. Implement a K-Nearest Neighbors (KNN) Algorithm

In [4]:
from collections import Counter
from math import sqrt

def knn_classifier(data_points, new_point, k=3):
    distances = []
    for point in data_points:
        distance = sqrt((point[0] - new_point[0]) ** 2 + (point[1] - new_point[1]) ** 2)
        distances.append((distance, point[2]))
    distances.sort(key=lambda x: x[0])
    nearest_neighbors = [label for _, label in distances[:k]]
    most_common = Counter(nearest_neighbors).most_common(1)
    return most_common[0][0]
    
data = [(1, 2, 'A'), (2, 3, 'B'), (3, 1, 'A'), (6, 7, 'B'), (7, 6, 'B')]
new_data_point = (5, 5)
result = knn_classifier(data, new_data_point)
print(result)



B


#  2. Remove Outliers from Data

In [5]:
import statistics

def remove_outliers(data):
    mean = statistics.mean(data)
    std_dev = statistics.stdev(data)
    threshold = 2 * std_dev
    return [x for x in data if abs(x - mean) <= threshold]
    
data = [10, 12, 13, 15, 22, 25, 500, 30, 32, 35]
cleaned_data = remove_outliers(data)
print(cleaned_data)



[10, 12, 13, 15, 22, 25, 30, 32, 35]


#  3. Optimize a Matrix Multiplication for Neural Network

In [7]:
def matrix_multiply(mat1, mat2):
    rows_mat1 = len(mat1)
    cols_mat1 = len(mat1[0])
    rows_mat2 = len(mat2)
    cols_mat2 = len(mat2[0])

    if cols_mat1 != rows_mat2:
        return "Incompatible matrices for multiplication"

    result = [[0 for _ in range(cols_mat2)] for _ in range(rows_mat1)]
    
    for i in range(rows_mat1):
        for j in range(cols_mat2):
            for k in range(cols_mat1):
                result[i][j] += mat1[i][k] * mat2[k][j]
    
    return result
    
mat1 = [[1, 2], [3, 4]]
mat2 = [[5, 6], [7, 8]]
result = matrix_multiply(mat1, mat2)
print(result)


[[19, 22], [43, 50]]


# 4. Word Embedding Similarity

In [8]:
import math

def cosine_similarity(vec1, vec2):
    dot_product = sum(v1 * v2 for v1, v2 in zip(vec1, vec2))
    magnitude_vec1 = math.sqrt(sum(v1 ** 2 for v1 in vec1))
    magnitude_vec2 = math.sqrt(sum(v2 ** 2 for v2 in vec2))
    return dot_product / (magnitude_vec1 * magnitude_vec2)
vec1 = [1.0, 2.0, 3.0]
vec2 = [4.0, 5.0, 6.0]
similarity = cosine_similarity(vec1, vec2)
print(similarity)


0.9746318461970762


#  5. Implement a Min-Heap Using a Priority Queue

In [9]:
import heapq

class MinHeap:
    def __init__(self):
        self.heap = []

    def insert(self, value):
        heapq.heappush(self.heap, value)

    def get_min(self):
        return self.heap[0] if self.heap else None

    def extract_min(self):
        return heapq.heappop(self.heap) if self.heap else None
        
min_heap = MinHeap()

min_heap.insert(10)
min_heap.insert(5)
min_heap.insert(30)

print(min_heap.get_min())   
print(min_heap.extract_min()) 
print(min_heap.get_min())    


5
5
10


# 6. Implement a Support Vector Machine (SVM) Classifier

In [10]:
from sklearn import svm

def svm_classifier(data_points, new_point):
    X = [(x, y) for x, y, label in data_points] 
    y = [label for _, _, label in data_points]  
    
    clf = svm.SVC(kernel='linear')  
    clf.fit(X, y)                   
    
    return clf.predict([new_point])[0]  
    
data_points = [(1.0, 2.0, 'A'), (2.0, 3.0, 'A'), (3.0, 3.0, 'B'), (4.0, 5.0, 'B')]
new_point = (3.5, 4.0)

predicted_label = svm_classifier(data_points, new_point)
print(predicted_label)  



B


#  7. Calculate the Z-Score of Data

In [12]:
def calculate_z_scores(data):
    mean = sum(data) / len(data)
    std_dev = (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
    return [(x - mean) / std_dev for x in data]
data = [10, 20, 30, 40, 50]
z_scores = calculate_z_scores(data)
print(z_scores)



[-1.414213562373095, -0.7071067811865475, 0.0, 0.7071067811865475, 1.414213562373095]


#  8. K-Means Clustering Implementation

In [13]:
import random

def euclidean_distance(p1, p2):
    return ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5

def assign_clusters(data_points, centroids):
    clusters = [[] for _ in centroids]
    for point in data_points:
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        closest_centroid = distances.index(min(distances))
        clusters[closest_centroid].append(point)
    return clusters

def calculate_centroids(clusters):
    centroids = []
    for cluster in clusters:
        if len(cluster) > 0:
            x_coords = [p[0] for p in cluster]
            y_coords = [p[1] for p in cluster]
            centroids.append((sum(x_coords) / len(x_coords), sum(y_coords) / len(y_coords)))
    return centroids

def k_means_clustering(data_points, k):
    centroids = random.sample(data_points, k)
    for _ in range(100):  # Max iterations
        clusters = assign_clusters(data_points, centroids)
        new_centroids = calculate_centroids(clusters)
        if new_centroids == centroids:
            break
        centroids = new_centroids
    return centroids
data_points = [(1, 1), (2, 1), (4, 3), (5, 4), (10, 10), (11, 11)]
k = 2
centroids = k_means_clustering(data_points, k)
print(centroids)


[(10.5, 10.5), (3.0, 2.25)]


#  9. Evaluate Classification Model Using F1 Score

In [14]:
def f1_score(true_labels, predicted_labels):
    tp = sum((t == 1 and p == 1) for t, p in zip(true_labels, predicted_labels))
    fp = sum((t == 0 and p == 1) for t, p in zip(true_labels, predicted_labels))
    fn = sum((t == 1 and p == 0) for t, p in zip(true_labels, predicted_labels))
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    
    if precision + recall == 0:
        return 0
    
    return 2 * (precision * recall) / (precision + recall)
true_labels = [1, 0, 1, 1, 0, 1, 0, 0, 1]
predicted_labels = [1, 0, 0, 1, 0, 1, 0, 1, 1]
f1 = f1_score(true_labels, predicted_labels)
print(f1)



0.8000000000000002


# 10. Visualize Data Distribution Using a Histogram

In [15]:
def create_histogram(data, bins):
    min_value = min(data)
    max_value = max(data)
    bin_width = (max_value - min_value) / bins
    histogram = {}
    
    for i in range(bins):
        bin_start = min_value + i * bin_width
        bin_end = bin_start + bin_width
        count = sum(1 for x in data if bin_start <= x < bin_end)
        histogram[f"{bin_start:.2f} - {bin_end:.2f}"] = count
        
    return histogram
data = [1.2, 2.3, 3.1, 2.8, 1.5, 3.7, 2.1, 4.0, 5.6, 2.5]
bins = 5
histogram = create_histogram(data, bins)
print(histogram)


{'1.20 - 2.08': 2, '2.08 - 2.96': 4, '2.96 - 3.84': 2, '3.84 - 4.72': 1, '4.72 - 5.60': 0}


# 11. Implement a Decision Tree Classifier

In [16]:
class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value

def split(data, feature, threshold):
    left = [point for point in data if point[0][feature] <= threshold]
    right = [point for point in data if point[0][feature] > threshold]
    return left, right

def gini_index(data):
    labels = [point[1] for point in data]
    classes = set(labels)
    total = len(labels)
    gini = 1.0
    for cls in classes:
        prob = labels.count(cls) / total
        gini -= prob ** 2
    return gini

def best_split(data):
    best_gini = float('inf')
    best_feature, best_threshold = None, None
    for feature in range(len(data[0][0])):
        thresholds = set(point[0][feature] for point in data)
        for threshold in thresholds:
            left, right = split(data, feature, threshold)
            if left and right:
                gini = (len(left) / len(data)) * gini_index(left) + (len(right) / len(data)) * gini_index(right)
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold
    return best_feature, best_threshold

def build_tree(data):
    labels = [point[1] for point in data]
    if len(set(labels)) == 1:
        return Node(value=labels[0])
    if not data:
        return Node(value=max(set(labels), key=labels.count))

    feature, threshold = best_split(data)
    if feature is None:
        return Node(value=max(set(labels), key=labels.count))

    left_data, right_data = split(data, feature, threshold)
    left_node = build_tree(left_data)
    right_node = build_tree(right_data)
    return Node(feature, threshold, left_node, right_node)

def predict(tree, point):
    if tree.value is not None:
        return tree.value
    if point[tree.feature] <= tree.threshold:
        return predict(tree.left, point)
    else:
        return predict(tree.right, point)

def decision_tree_classifier(data_points, new_point):
    tree = build_tree(data_points)
    return predict(tree, new_point)
data_points = [([2.5, 3.5], 'A'), ([1.0, 1.5], 'B'), ([4.5, 5.5], 'A'), ([3.5, 2.5], 'B')]
new_point = [3.0, 3.0]
label = decision_tree_classifier(data_points, new_point)
print(label)


A


# 12. Normalize Data Using Min-Max Scaling