1. Implement a K-Nearest Neighbors (KNN) Algorithm

In [1]:


from typing import List, Tuple
import math
from collections import Counter

def euclidean_distance(point1: Tuple[float, float], point2: Tuple[float, float]) -> float:
    # Calculate the Euclidean distance between two points
    return math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)

def knn_classifier(data_points: List[Tuple[float, float, str]], new_point: Tuple[float, float], k: int = 3) -> str:
    # Calculate the distance from new_point to all data points
    distances = []
    for point in data_points:
        distance = euclidean_distance(new_point, (point[0], point[1]))
        distances.append((distance, point[2]))  # Append (distance, label)
    
    # Sort the distances by ascending order
    distances.sort(key=lambda x: x[0])
    
    # Get the top k nearest neighbors (k=3 by default)
    nearest_neighbors = [label for _, label in distances[:k]]
    
    # Determine the most common label among the nearest neighbors
    most_common_label = Counter(nearest_neighbors).most_common(1)[0][0]
    
    return most_common_label

# Example usage
data_points = [(1.0, 2.0, 'A'), (2.0, 3.0, 'A'), (3.0, 4.0, 'B'), (5.0, 6.0, 'B')]
new_point = (3.5, 4.5)
predicted_label = knn_classifier(data_points, new_point)
print(f"Predicted Label: {predicted_label}")


Predicted Label: B


2. Remove Outliers from Data

In [2]:
 
from typing import List
import statistics

def remove_outliers(data: List[float]) -> List[float]:
    if len(data) < 2:
        return data  # If there are fewer than 2 numbers, no outliers to remove
    
    # Calculate mean and standard deviation
    mean = statistics.mean(data)
    std_dev = statistics.stdev(data)
    
    # Define the range within which numbers are not considered outliers
    lower_bound = mean - 2 * std_dev
    upper_bound = mean + 2 * std_dev
    
    # Filter and return numbers that are within the range
    filtered_data = [x for x in data if lower_bound <= x <= upper_bound]
    
    return filtered_data

# Example usage
data = [10, 12, 13, 14, 100, 15, 16, 17, 120]
filtered_data = remove_outliers(data)
print(f"Data without outliers: {filtered_data}")



Data without outliers: [10, 12, 13, 14, 100, 15, 16, 17, 120]


3. Optimize a Matrix Multiplication for Neural Network

In [4]:

from typing import List

def matrix_multiply(mat1: List[List[int]], mat2: List[List[int]]) -> List[List[int]]:
    # Check if matrix multiplication is possible (columns of mat1 == rows of mat2)
    if len(mat1[0]) != len(mat2):
        raise ValueError("Incompatible matrices: Columns of mat1 must match rows of mat2.")
    
    # Dimensions of the result matrix: rows of mat1 x columns of mat2
    result_rows = len(mat1)
    result_cols = len(mat2[0])
    
    # Initialize the result matrix with zeros
    result = [[0 for _ in range(result_cols)] for _ in range(result_rows)]
    
    # Perform matrix multiplication
    for i in range(result_rows):
        for j in range(result_cols):
            for k in range(len(mat1[0])):  # This is the shared dimension
                result[i][j] += mat1[i][k] * mat2[k][j]
    
    return result

# Example usage
mat1 = [[1, 2, 3], [4, 5, 6]]
mat2 = [[7, 8], [9, 10], [11, 12]]

result = matrix_multiply(mat1, mat2)
for row in result:
    print(row)


[58, 64]
[139, 154]


4. Word Embedding Similarity

In [5]:

from typing import List
import math

def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    # Ensure the vectors have the same length
    if len(vec1) != len(vec2):
        raise ValueError("Vectors must be of the same length")
    
    # Calculate dot product
    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    
    # Calculate magnitudes of vec1 and vec2
    magnitude_vec1 = math.sqrt(sum(a ** 2 for a in vec1))
    magnitude_vec2 = math.sqrt(sum(b ** 2 for b in vec2))
    
    # Avoid division by zero by checking if any magnitude is zero
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        raise ValueError("One of the vectors has zero magnitude, cannot compute cosine similarity")
    
    # Calculate cosine similarity
    cosine_sim = dot_product / (magnitude_vec1 * magnitude_vec2)
    
    return cosine_sim

# Example usage
vec1 = [1.0, 2.0, 3.0]
vec2 = [4.0, 5.0, 6.0]

similarity = cosine_similarity(vec1, vec2)
print(f"Cosine Similarity: {similarity}")


Cosine Similarity: 0.9746318461970762


5. Implement a Min-Heap Using a Priority Queue  

In [6]:


import heapq

class MinHeap:
    def __init__(self):
        # Initialize an empty list to hold the heap elements
        self.heap = []
    
    def insert(self, value: int) -> None:
        """Insert a value into the heap."""
        heapq.heappush(self.heap, value)
    
    def get_min(self) -> int:
        """Return the minimum value in the heap without removing it."""
        if not self.heap:
            raise IndexError("Heap is empty")
        return self.heap[0]
    
    def extract_min(self) -> int:
        """Remove and return the minimum value from the heap."""
        if not self.heap:
            raise IndexError("Heap is empty")
        return heapq.heappop(self.heap)

# Example usage
heap = MinHeap()
heap.insert(5)
heap.insert(3)
heap.insert(8)
heap.insert(1)

print("Minimum value:", heap.get_min())  # Output: 1
print("Extract minimum:", heap.extract_min())  # Output: 1
print("Minimum value after extraction:", heap.get_min())  # Output: 3



Minimum value: 1
Extract minimum: 1
Minimum value after extraction: 3


6. Implement a Support Vector Machine (SVM) Classifier

In [8]:


from typing import List, Tuple

class SVMClassifier:
    def __init__(self, data_points: List[Tuple[float, float, str]]):
        self.data_points = data_points
        self.w = [0, 0]  # Weights (slope components of the line)
        self.b = 0       # Bias term (intercept)
    
    def train(self, learning_rate: float = 0.01, epochs: int = 1000) -> None:
        """Train the SVM classifier using a simplified perceptron-based approach."""
        for _ in range(epochs):
            for (x, y, label) in self.data_points:
                # Convert labels to +1 or -1
                label = 1 if label == 'positive' else -1
                # Check if the point is correctly classified
                if label * (self.w[0] * x + self.w[1] * y + self.b) <= 0:
                    # Misclassified point, adjust weights and bias
                    self.w[0] += learning_rate * label * x
                    self.w[1] += learning_rate * label * y
                    self.b += learning_rate * label
    
    def predict(self, new_point: Tuple[float, float]) -> str:
        """Predict the label of a new point based on the learned hyperplane."""
        x, y = new_point
        # Calculate the decision value for the new point
        decision_value = self.w[0] * x + self.w[1] * y + self.b
        # Return the predicted label
        return 'positive' if decision_value  >= 0 else 'negative'

# Example usage
data_points = [
    (2.0, 3.0, 'positive'),
    (1.0, 1.0, 'negative'),
    (2.0, 1.5, 'positive'),
    (3.0, 2.0, 'positive'),
    (1.5, 0.5, 'negative')
]

# Create SVM classifier instance and train it
svm = SVMClassifier(data_points)
svm.train()

# Predict label for a new point
new_point = (2.5, 2.0)
predicted_label = svm.predict(new_point)
print(f"Predicted label for {new_point}: {predicted_label}")


Predicted label for (2.5, 2.0): positive


7  Calculate the Z-Score of Data

In [9]:

from typing import List
import math

def calculate_z_scores(data: List[float]) -> List[float]:
    if not data:
        raise ValueError("The data list cannot be empty")
    
    # Step 1: Calculate the mean
    mean = sum(data) / len(data)
    
    # Step 2: Calculate the standard deviation
    variance = sum((x - mean) ** 2 for x in data) / len(data)
    std_dev = math.sqrt(variance)
    
    # Step 3: Calculate z-scores for each data point
    z_scores = [(x - mean) / std_dev for x in data]
    
    return z_scores

# Example usage
data = [10, 20, 30, 40, 50]
z_scores = calculate_z_scores(data)
print(f"Z-Scores: {z_scores}")


Z-Scores: [-1.414213562373095, -0.7071067811865475, 0.0, 0.7071067811865475, 1.414213562373095]


8. K-Means Clustering Implementation

In [11]:


from typing import List, Tuple
import random
import math

def calculate_distance(point1: Tuple[float, float], point2: Tuple[float, float]) -> float:
    """Calculate the Euclidean distance between two points."""
    return math.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)

def k_means_clustering(data_points: List[Tuple[float, float]], k: int) -> List[Tuple[float, float]]:
    if k <= 0 or k > len(data_points):
        raise ValueError("k must be a positive integer less than or equal to the number of data points.")
    
    # Step 1: Initialize centroids by randomly selecting k data points
    centroids = random.sample(data_points, k)
    previous_centroids = None
    
    while previous_centroids != centroids:
        previous_centroids = centroids.copy()
        
        # Step 2: Assignment step
        clusters = [[] for _ in range(k)]
        for point in data_points:
            distances = [calculate_distance(point, centroid) for centroid in centroids]
            nearest_centroid_index = distances.index(min(distances))
            clusters[nearest_centroid_index].append(point)
        
        # Step 3: Update step
        centroids = []
        for cluster in clusters:
            if cluster:  # Avoid empty clusters
                mean_x = sum(point[0] for point in cluster) / len(cluster)
                mean_y = sum(point[1] for point in cluster) / len(cluster)
                centroids.append((mean_x, mean_y))
            else:
                # If a cluster is empty, reinitialize it with a random data point
                centroids.append(random.choice(data_points))
    
    return centroids

# Example usage
data_points = [
    (1.0, 2.0), (1.5, 1.8), (5.0, 8.0), 
    (8.0, 8.0), (1.0, 0.6), (9.0, 11.0)
]
k = 2
centroids = k_means_clustering(data_points, k)
print(f"Centroids of the clusters: {centroids}")


Centroids of the clusters: [(7.333333333333333, 9.0), (1.1666666666666667, 1.4666666666666668)]


9. Evaluate Classification Model Using F1 Score

In [12]:


from typing import List

def f1_score(true_labels: List[int], predicted_labels: List[int]) -> float:
    if len(true_labels) == 0:
        raise ValueError("The true labels list cannot be empty.")
    
    # Step 1: Calculate TP, FP, FN
    TP = sum(1 for true, pred in zip(true_labels, predicted_labels) if true == 1 and pred == 1)
    FP = sum(1 for true, pred in zip(true_labels, predicted_labels) if true == 0 and pred == 1)
    FN = sum(1 for true, pred in zip(true_labels, predicted_labels) if true == 1 and pred == 0)
    
    # Step 2: Calculate Precision and Recall
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    
    # Step 3: Calculate F1 Score
    if precision + recall == 0:
        return 0.0  # Avoid division by zero
    f1 = 2 * (precision * recall) / (precision + recall)
    
    return f1

# Example usage
true_labels = [1, 0, 1, 1, 0, 1, 0, 0, 1, 0]
predicted_labels = [1, 0, 1, 0, 0, 1, 1, 0, 1, 0]

f1 = f1_score(true_labels, predicted_labels)
print(f"F1 Score: {f1:.2f}")


F1 Score: 0.80


10. Visualize Data Distribution Using a Histogram

In [13]:


from typing import List, Dict

def create_histogram(data: List[float], bins: int) -> Dict[str, int]:
    if bins <= 0:
        raise ValueError("The number of bins must be a positive integer.")
    
    if not data:
        return {}
    
    # Step 1: Calculate the range of the data
    min_value = min(data)
    max_value = max(data)
    
    # Step 2: Calculate the width of each bin
    bin_width = (max_value - min_value) / bins
    
    # Step 3: Create the histogram dictionary
    histogram = {}
    
    # Step 4: Count data points in each bin
    for i in range(bins):
        lower_bound = min_value + i * bin_width
        upper_bound = min_value + (i + 1) * bin_width
        bin_range = f"[{lower_bound:.2f}, {upper_bound:.2f})"  # Format for bin range
        count = sum(1 for value in data if lower_bound <= value < upper_bound)
        
        # Add to the histogram dictionary
        histogram[bin_range] = count
    
    return histogram

# Example usage
data = [1.5, 2.3, 2.9, 3.7, 4.1, 4.8, 5.5, 5.8, 6.3, 7.0]
bins = 4
histogram = create_histogram(data, bins)
print(histogram)


{'[1.50, 2.88)': 2, '[2.88, 4.25)': 3, '[4.25, 5.62)': 2, '[5.62, 7.00)': 2}


11. Implement a Decision Tree Classifier

In [14]:

from typing import List, Tuple, Any

class TreeNode:
    def __init__(self, feature_index=None, threshold=None, left=None, right=None, value=None):
        self.feature_index = feature_index  # Index of the feature to split
        self.threshold = threshold            # Threshold value for the split
        self.left = left                      # Left subtree
        self.right = right                    # Right subtree
        self.value = value                    # Class label if leaf node

def gini_impurity(labels: List[str]) -> float:
    """Calculate the Gini impurity for a list of labels."""
    if not labels:
        return 0
    total = len(labels)
    label_counts = {}
    for label in labels:
        label_counts[label] = label_counts.get(label, 0) + 1
    impurity = 1.0
    for count in label_counts.values():
        prob = count / total
        impurity -= prob ** 2
    return impurity

def best_split(data_points: List[Tuple[List[float], str]]) -> Tuple[int, float]:
    """Find the best feature and threshold to split the dataset."""
    best_impurity = float('inf')
    best_feature = None
    best_threshold = None

    features = list(zip(*[x for x, _ in data_points]))  # Transpose to get features

    for feature_index in range(len(features)):
        thresholds = sorted(set(features[feature_index]))
        for threshold in thresholds:
            left_labels = [label for (features, label) in data_points if features[feature_index] <= threshold]
            right_labels = [label for (features, label) in data_points if features[feature_index] > threshold]
            
            impurity = (len(left_labels) / len(data_points)) * gini_impurity(left_labels) + \
                       (len(right_labels) / len(data_points)) * gini_impurity(right_labels)

            if impurity < best_impurity:
                best_impurity = impurity
                best_feature = feature_index
                best_threshold = threshold

    return best_feature, best_threshold

def build_tree(data_points: List[Tuple[List[float], str]], depth: int = 0, max_depth: int = 5) -> TreeNode:
    """Recursively build the decision tree."""
    labels = [label for (_, label) in data_points]

    # If all labels are the same or max depth is reached, create a leaf node
    if len(set(labels)) == 1 or depth >= max_depth:
        return TreeNode(value=labels[0])

    # Find the best feature to split
    feature_index, threshold = best_split(data_points)

    if feature_index is None:
        return TreeNode(value=max(labels, key=labels.count))  # Return the majority label if no split found

    left_data = [(features, label) for (features, label) in data_points if features[feature_index] <= threshold]
    right_data = [(features, label) for (features, label) in data_points if features[feature_index] > threshold]

    # Create subtrees
    left_subtree = build_tree(left_data, depth + 1, max_depth)
    right_subtree = build_tree(right_data, depth + 1, max_depth)

    return TreeNode(feature_index=feature_index, threshold=threshold, left=left_subtree, right=right_subtree)

def predict(tree: TreeNode, instance: List[float]) -> str:
    """Make a prediction for a new instance."""
    if tree.value is not None:  # Leaf node
        return tree.value
    if instance[tree.feature_index] <= tree.threshold:
        return predict(tree.left, instance)
    else:
        return predict(tree.right, instance)

def decision_tree_classifier(data_points: List[Tuple[List[float], str]], new_point: List[float]) -> str:
    """Predict the label of a new instance using a decision tree."""
    tree = build_tree(data_points)
    return predict(tree, new_point)

# Example usage
data_points = [
    ([1.0, 2.0], 'A'),
    ([1.5, 1.8], 'A'),
    ([5.0, 8.0], 'B'),
    ([6.0, 9.0], 'B'),
    ([1.0, 0.6], 'A'),
    ([5.5, 6.0], 'B')
]

new_point = [5.0, 7.0]
predicted_label = decision_tree_classifier(data_points, new_point)
print(f"Predicted label for {new_point}: {predicted_label}")


Predicted label for [5.0, 7.0]: B


12. Normalize Data Using Min-Max Scaling

In [3]:


from typing import List

def min_max_normalization(data: List[float]) -> List[float]:
    if not data:
        return []
    
    min_val = min(data)
    max_val = max(data)
    
    # Handle case when all data points are the same (to avoid division by zero)
    if min_val == max_val:
        return [0.0 for _ in data]
    
    normalized_data = [(x - min_val) / (max_val - min_val) for x in data]
    
    return normalized_data
data = [10, 20, 30, 40, 50]
normalized_data = min_max_normalization(data)
print(normalized_data)


[0.0, 0.25, 0.5, 0.75, 1.0]


13. Calculate distance between two points

In [8]:
from typing import List
import math

def euclidean_distance(point1: List[float], point2: List[float]) -> float:
    # Ensure both points have the same dimensionality
    if len(point1) != len(point2):
        raise ValueError("Points must have the same number of dimensions.")
    
    # Calculate the Euclidean distance
    distance = math.sqrt(sum((x - y) ** 2 for x, y in zip(point1, point2)))
    
    return distance
point1 = [1.0, 2.0, 3.0]
point2 = [4.0, 6.0, 8.0]

distance = euclidean_distance(point1, point2)
print(distance)

7.0710678118654755
