In [None]:
from typing import List, Tuple
import math
from collections import Counter

# Euclidean distance calculation
def euclidean_distance(point1: Tuple[float, float], point2: Tuple[float, float]) -> float:
    """Return the straight-line distance between two 2D points.

    Only the first two coordinates (index 0 and 1) of each point are read.
    """
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

# KNN classifier
def knn_classifier(data_points: List[Tuple[float, float, str]], new_point: Tuple[float, float], k: int = 3) -> str:
    """Classify ``new_point`` by majority vote among its ``k`` nearest neighbours.

    Args:
        data_points: Labelled training points as ``(x, y, label)`` tuples.
        new_point: The ``(x, y)`` point to classify.
        k: Number of nearest neighbours that vote. If ``k`` exceeds the number
            of training points, every point votes.

    Returns:
        The most frequent label among the ``k`` closest points. When several
        labels are tied, the one whose first occurrence is nearest wins,
        because ``Counter.most_common`` keeps first-seen order for equal counts.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``data_points`` is empty.
    """
    # A negative k would silently slice off the *farthest* points instead of
    # selecting the nearest ones, so reject it up front.
    if k < 1:
        raise ValueError("k must be a positive integer.")
    if not data_points:
        raise ValueError("data_points must not be empty.")

    # Pair every training label with its distance to the query point and order
    # the pairs nearest-first (the sort is stable, so equal distances keep
    # their input order).
    neighbours = sorted(
        ((euclidean_distance(point[:2], new_point), point[2]) for point in data_points),
        key=lambda pair: pair[0],
    )

    # Vote among the k closest labels.
    nearest_labels = [label for _, label in neighbours[:k]]
    return Counter(nearest_labels).most_common(1)[0][0]

# Example usage: five labelled 2D training points given as (x, y, label).
data_points = [
    (1.0, 1.0, 'A'),
    (2.0, 2.0, 'A'),
    (3.0, 3.0, 'B'),
    (6.0, 5.0, 'B'),
    (7.0, 7.0, 'B')
]

# Classify a query point with the default k=3. Its three nearest training
# points are (3, 3) 'B', (2, 2) 'A' and (6, 5) 'B', so the vote yields 'B'.
new_point = (3.5, 3.5)
predicted_label = knn_classifier(data_points, new_point)
print(f"The predicted label for the point {new_point} is: {predicted_label}")


In [None]:
from typing import List
import statistics

def remove_outliers(data: List[float], threshold: float = 2) -> List[float]:
    """Drop values lying more than ``threshold`` standard deviations from the mean.

    Args:
        data: The numbers to filter.
        threshold: Half-width of the accepted band, in sample standard
            deviations. Defaults to 2.

    Returns:
        A new list with the values ``x`` satisfying
        ``mean - threshold * stdev <= x <= mean + threshold * stdev``, in their
        original order. With fewer than two values no standard deviation can
        be computed, so a copy of the input is returned.
    """
    if len(data) < 2:
        # Return a copy so callers always get a list that is independent of
        # the input, exactly as on the filtering path below.
        return list(data)

    mean = statistics.mean(data)
    # statistics.stdev is the *sample* standard deviation (n - 1 denominator).
    spread = threshold * statistics.stdev(data)
    lower_bound = mean - spread
    upper_bound = mean + spread

    return [x for x in data if lower_bound <= x <= upper_bound]

# Example usage
# NOTE: 100 and 110 look extreme, but they inflate the sample standard deviation
# (about 41.1 around a mean of 39), so both stay inside mean +/- 2 * stdev and
# the list is printed unchanged.
data = [10, 12, 15, 18, 22, 25, 100, 110]
cleaned_data = remove_outliers(data)
print(cleaned_data)


In [None]:
from typing import List

def matrix_multiply(mat1: List[List[int]], mat2: List[List[int]]) -> List[List[int]]:
    """Return the matrix product ``mat1 x mat2`` of two row-major nested lists.

    Args:
        mat1: Left operand with shape ``(n, m)``.
        mat2: Right operand with shape ``(m, p)``.

    Returns:
        A new ``(n, p)`` matrix as a list of rows.

    Raises:
        ValueError: If either matrix is empty, if a matrix has rows of
            different lengths, or if the column count of ``mat1`` does not
            match the row count of ``mat2``.
    """
    if not mat1 or not mat2 or not mat1[0] or not mat2[0]:
        raise ValueError("Matrices must be non-empty.")

    inner_size = len(mat1[0])
    column_count = len(mat2[0])

    # Ragged rows would otherwise be silently truncated or fail mid-loop.
    if any(len(row) != inner_size for row in mat1) or any(len(row) != column_count for row in mat2):
        raise ValueError("Matrix rows must all have the same length.")

    if inner_size != len(mat2):
        raise ValueError("Incompatible matrices for multiplication.")

    # Transpose mat2 once so each result entry is a plain row-by-column dot product.
    columns = list(zip(*mat2))
    return [
        [sum(a * b for a, b in zip(row, column)) for column in columns]
        for row in mat1
    ]

# Example usage: multiply a 2x3 matrix by a 3x2 matrix.
mat1 = [
    [1, 2, 3],
    [4, 5, 6]
]

mat2 = [
    [7, 8],
    [9, 10],
    [11, 12]
]

# The product is 2x2: [[58, 64], [139, 154]]
result = matrix_multiply(mat1, mat2)
print(result)


In [None]:
from typing import List
import math

def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """Return the cosine of the angle between two equally sized vectors.

    Args:
        vec1: First vector.
        vec2: Second vector, with the same number of components as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)``.

    Raises:
        ValueError: If the vectors differ in length, or if either vector has
            zero magnitude (the angle is undefined in that case).
    """
    # zip() would silently drop the extra components of the longer vector and
    # produce a plausible-looking but wrong score, so check explicitly.
    if len(vec1) != len(vec2):
        raise ValueError("Vectors must have the same dimensionality.")

    dot_product = sum(v1 * v2 for v1, v2 in zip(vec1, vec2))

    magnitude_vec1 = math.sqrt(sum(v ** 2 for v in vec1))
    magnitude_vec2 = math.sqrt(sum(v ** 2 for v in vec2))

    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        raise ValueError("One of the vectors has zero magnitude, cannot compute cosine similarity.")

    return dot_product / (magnitude_vec1 * magnitude_vec2)

# Example usage: compare two 3-dimensional vectors.
vec1 = [1.0, 2.0, 3.0]
vec2 = [4.0, 5.0, 6.0]

# dot = 32 and the norms are sqrt(14) and sqrt(77), so this prints about 0.9746.
similarity = cosine_similarity(vec1, vec2)
print(f"Cosine similarity: {similarity:.4f}")


In [None]:
import heapq

class MinHeap:
    """Min-heap of values stored in a plain list that ``heapq`` keeps ordered."""

    def __init__(self):
        # Backing list; heapq keeps the smallest element at index 0.
        self.heap = []

    def insert(self, value: int) -> None:
        """Push ``value`` onto the heap."""
        heapq.heappush(self.heap, value)

    def get_min(self) -> int:
        """Return the smallest stored value and leave the heap unchanged.

        Raises:
            IndexError: If the heap holds no elements.
        """
        if len(self.heap) == 0:
            raise IndexError("get_min from an empty heap")
        smallest = self.heap[0]
        return smallest

    def extract_min(self) -> int:
        """Pop the smallest stored value off the heap and return it.

        Raises:
            IndexError: If the heap holds no elements.
        """
        if len(self.heap) == 0:
            raise IndexError("extract_min from an empty heap")
        smallest = heapq.heappop(self.heap)
        return smallest

# Example usage: insert four values in arbitrary order.
min_heap = MinHeap()
min_heap.insert(10)
min_heap.insert(4)
min_heap.insert(7)
min_heap.insert(1)

# The smallest value is always served first, regardless of insertion order.
print(f"Minimum value: {min_heap.get_min()}")  # prints 1 (heap unchanged)
print(f"Extracted minimum: {min_heap.extract_min()}")  # prints 1 and removes it
print(f"Minimum after extraction: {min_heap.get_min()}")  # prints 4, the next smallest


In [None]:
import heapq

# NOTE(review): this class repeats the MinHeap definition from the previous
# cell; running this cell rebinds the name to an equivalent class. Consider
# keeping only one copy.
class MinHeap:
    """List-backed min-heap whose ordering is maintained by ``heapq``."""

    def __init__(self):
        # heapq stores the heap in this list with the minimum at position 0.
        self.heap = []

    def insert(self, value: int) -> None:
        """Add ``value`` to the heap."""
        heapq.heappush(self.heap, value)

    def get_min(self) -> int:
        """Peek at the smallest value; raises IndexError when the heap is empty."""
        if self.heap:
            return self.heap[0]
        raise IndexError("get_min from an empty heap")

    def extract_min(self) -> int:
        """Remove and return the smallest value; raises IndexError when the heap is empty."""
        if self.heap:
            return heapq.heappop(self.heap)
        raise IndexError("extract_min from an empty heap")

# Example usage (same demonstration as in the previous cell).
min_heap = MinHeap()
min_heap.insert(10)
min_heap.insert(4)
min_heap.insert(7)
min_heap.insert(1)

print(f"Minimum value: {min_heap.get_min()}")  # prints 1 (heap unchanged)
print(f"Extracted minimum: {min_heap.extract_min()}")  # prints 1 and removes it
print(f"Minimum after extraction: {min_heap.get_min()}")  # prints 4, the next smallest


In [None]:
from typing import List, Tuple

def sign(value: float) -> int:
    """Return 1 when ``value >= 0`` (zero included), otherwise -1."""
    if value >= 0:
        return 1
    return -1

def svm_classifier(
    data_points: List[Tuple[float, float, str]],
    new_point: Tuple[float, float],
    learning_rate: float = 0.1,
    epochs: int = 100,
) -> str:
    """Train a linear separator on labelled 2D points and classify ``new_point``.

    Despite the name, training uses a perceptron-style rule: whenever a point
    has a non-positive signed margin, the weights and bias are nudged towards
    it. No margin maximisation or regularisation is performed.

    Args:
        data_points: Training points as ``(x, y, label)`` with label ``'A'``
            or ``'B'``.
        new_point: The ``(x, y)`` point to classify.
        learning_rate: Step size applied on each corrective update.
        epochs: Maximum number of passes over ``data_points``.

    Returns:
        ``'A'`` when ``w . new_point + b >= 0``, otherwise ``'B'``. With no
        training data (or ``epochs=0``) the score is 0, which maps to ``'A'``.

    Raises:
        KeyError: If a training label is neither ``'A'`` nor ``'B'``.
    """
    # Internal numeric encoding of the two classes.
    label_map = {'A': 1, 'B': -1}
    reverse_label_map = {1: 'A', -1: 'B'}

    w = [0.0, 0.0]  # weight vector of the separating line
    b = 0.0  # bias term

    for _ in range(epochs):
        updated = False
        for x, y, label in data_points:
            target = label_map[label]

            # Signed margin: positive only when the point lies on the side of
            # the line that matches its label.
            margin = target * (w[0] * x + w[1] * y + b)

            if margin <= 0:
                w[0] += learning_rate * target * x
                w[1] += learning_rate * target * y
                b += learning_rate * target
                updated = True

        # A pass without corrections leaves w and b untouched, so every later
        # pass would be identical; stopping here cannot change the result.
        if not updated:
            break

    new_x, new_y = new_point
    return reverse_label_map[sign(w[0] * new_x + w[1] * new_y + b)]

# Example usage: five labelled 2D training points given as (x, y, label);
# svm_classifier only accepts the labels 'A' and 'B'.
data_points = [
    (1.0, 2.0, 'A'),
    (2.0, 3.0, 'A'),
    (3.0, 3.0, 'B'),
    (5.0, 4.0, 'B'),
    (1.0, 1.0, 'A')
]

# Train on the points above and classify a query point.
new_point = (2.5, 2.5)
predicted_label = svm_classifier(data_points, new_point)
print(f"The predicted label for the point {new_point} is: {predicted_label}")


In [None]:
from typing import List
import statistics

def calculate_z_scores(data: List[float]) -> List[float]:
    """Return the z-score ``(x - mean) / stdev`` of every value in ``data``.

    The sample standard deviation (``statistics.stdev``) is used.

    Args:
        data: At least two numbers that are not all identical.

    Returns:
        One z-score per input value, in input order.

    Raises:
        ValueError: If ``data`` has fewer than two values, or if all values
            are equal (the standard deviation is zero, so z-scores are
            undefined).
    """
    if len(data) < 2:
        raise ValueError("Data must contain at least two numbers to calculate z-scores.")

    mean = statistics.mean(data)
    std_dev = statistics.stdev(data)

    # Constant data would otherwise surface as an unexplained ZeroDivisionError.
    if std_dev == 0:
        raise ValueError("Standard deviation is zero; z-scores are undefined for constant data.")

    return [(x - mean) / std_dev for x in data]

# Example usage: the mean is 30 and the sample standard deviation is sqrt(250)
# (about 15.81), so the z-scores are roughly [-1.26, -0.63, 0.0, 0.63, 1.26].
data = [10, 20, 30, 40, 50]
z_scores = calculate_z_scores(data)
print(f"Z-scores: {z_scores}")


In [None]:
import random
from typing import List, Tuple
from math import sqrt

def euclidean_distance(p1: Tuple[float, float], p2: Tuple[float, float]) -> float:
    """Return the straight-line distance between the 2D points ``p1`` and ``p2``."""
    x_gap = p1[0] - p2[0]
    y_gap = p1[1] - p2[1]
    squared_length = x_gap ** 2 + y_gap ** 2
    return sqrt(squared_length)

def calculate_centroid(points: List[Tuple[float, float]]) -> Tuple[float, float]:
    """Return the coordinate-wise mean of a list of 2D points.

    An empty list yields ``(0.0, 0.0)``.
    """
    count = len(points)
    if count == 0:
        return (0.0, 0.0)

    total_x = sum(point[0] for point in points)
    total_y = sum(point[1] for point in points)
    return (total_x / count, total_y / count)

def k_means_clustering(data_points: List[Tuple[float, float]], k: int) -> List[Tuple[float, float]]:
    """Cluster 2D points with k-means and return the final centroids.

    Initial centroids are drawn with ``random.sample``, so results can differ
    between runs unless the ``random`` module is seeded by the caller.

    Args:
        data_points: The 2D points to cluster.
        k: Number of clusters. ``random.sample`` raises ``ValueError`` when
            ``k`` is negative or larger than ``len(data_points)``.

    Returns:
        A list of ``k`` centroids, taken once an iteration leaves every
        centroid unchanged.
    """
    # Step 1: pick k distinct positions of the input as starting centroids.
    centroids = random.sample(data_points, k)

    # Step 2: iterate until an update leaves the centroids unchanged.
    previous_centroids = []
    while centroids != previous_centroids:
        previous_centroids = centroids[:]

        # Step 3: assign each point to its nearest centroid (the first one on ties).
        clusters = [[] for _ in range(k)]
        for point in data_points:
            distances = [euclidean_distance(point, centroid) for centroid in centroids]
            clusters[distances.index(min(distances))].append(point)

        # Step 4: move every centroid to the mean of its members. A cluster
        # that received no points keeps its previous centroid; averaging an
        # empty cluster would teleport it to (0.0, 0.0), a location unrelated
        # to the data (this happens e.g. when duplicate points are sampled as
        # initial centroids).
        centroids = [
            calculate_centroid(members) if members else previous_centroids[index]
            for index, members in enumerate(clusters)
        ]

    return centroids

# Example usage: eight 2D points grouped into three clusters.
data_points = [
    (2.0, 3.0), (5.0, 8.0), (1.5, 1.8), (6.0, 9.0), 
    (2.5, 2.5), (8.0, 8.0), (1.0, 0.6), (9.0, 11.0)
]

# The starting centroids are sampled randomly and no seed is set here, so the
# printed centroids may differ from run to run.
k = 3
final_centroids = k_means_clustering(data_points, k)
print(f"Final centroids: {final_centroids}")


In [None]:
from typing import List

def f1_score(true_labels: List[int], predicted_labels: List[int]) -> float:
    """Return the F1 score of binary predictions, with 1 as the positive class.

    Args:
        true_labels: Ground-truth labels (0 or 1).
        predicted_labels: Predicted labels (0 or 1), aligned with
            ``true_labels``.

    Returns:
        The harmonic mean of precision and recall, or ``0.0`` when there are
        no true positives (precision and recall are then both zero).

    Raises:
        ValueError: If the two label lists differ in length.
    """
    # zip() would silently ignore the unmatched tail of the longer list.
    if len(true_labels) != len(predicted_labels):
        raise ValueError("true_labels and predicted_labels must have the same length.")

    # Tally the confusion-matrix cells in a single pass.
    tp = fp = fn = 0
    for true, pred in zip(true_labels, predicted_labels):
        if pred == 1:
            if true == 1:
                tp += 1
            elif true == 0:
                fp += 1
        elif pred == 0 and true == 1:
            fn += 1

    # Without true positives both precision and recall are 0; return early to
    # avoid dividing by zero.
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * (precision * recall) / (precision + recall)

# Example usage: these labels give 4 true positives, 1 false positive and
# 1 false negative, so precision = recall = 0.8 and the F1 score prints as 0.800000.
true_labels = [1, 0, 1, 1, 0, 1, 0, 0, 1, 0]
predicted_labels = [1, 0, 1, 0, 0, 1, 1, 0, 1, 0]

f1 = f1_score(true_labels, predicted_labels)
print(f"F1 Score: {f1:.6f}")



In [None]:
from typing import List, Dict

def create_histogram(data: List[float], bins: int) -> Dict[str, int]:
    """Count how many values fall into each of ``bins`` equal-width intervals.

    The intervals span ``min(data)`` to ``max(data)``. Every bin is half-open
    (``[lower, upper)``) except the last, which is closed (``[lower, upper]``)
    so that the maximum value is counted. When all values are equal, the range
    is widened to ``value - 0.5 .. value + 0.5`` so the bins have non-zero
    width.

    Args:
        data: The values to bucket; must not be empty.
        bins: Number of intervals; must be positive.

    Returns:
        A dict mapping each interval, formatted with two decimals, to its
        count, in ascending interval order. Ranges so narrow that adjacent
        edges format to the same two decimals produce colliding keys.

    Raises:
        ValueError: If ``bins`` is not positive or ``data`` is empty.
    """
    if bins <= 0:
        raise ValueError("Number of bins must be a positive integer.")
    if not data:
        raise ValueError("Cannot build a histogram from empty data.")

    min_value = min(data)
    max_value = max(data)
    if min_value == max_value:
        # Zero-width bins could never contain anything; widen the range
        # symmetrically around the single distinct value instead.
        min_value -= 0.5
        max_value += 0.5

    bin_width = (max_value - min_value) / bins

    # Compute every edge exactly once and share it between neighbouring bins,
    # so rounding cannot open a gap or an overlap between them. The final edge
    # is pinned to max_value rather than recomputed.
    edges = [min_value + i * bin_width for i in range(bins)]
    edges.append(max_value)

    histogram = {}
    last_index = bins - 1
    for i in range(bins):
        lower_bound = edges[i]
        upper_bound = edges[i + 1]

        if i == last_index:
            # Closed on the right so the maximum value is not dropped.
            count = sum(1 for x in data if lower_bound <= x <= upper_bound)
            bin_range = f"[{lower_bound:.2f}, {upper_bound:.2f}]"
        else:
            count = sum(1 for x in data if lower_bound <= x < upper_bound)
            bin_range = f"[{lower_bound:.2f}, {upper_bound:.2f})"

        histogram[bin_range] = count

    return histogram

# Example usage: bucket ten sample values into 5 equal-width bins spanning
# min(data) = 1.2 to max(data) = 10.1 and print the label -> count mapping.
data = [1.2, 2.5, 3.1, 4.8, 5.0, 6.7, 7.5, 8.0, 9.3, 10.1]
bins = 5
histogram = create_histogram(data, bins)
print("Histogram:", histogram)


In [None]:
from typing import List

def min_max_normalization(data: List[float]) -> List[float]:
    """Rescale values linearly so the minimum maps to 0.0 and the maximum to 1.0.

    Args:
        data: The values to rescale.

    Returns:
        A new list with ``(x - min) / (max - min)`` for every ``x``. An empty
        input yields an empty list. When all values are equal there is no
        range to scale by, so every value maps to ``0.0``.
    """
    if not data:
        return []

    min_value = min(data)
    max_value = max(data)
    value_range = max_value - min_value

    # Constant data would otherwise divide by zero.
    if value_range == 0:
        return [0.0 for _ in data]

    return [(x - min_value) / value_range for x in data]

# Example usage: min is 10 and max is 50, so this prints [0.0, 0.25, 0.5, 0.75, 1.0].
data = [10, 20, 30, 40, 50]
normalized_data = min_max_normalization(data)
print("Normalized Data:", normalized_data)


In [None]:
from typing import List
import math

def euclidean_distance(point1: List[float], point2: List[float]) -> float:
    """Return the Euclidean distance between two points of equal dimension.

    Raises:
        ValueError: If the points have a different number of coordinates.
    """
    if len(point1) != len(point2):
        raise ValueError("Points must have the same dimensionality.")

    # Sum the squared per-axis differences, then take the square root.
    sum_of_squares = sum((a - b) ** 2 for a, b in zip(point1, point2))
    return math.sqrt(sum_of_squares)

# Example usage: every coordinate differs by 3, so the distance is
# sqrt(27), about 5.196.
point1 = [1.0, 2.0, 3.0]
point2 = [4.0, 5.0, 6.0]
distance = euclidean_distance(point1, point2)
print(f"Euclidean Distance: {distance}")
