Skip to content

Add t-SNE algorithm #13499

@itsomg134

Description

@itsomg134

Feature description

"""
The Algorithms - Python
A comprehensive collection of algorithms implemented in Python for educational purposes.
All implementations are designed for learning and may not be optimized for production use.
"""

import math
from typing import List, Optional, Tuple, Any
from collections import deque, defaultdict
import heapq

# ==================== SORTING ALGORITHMS ====================

def bubble_sort(arr: List[int]) -> List[int]:
"""
Bubble Sort Algorithm
Time Complexity: O(n²), Space Complexity: O(1)
"""
n = len(arr)
for i in range(n):
swapped = False
for j in range(0, n - i - 1):
if arr[j] > arr[j + 1]:
arr[j], arr[j + 1] = arr[j + 1], arr[j]
swapped = True
if not swapped:
break
return arr

def quick_sort(arr: List[int]) -> List[int]:
"""
Quick Sort Algorithm
Time Complexity: O(n log n) average, O(n²) worst
Space Complexity: O(log n)
"""
if len(arr) <= 1:
return arr
pivot = arr[len(arr) // 2]
left = [x for x in arr if x < pivot]
middle = [x for x in arr if x == pivot]
right = [x for x in arr if x > pivot]
return quick_sort(left) + middle + quick_sort(right)

def merge_sort(arr: List[int]) -> List[int]:
"""
Merge Sort Algorithm
Time Complexity: O(n log n), Space Complexity: O(n)
"""
if len(arr) <= 1:
return arr

mid = len(arr) // 2
left = merge_sort(arr[:mid])
right = merge_sort(arr[mid:])

return merge(left, right)

def merge(left: List[int], right: List[int]) -> List[int]:
"""Helper function for merge sort"""
result = []
i = j = 0

while i < len(left) and j < len(right):
    if left[i] <= right[j]:
        result.append(left[i])
        i += 1
    else:
        result.append(right[j])
        j += 1

result.extend(left[i:])
result.extend(right[j:])
return result

def heap_sort(arr: List[int]) -> List[int]:
"""
Heap Sort Algorithm
Time Complexity: O(n log n), Space Complexity: O(1)
"""
def heapify(arr, n, i):
largest = i
left = 2 * i + 1
right = 2 * i + 2

    if left < n and arr[left] > arr[largest]:
        largest = left
    if right < n and arr[right] > arr[largest]:
        largest = right
    
    if largest != i:
        arr[i], arr[largest] = arr[largest], arr[i]
        heapify(arr, n, largest)

n = len(arr)
for i in range(n // 2 - 1, -1, -1):
    heapify(arr, n, i)

for i in range(n - 1, 0, -1):
    arr[0], arr[i] = arr[i], arr[0]
    heapify(arr, i, 0)

return arr

# ==================== SEARCHING ALGORITHMS ====================

def binary_search(arr: List[int], target: int) -> int:
"""
Binary Search Algorithm (iterative)
Time Complexity: O(log n), Space Complexity: O(1)
Returns: index of target or -1 if not found
"""
left, right = 0, len(arr) - 1

while left <= right:
    mid = (left + right) // 2
    if arr[mid] == target:
        return mid
    elif arr[mid] < target:
        left = mid + 1
    else:
        right = mid - 1

return -1

def binary_search_recursive(arr: List[int], target: int, left: int = 0, right: int = None) -> int:
"""
Binary Search Algorithm (recursive)
Time Complexity: O(log n), Space Complexity: O(log n)
"""
if right is None:
right = len(arr) - 1

if left > right:
    return -1

mid = (left + right) // 2
if arr[mid] == target:
    return mid
elif arr[mid] < target:
    return binary_search_recursive(arr, target, mid + 1, right)
else:
    return binary_search_recursive(arr, target, left, mid - 1)

def linear_search(arr: List[int], target: int) -> int:
"""
Linear Search Algorithm
Time Complexity: O(n), Space Complexity: O(1)
"""
for i, val in enumerate(arr):
if val == target:
return i
return -1

def jump_search(arr: List[int], target: int) -> int:
"""
Jump Search Algorithm
Time Complexity: O(√n), Space Complexity: O(1)
"""
n = len(arr)
step = int(math.sqrt(n))
prev = 0

while arr[min(step, n) - 1] < target:
    prev = step
    step += int(math.sqrt(n))
    if prev >= n:
        return -1

while arr[prev] < target:
    prev += 1
    if prev == min(step, n):
        return -1

if arr[prev] == target:
    return prev

return -1

# ==================== GRAPH ALGORITHMS ====================

class Graph:
"""Graph representation using adjacency list"""

def __init__(self):
    self.graph = defaultdict(list)

def add_edge(self, u: int, v: int):
    """Add an edge to the graph"""
    self.graph[u].append(v)

def bfs(self, start: int) -> List[int]:
    """
    Breadth-First Search
    Time Complexity: O(V + E), Space Complexity: O(V)
    """
    visited = set()
    queue = deque([start])
    result = []
    
    while queue:
        vertex = queue.popleft()
        if vertex not in visited:
            visited.add(vertex)
            result.append(vertex)
            queue.extend([v for v in self.graph[vertex] if v not in visited])
    
    return result

def dfs(self, start: int) -> List[int]:
    """
    Depth-First Search (iterative)
    Time Complexity: O(V + E), Space Complexity: O(V)
    """
    visited = set()
    stack = [start]
    result = []
    
    while stack:
        vertex = stack.pop()
        if vertex not in visited:
            visited.add(vertex)
            result.append(vertex)
            stack.extend(reversed(self.graph[vertex]))
    
    return result

def dfs_recursive(self, start: int, visited: Optional[set] = None, result: Optional[List[int]] = None) -> List[int]:
    """
    Depth-First Search (recursive)
    Time Complexity: O(V + E), Space Complexity: O(V)
    """
    if visited is None:
        visited = set()
    if result is None:
        result = []
    
    visited.add(start)
    result.append(start)
    
    for neighbor in self.graph[start]:
        if neighbor not in visited:
            self.dfs_recursive(neighbor, visited, result)
    
    return result

def dijkstra(graph: dict, start: int) -> dict:
"""
Dijkstra's Shortest Path Algorithm
Time Complexity: O((V + E) log V)
graph format: {node: [(neighbor, weight), ...]}
"""
distances = {node: float('inf') for node in graph}
distances[start] = 0
pq = [(0, start)]

while pq:
    current_dist, current = heapq.heappop(pq)
    
    if current_dist > distances[current]:
        continue
    
    for neighbor, weight in graph.get(current, []):
        distance = current_dist + weight
        if distance < distances[neighbor]:
            distances[neighbor] = distance
            heapq.heappush(pq, (distance, neighbor))

return distances

# ==================== DYNAMIC PROGRAMMING ====================

def fibonacci(n: int) -> int:
"""
Fibonacci Number (Dynamic Programming)
Time Complexity: O(n), Space Complexity: O(n)
"""
if n <= 1:
return n

dp = [0] * (n + 1)
dp[1] = 1

for i in range(2, n + 1):
    dp[i] = dp[i - 1] + dp[i - 2]

return dp[n]

def fibonacci_optimized(n: int) -> int:
"""
Fibonacci Number (Space Optimized)
Time Complexity: O(n), Space Complexity: O(1)
"""
if n <= 1:
return n

a, b = 0, 1
for _ in range(2, n + 1):
    a, b = b, a + b

return b

def knapsack_01(weights: List[int], values: List[int], capacity: int) -> int:
"""
0/1 Knapsack Problem
Time Complexity: O(n * capacity), Space Complexity: O(n * capacity)
"""
n = len(weights)
dp = [[0] * (capacity + 1) for _ in range(n + 1)]

for i in range(1, n + 1):
    for w in range(1, capacity + 1):
        if weights[i - 1] <= w:
            dp[i][w] = max(values[i - 1] + dp[i - 1][w - weights[i - 1]], dp[i - 1][w])
        else:
            dp[i][w] = dp[i - 1][w]

return dp[n][capacity]

def longest_common_subsequence(text1: str, text2: str) -> int:
"""
Longest Common Subsequence
Time Complexity: O(m * n), Space Complexity: O(m * n)
"""
m, n = len(text1), len(text2)
dp = [[0] * (n + 1) for _ in range(m + 1)]

for i in range(1, m + 1):
    for j in range(1, n + 1):
        if text1[i - 1] == text2[j - 1]:
            dp[i][j] = dp[i - 1][j - 1] + 1
        else:
            dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

return dp[m][n]

def coin_change(coins: List[int], amount: int) -> int:
"""
Coin Change Problem (minimum coins)
Time Complexity: O(amount * len(coins))
Returns: minimum coins needed or -1 if impossible
"""
dp = [float('inf')] * (amount + 1)
dp[0] = 0

for coin in coins:
    for x in range(coin, amount + 1):
        dp[x] = min(dp[x], dp[x - coin] + 1)

return dp[amount] if dp[amount] != float('inf') else -1

# ==================== STRING ALGORITHMS ====================

def is_palindrome(s: str) -> bool:
"""
Check if string is palindrome
Time Complexity: O(n), Space Complexity: O(1)
"""
left, right = 0, len(s) - 1

while left < right:
    if s[left] != s[right]:
        return False
    left += 1
    right -= 1

return True

def kmp_search(text: str, pattern: str) -> List[int]:
"""
Knuth-Morris-Pratt Pattern Matching
Time Complexity: O(n + m), Space Complexity: O(m)
Returns: list of starting indices where pattern is found
"""
def compute_lps(pattern):
lps = [0] * len(pattern)
length = 0
i = 1

    while i < len(pattern):
        if pattern[i] == pattern[length]:
            length += 1
            lps[i] = length
            i += 1
        else:
            if length != 0:
                length = lps[length - 1]
            else:
                lps[i] = 0
                i += 1
    return lps

if not pattern:
    return []

lps = compute_lps(pattern)
result = []
i = j = 0

while i < len(text):
    if pattern[j] == text[i]:
        i += 1
        j += 1
    
    if j == len(pattern):
        result.append(i - j)
        j = lps[j - 1]
    elif i < len(text) and pattern[j] != text[i]:
        if j != 0:
            j = lps[j - 1]
        else:
            i += 1

return result

def longest_palindrome_substring(s: str) -> str:
"""
Find longest palindromic substring
Time Complexity: O(n²), Space Complexity: O(1)
"""
if not s:
return ""

def expand_around_center(left, right):
    while left >= 0 and right < len(s) and s[left] == s[right]:
        left -= 1
        right += 1
    return right - left - 1

start = end = 0

for i in range(len(s)):
    len1 = expand_around_center(i, i)
    len2 = expand_around_center(i, i + 1)
    max_len = max(len1, len2)
    
    if max_len > end - start:
        start = i - (max_len - 1) // 2
        end = i + max_len // 2

return s[start:end + 1]

# ==================== MATHEMATICAL ALGORITHMS ====================

def gcd(a: int, b: int) -> int:
"""
Greatest Common Divisor (Euclidean Algorithm)
Time Complexity: O(log(min(a, b)))
"""
while b:
a, b = b, a % b
return a

def lcm(a: int, b: int) -> int:
"""
Least Common Multiple
Time Complexity: O(log(min(a, b)))
"""
return abs(a * b) // gcd(a, b)

def is_prime(n: int) -> bool:
"""
Prime Number Check
Time Complexity: O(√n)
"""
if n < 2:
return False
if n == 2:
return True
if n % 2 == 0:
return False

for i in range(3, int(math.sqrt(n)) + 1, 2):
    if n % i == 0:
        return False

return True

def sieve_of_eratosthenes(n: int) -> List[int]:
"""
Find all primes up to n
Time Complexity: O(n log log n)
"""
if n < 2:
return []

is_prime = [True] * (n + 1)
is_prime[0] = is_prime[1] = False

for i in range(2, int(math.sqrt(n)) + 1):
    if is_prime[i]:
        for j in range(i * i, n + 1, i):
            is_prime[j] = False

return [i for i in range(n + 1) if is_prime[i]]

def power(base: int, exp: int, mod: int = None) -> int:
"""
Fast Exponentiation (Binary Exponentiation)
Time Complexity: O(log exp)
"""
result = 1
base = base % mod if mod else base

while exp > 0:
    if exp % 2 == 1:
        result = (result * base) % mod if mod else result * base
    exp = exp >> 1
    base = (base * base) % mod if mod else base * base

return result

# ==================== DATA STRUCTURES ====================

class Stack:
"""Stack implementation using list"""

def __init__(self):
    self.items = []

def push(self, item: Any):
    self.items.append(item)

def pop(self) -> Any:
    if not self.is_empty():
        return self.items.pop()
    raise IndexError("Pop from empty stack")

def peek(self) -> Any:
    if not self.is_empty():
        return self.items[-1]
    raise IndexError("Peek from empty stack")

def is_empty(self) -> bool:
    return len(self.items) == 0

def size(self) -> int:
    return len(self.items)

class Queue:
"""Queue implementation using deque"""

def __init__(self):
    self.items = deque()

def enqueue(self, item: Any):
    self.items.append(item)

def dequeue(self) -> Any:
    if not self.is_empty():
        return self.items.popleft()
    raise IndexError("Dequeue from empty queue")

def is_empty(self) -> bool:
    return len(self.items) == 0

def size(self) -> int:
    return len(self.items)

# ==================== MACHINE LEARNING ALGORITHMS ====================

class TSNE:
"""
t-SNE (t-Distributed Stochastic Neighbor Embedding)
Dimensionality reduction algorithm for visualization

Reference: Van der Maaten & Hinton (2008)
Time Complexity: O(n² * iterations)
"""

def __init__(self, n_components: int = 2, perplexity: float = 30.0, 
             learning_rate: float = 200.0, n_iter: int = 1000):
    """
    Initialize t-SNE
    
    Args:
        n_components: Target dimensionality (typically 2 or 3)
        perplexity: Balance between local and global aspects (5-50)
        learning_rate: Learning rate for gradient descent
        n_iter: Number of iterations
    """
    self.n_components = n_components
    self.perplexity = perplexity
    self.learning_rate = learning_rate
    self.n_iter = n_iter
    self.embedding_ = None

def _euclidean_distance(self, X: List[List[float]]) -> List[List[float]]:
    """Compute pairwise Euclidean distances"""
    n = len(X)
    distances = [[0.0] * n for _ in range(n)]
    
    for i in range(n):
        for j in range(i + 1, n):
            dist = sum((X[i][k] - X[j][k]) ** 2 for k in range(len(X[0])))
            dist = math.sqrt(dist)
            distances[i][j] = dist
            distances[j][i] = dist
    
    return distances

def _compute_perplexity(self, D: List[float], beta: float) -> Tuple[float, List[float]]:
    """Compute perplexity and probabilities for a given beta"""
    P = [math.exp(-D[j] * beta) for j in range(len(D))]
    sum_P = sum(P)
    
    if sum_P == 0:
        sum_P = 1e-8
    
    P = [p / sum_P for p in P]
    
    H = -sum(p * math.log2(p + 1e-8) for p in P if p > 0)
    
    return H, P

def _binary_search_perplexity(self, distances_i: List[float], target_perplexity: float, 
                               tol: float = 1e-5, max_iter: int = 50) -> List[float]:
    """Binary search for sigma that produces target perplexity"""
    beta_min = -float('inf')
    beta_max = float('inf')
    beta = 1.0
    
    for _ in range(max_iter):
        H, P = self._compute_perplexity(distances_i, beta)
        H_diff = H - math.log2(target_perplexity)
        
        if abs(H_diff) < tol:
            break
        
        if H_diff > 0:
            beta_min = beta
            beta = (beta + beta_max) / 2 if beta_max != float('inf') else beta * 2
        else:
            beta_max = beta
            beta = (beta + beta_min) / 2 if beta_min != -float('inf') else beta / 2
    
    return P

def _compute_joint_probabilities(self, X: List[List[float]]) -> List[List[float]]:
    """Compute joint probability matrix P from high-dimensional data"""
    n = len(X)
    distances = self._euclidean_distance(X)
    P = [[0.0] * n for _ in range(n)]
    
    # Compute conditional probabilities
    for i in range(n):
        distances_i = [distances[i][j] if j != i else float('inf') for j in range(n)]
        P_i = self._binary_search_perplexity(distances_i, self.perplexity)
        
        for j in range(n):
            if i != j:
                P[i][j] = P_i[j]
    
    # Symmetrize and normalize
    for i in range(n):
        for j in range(n):
            P[i][j] = (P[i][j] + P[j][i]) / (2 * n)
            P[i][j] = max(P[i][j], 1e-12)  # Numerical stability
    
    return P

def _compute_low_dim_affinities(self, Y: List[List[float]]) -> List[List[float]]:
    """Compute Student t-distribution affinities in low-dimensional space"""
    n = len(Y)
    Q = [[0.0] * n for _ in range(n)]
    
    for i in range(n):
        for j in range(n):
            if i != j:
                dist_sq = sum((Y[i][k] - Y[j][k]) ** 2 for k in range(len(Y[0])))
                Q[i][j] = 1.0 / (1.0 + dist_sq)
    
    # Normalize
    sum_Q = sum(sum(row) for row in Q)
    if sum_Q > 0:
        Q = [[q / sum_Q for q in row] for row in Q]
    
    # Ensure numerical stability
    for i in range(n):
        for j in range(n):
            Q[i][j] = max(Q[i][j], 1e-12)
    
    return Q

def _compute_gradient(self, P: List[List[float]], Q: List[List[float]], 
                     Y: List[List[float]]) -> List[List[float]]:
    """Compute gradient of KL divergence"""
    n = len(Y)
    n_components = len(Y[0])
    gradient = [[0.0] * n_components for _ in range(n)]
    
    for i in range(n):
        for j in range(n):
            if i != j:
                dist_sq = sum((Y[i][k] - Y[j][k]) ** 2 for k in range(n_components))
                mult = (P[i][j] - Q[i][j]) / (1.0 + dist_sq)
                
                for d in range(n_components):
                    gradient[i][d] += 4 * mult * (Y[i][d] - Y[j][d])
    
    return gradient

def fit_transform(self, X: List[List[float]]) -> List[List[float]]:
    """
    Fit t-SNE and return embedded coordinates
    
    Args:
        X: High-dimensional data (n_samples × n_features)
    
    Returns:
        Y: Low-dimensional embedding (n_samples × n_components)
    """
    n = len(X)
    
    # Initialize low-dimensional embedding randomly
    import random
    Y = [[random.gauss(0, 0.0001) for _ in range(self.n_components)] for _ in range(n)]
    
    # Compute pairwise affinities in high-dimensional space
    P = self._compute_joint_probabilities(X)
    
    # Early exaggeration
    for i in range(n):
        for j in range(n):
            P[i][j] *= 4.0
    
    # Gradient descent with momentum
    Y_momentum = [[0.0] * self.n_components for _ in range(n)]
    
    for iteration in range(self.n_iter):
        # Remove early exaggeration after 250 iterations
        if iteration == 250:
            for i in range(n):
                for j in range(n):
                    P[i][j] /= 4.0
        
        # Compute Q matrix (low-dimensional affinities)
        Q = self._compute_low_dim_affinities(Y)
        
        # Compute gradient
        gradient = self._compute_gradient(P, Q, Y)
        
        # Update with momentum
        momentum = 0.5 if iteration < 250 else 0.8
        
        for i in range(n):
            for d in range(self.n_components):
                Y_momentum[i][d] = momentum * Y_momentum[i][d] - self.learning_rate * gradient[i][d]
                Y[i][d] += Y_momentum[i][d]
        
        # Center the embedding
        means = [sum(Y[i][d] for i in range(n)) / n for d in range(self.n_components)]
        for i in range(n):
            for d in range(self.n_components):
                Y[i][d] -= means[d]
        
        # Print progress every 100 iterations
        if (iteration + 1) % 100 == 0:
            # Compute KL divergence
            kl_div = sum(P[i][j] * math.log((P[i][j] + 1e-12) / (Q[i][j] + 1e-12)) 
                       for i in range(n) for j in range(n) if i != j)
            print(f"Iteration {iteration + 1}/{self.n_iter}, KL divergence: {kl_div:.4f}")
    
    self.embedding_ = Y
    return Y

# ==================== DEMO USAGE ====================

if name == "main":
print("=" * 50)
print("THE ALGORITHMS - PYTHON DEMO")
print("=" * 50)

# Sorting Demo
print("\n--- SORTING ALGORITHMS ---")
arr = [64, 34, 25, 12, 22, 11, 90]
print(f"Original: {arr}")
print(f"Bubble Sort: {bubble_sort(arr.copy())}")
print(f"Quick Sort: {quick_sort(arr.copy())}")
print(f"Merge Sort: {merge_sort(arr.copy())}")

# Searching Demo
print("\n--- SEARCHING ALGORITHMS ---")
sorted_arr = [1, 3, 5, 7, 9, 11, 13, 15]
target = 7
print(f"Array: {sorted_arr}, Target: {target}")
print(f"Binary Search: Index {binary_search(sorted_arr, target)}")

# Dynamic Programming Demo
print("\n--- DYNAMIC PROGRAMMING ---")
print(f"Fibonacci(10): {fibonacci(10)}")
print(f"LCS('ABCDGH', 'AEDFHR'): {longest_common_subsequence('ABCDGH', 'AEDFHR')}")

# Math Demo
print("\n--- MATHEMATICAL ALGORITHMS ---")
print(f"GCD(48, 18): {gcd(48, 18)}")
print(f"Is 17 prime? {is_prime(17)}")
print(f"Primes up to 30: {sieve_of_eratosthenes(30)}")

print("\n" + "=" * 50)
print("Implementations complete! Visit TheAlgorithms/Python on GitHub for more!")
print("=" * 50)

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement — This PR modified some existing files

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions