From 8c581f7366fc97140ec13b47e20b3bcc0df8a904 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Mon, 13 Oct 2025 22:56:28 +0500
Subject: [PATCH 01/14] Added t-SNE with Iris dataset example

---
 .../t_stochastic_neighbour_embedding.py       | 158 ++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 machine_learning/t_stochastic_neighbour_embedding.py

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
new file mode 100644
index 000000000000..bcf177567c46
--- /dev/null
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -0,0 +1,158 @@
+import doctest
+
+import numpy as np
+from numpy import ndarray
+from sklearn.datasets import load_iris
+
+
+def collect_dataset() -> tuple[ndarray, ndarray]:
+    """
+    Load Iris dataset and return features and labels.
+    Returns:
+        tuple[ndarray, ndarray]: feature matrix and target labels
+    Example:
+    >>> x, y = collect_dataset()
+    >>> x.shape
+    (150, 4)
+    >>> y.shape
+    (150,)
+    """
+    data = load_iris()
+    return np.array(data.data), np.array(data.target)
+
+
+def compute_pairwise_affinities(data_x: ndarray, sigma: float = 1.0) -> ndarray:
+    """
+    Compute high-dimensional affinities (P matrix) using Gaussian kernel.
+    Args:
+        data_x: Input data of shape (n_samples, n_features)
+        sigma: Gaussian kernel bandwidth
+    Returns:
+        ndarray: Symmetrized probability matrix
+    Example:
+    >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
+    >>> p = compute_pairwise_affinities(x)
+    >>> float(round(p[0, 1], 3))
+    0.25
+    """
+    n_samples = data_x.shape[0]
+    sum_x = np.sum(np.square(data_x), axis=1)
+    dist_sq = np.add(np.add(-2 * np.dot(data_x, data_x.T), sum_x).T, sum_x)
+    p = np.exp(-dist_sq / (2 * sigma**2))
+    np.fill_diagonal(p, 0)
+    p /= np.sum(p)
+    return (p + p.T) / (2 * n_samples)
+
+
+def compute_low_dim_affinities(low_dim_embedding: ndarray) -> tuple[ndarray, ndarray]:
+    """
+    Compute low-dimensional affinities (Q matrix) using Student-t distribution.
+    Args:
+        low_dim_embedding: shape (n_samples, n_components)
+    Returns:
+        tuple[ndarray, ndarray]: Q probability matrix and numerator
+    Example:
+    >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
+    >>> q, num = compute_low_dim_affinities(y)
+    >>> q.shape
+    (2, 2)
+    """
+    sum_y = np.sum(np.square(low_dim_embedding), axis=1)
+    numerator = 1 / (
+        1
+        + np.add(
+            np.add(-2 * np.dot(low_dim_embedding, low_dim_embedding.T), sum_y).T,
+            sum_y,
+        )
+    )
+    np.fill_diagonal(numerator, 0)
+    q = numerator / np.sum(numerator)
+    return q, numerator
+
+
+def apply_tsne(
+    data_x: ndarray,
+    n_components: int = 2,
+    learning_rate: float = 200.0,
+    n_iter: int = 500,
+) -> ndarray:
+    """
+    Apply t-SNE for dimensionality reduction.
+    Args:
+        data_x: Original dataset (features)
+        n_components: Target dimension (2D or 3D)
+        learning_rate: Step size for gradient descent
+        n_iter: Number of iterations
+    Returns:
+        ndarray: Low-dimensional embedding of the data
+    Example:
+    >>> x, _ = collect_dataset()
+    >>> y_emb = apply_tsne(x, n_components=2, n_iter=50)
+    >>> y_emb.shape
+    (150, 2)
+    """
+    if n_components < 1 or n_iter < 1:
+        raise ValueError("n_components and n_iter must be >= 1")
+
+    n_samples = data_x.shape[0]
+    rng = np.random.default_rng()
+    y = rng.standard_normal((n_samples, n_components)) * 1e-4
+
+    p = compute_pairwise_affinities(data_x)
+    p = np.maximum(p, 1e-12)
+
+    y_inc = np.zeros_like(y)
+    momentum = 0.5
+
+    for i in range(n_iter):
+        q, num = compute_low_dim_affinities(y)
+        q = np.maximum(q, 1e-12)
+
+        pq = p - q
+        d_y = 4 * (
+            np.dot((pq * num), y)
+            - np.multiply(np.sum(pq * num, axis=1)[:, np.newaxis], y)
+        )
+
+        y_inc = momentum * y_inc - learning_rate * d_y
+        y += y_inc
+
+        if i == int(n_iter / 4):
+            momentum = 0.8
+
+    return y
+
+
+def main() -> None:
+    """
+    Run t-SNE on Iris dataset and display the first 5 embeddings.
+    Example:
+    >>> main()  # doctest: +ELLIPSIS
+    t-SNE embedding (first 5 points):
+    [[...
+    """
+    data_x,labels = collect_dataset()
+    y_emb = apply_tsne(data_x, n_components=2, n_iter=300)
+
+    if not isinstance(y_emb, np.ndarray):
+        raise TypeError("t-SNE embedding must be an ndarray")
+
+    print("t-SNE embedding (first 5 points):")
+    print(y_emb[:5])
+
+    # Optional visualization ( Ruff/mypy compliant)
+    import matplotlib.pyplot as plt
+    plt.scatter(
+        y_emb[:, 0],
+        y_emb[:, 1],
+        c=labels,
+        cmap="viridis"
+    )
+    plt.title("t-SNE Visualization of Iris Dataset")
+    plt.xlabel("Dimension 1")
+    plt.ylabel("Dimension 2")
+    plt.show()
+
+if __name__ == "__main__":
+    # doctest.testmod()
+    main()

From a394193c651085546f8b1c1dbcaa26b6334e6295 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Mon, 13 Oct 2025 23:03:22 +0500
Subject: [PATCH 02/14] Enable doctest.testmod() in the t-SNE script entry point

---
 machine_learning/t_stochastic_neighbour_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index bcf177567c46..0ba534719918 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -154,5 +154,5 @@ def main() -> None:
     plt.show()
 
 if __name__ == "__main__":
-    # doctest.testmod()
+    doctest.testmod()
     main()

From 165f516346f82367a2b438b2093ed1fcf22ef59b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 13 Oct 2025 18:29:09 +0000
Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/t_stochastic_neighbour_embedding.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 0ba534719918..76281d74e2ac 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -131,7 +131,7 @@ def main() -> None:
     t-SNE embedding (first 5 points):
     [[...
     """
-    data_x,labels = collect_dataset()
+    data_x, labels = collect_dataset()
     y_emb = apply_tsne(data_x, n_components=2, n_iter=300)
 
     if not isinstance(y_emb, np.ndarray):
@@ -142,17 +142,14 @@ def main() -> None:
 
     # Optional visualization ( Ruff/mypy compliant)
     import matplotlib.pyplot as plt
-    plt.scatter(
-        y_emb[:, 0],
-        y_emb[:, 1],
-        c=labels,
-        cmap="viridis"
-    )
+
+    plt.scatter(y_emb[:, 0], y_emb[:, 1], c=labels, cmap="viridis")
     plt.title("t-SNE Visualization of Iris Dataset")
     plt.xlabel("Dimension 1")
     plt.ylabel("Dimension 2")
     plt.show()
 
+
 if __name__ == "__main__":
     doctest.testmod()
     main()

From fb0fdb4a7521ca7830a21af330644ac005be6876 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 14:32:17 +0500
Subject: [PATCH 04/14] Updated with descriptive variables

---
 .../t_stochastic_neighbour_embedding.py       | 172 ++++++++++--------
 1 file changed, 96 insertions(+), 76 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 76281d74e2ac..5712e4b386d0 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -1,5 +1,4 @@
 import doctest
-
 import numpy as np
 from numpy import ndarray
 from sklearn.datasets import load_iris
@@ -7,143 +6,163 @@
 
 def collect_dataset() -> tuple[ndarray, ndarray]:
     """
-    Load Iris dataset and return features and labels.
+    Load the Iris dataset and return features and labels.
+
     Returns:
-        tuple[ndarray, ndarray]: feature matrix and target labels
+        tuple[ndarray, ndarray]: Feature matrix and target labels.
+
     Example:
-    >>> x, y = collect_dataset()
-    >>> x.shape
-    (150, 4)
-    >>> y.shape
-    (150,)
+        >>> features, targets = collect_dataset()
+        >>> features.shape
+        (150, 4)
+        >>> targets.shape
+        (150,)
     """
-    data = load_iris()
-    return np.array(data.data), np.array(data.target)
+    iris_dataset = load_iris()
+    return np.array(iris_dataset.data), np.array(iris_dataset.target)
 
 
-def compute_pairwise_affinities(data_x: ndarray, sigma: float = 1.0) -> ndarray:
+def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
     """
-    Compute high-dimensional affinities (P matrix) using Gaussian kernel.
+    Compute high-dimensional affinities (P matrix) using a Gaussian kernel.
+
     Args:
-        data_x: Input data of shape (n_samples, n_features)
-        sigma: Gaussian kernel bandwidth
+        data_matrix: Input data of shape (n_samples, n_features).
+        sigma: Gaussian kernel bandwidth.
+
     Returns:
-        ndarray: Symmetrized probability matrix
+        ndarray: Symmetrized probability matrix.
+
     Example:
-    >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
-    >>> p = compute_pairwise_affinities(x)
-    >>> float(round(p[0, 1], 3))
-    0.25
+        >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
+        >>> probabilities = compute_pairwise_affinities(x)
+        >>> float(round(probabilities[0, 1], 3))
+        0.25
     """
-    n_samples = data_x.shape[0]
-    sum_x = np.sum(np.square(data_x), axis=1)
-    dist_sq = np.add(np.add(-2 * np.dot(data_x, data_x.T), sum_x).T, sum_x)
-    p = np.exp(-dist_sq / (2 * sigma**2))
-    np.fill_diagonal(p, 0)
-    p /= np.sum(p)
-    return (p + p.T) / (2 * n_samples)
+    n_samples = data_matrix.shape[0]
+    squared_sum = np.sum(np.square(data_matrix), axis=1)
+    squared_distance = np.add(np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum)
+
+    affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
+    np.fill_diagonal(affinity_matrix, 0)
+
+    affinity_matrix /= np.sum(affinity_matrix)
+    return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
 
 
-def compute_low_dim_affinities(low_dim_embedding: ndarray) -> tuple[ndarray, ndarray]:
+def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
     """
-    Compute low-dimensional affinities (Q matrix) using Student-t distribution.
+    Compute low-dimensional affinities (Q matrix) using a Student-t distribution.
+
     Args:
-        low_dim_embedding: shape (n_samples, n_components)
+        embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components).
+
     Returns:
-        tuple[ndarray, ndarray]: Q probability matrix and numerator
+        tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
+
     Example:
-    >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
-    >>> q, num = compute_low_dim_affinities(y)
-    >>> q.shape
-    (2, 2)
+        >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
+        >>> q_matrix, numerators = compute_low_dim_affinities(y)
+        >>> q_matrix.shape
+        (2, 2)
     """
-    sum_y = np.sum(np.square(low_dim_embedding), axis=1)
-    numerator = 1 / (
+    squared_sum = np.sum(np.square(embedding_matrix), axis=1)
+    numerator_matrix = 1 / (
         1
         + np.add(
-            np.add(-2 * np.dot(low_dim_embedding, low_dim_embedding.T), sum_y).T,
-            sum_y,
+            np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
+            squared_sum,
         )
     )
-    np.fill_diagonal(numerator, 0)
-    q = numerator / np.sum(numerator)
-    return q, numerator
+    np.fill_diagonal(numerator_matrix, 0)
+
+    q_matrix = numerator_matrix / np.sum(numerator_matrix)
+    return q_matrix, numerator_matrix
 
 
 def apply_tsne(
-    data_x: ndarray,
+    data_matrix: ndarray,
     n_components: int = 2,
     learning_rate: float = 200.0,
     n_iter: int = 500,
 ) -> ndarray:
     """
     Apply t-SNE for dimensionality reduction.
+
     Args:
-        data_x: Original dataset (features)
-        n_components: Target dimension (2D or 3D)
-        learning_rate: Step size for gradient descent
-        n_iter: Number of iterations
+        data_matrix: Original dataset (features).
+        n_components: Target dimension (2D or 3D).
+        learning_rate: Step size for gradient descent.
+        n_iter: Number of iterations.
+
     Returns:
-        ndarray: Low-dimensional embedding of the data
+        ndarray: Low-dimensional embedding of the data.
+
     Example:
-    >>> x, _ = collect_dataset()
-    >>> y_emb = apply_tsne(x, n_components=2, n_iter=50)
-    >>> y_emb.shape
-    (150, 2)
+        >>> features, _ = collect_dataset()
+        >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
+        >>> embedding.shape
+        (150, 2)
     """
     if n_components < 1 or n_iter < 1:
         raise ValueError("n_components and n_iter must be >= 1")
 
-    n_samples = data_x.shape[0]
+    n_samples = data_matrix.shape[0]
     rng = np.random.default_rng()
-    y = rng.standard_normal((n_samples, n_components)) * 1e-4
+    embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
 
-    p = compute_pairwise_affinities(data_x)
-    p = np.maximum(p, 1e-12)
+    high_dim_affinities = compute_pairwise_affinities(data_matrix)
+    high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
 
-    y_inc = np.zeros_like(y)
+    embedding_increment = np.zeros_like(embedding)
     momentum = 0.5
 
-    for i in range(n_iter):
-        q, num = compute_low_dim_affinities(y)
-        q = np.maximum(q, 1e-12)
+    for iteration in range(n_iter):
+        low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
+        low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
 
-        pq = p - q
-        d_y = 4 * (
-            np.dot((pq * num), y)
-            - np.multiply(np.sum(pq * num, axis=1)[:, np.newaxis], y)
+        affinity_diff = high_dim_affinities - low_dim_affinities
+
+        gradient = 4 * (
+            np.dot((affinity_diff * numerator_matrix), embedding)
+            - np.multiply(np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis], embedding)
         )
 
-        y_inc = momentum * y_inc - learning_rate * d_y
-        y += y_inc
+        embedding_increment = momentum * embedding_increment - learning_rate * gradient
+        embedding += embedding_increment
 
-        if i == int(n_iter / 4):
+        if iteration == int(n_iter / 4):
             momentum = 0.8
 
-    return y
+    return embedding
 
 
 def main() -> None:
     """
-    Run t-SNE on Iris dataset and display the first 5 embeddings.
+    Run t-SNE on the Iris dataset and display the first 5 embeddings.
+
     Example:
-    >>> main()  # doctest: +ELLIPSIS
-    t-SNE embedding (first 5 points):
-    [[...
+        >>> main()  # doctest: +ELLIPSIS
+        t-SNE embedding (first 5 points):
+        [[...
     """
-    data_x, labels = collect_dataset()
+    data_x,labels = collect_dataset()
     y_emb = apply_tsne(data_x, n_components=2, n_iter=300)
 
-    if not isinstance(y_emb, np.ndarray):
+    if not isinstance(embedding, np.ndarray):
         raise TypeError("t-SNE embedding must be an ndarray")
 
     print("t-SNE embedding (first 5 points):")
-    print(y_emb[:5])
+    print(embedding[:5])
 
     # Optional visualization ( Ruff/mypy compliant)
     import matplotlib.pyplot as plt
-
-    plt.scatter(y_emb[:, 0], y_emb[:, 1], c=labels, cmap="viridis")
+    plt.scatter(
+        y_emb[:, 0],
+        y_emb[:, 1],
+        c=labels,
+        cmap="viridis"
+    )
     plt.title("t-SNE Visualization of Iris Dataset")
     plt.xlabel("Dimension 1")
     plt.ylabel("Dimension 2")
@@ -153,3 +172,4 @@ def main() -> None:
 if __name__ == "__main__":
     doctest.testmod()
     main()
+        
\ No newline at end of file

From 18c5d96fe72a37564e9fcbfe868ea6d1eb2c9c32 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 14:43:51 +0500
Subject: [PATCH 05/14] Add descriptive variable names

---
 .../t_stochastic_neighbour_embedding.py       | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 5712e4b386d0..5de9c58636bd 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -146,8 +146,8 @@ def main() -> None:
         t-SNE embedding (first 5 points):
         [[...
     """
-    data_x,labels = collect_dataset()
-    y_emb = apply_tsne(data_x, n_components=2, n_iter=300)
+    features, labels = collect_dataset()
+    embedding = apply_tsne(features, n_components=2, n_iter=300)
 
     if not isinstance(embedding, np.ndarray):
         raise TypeError("t-SNE embedding must be an ndarray")
@@ -155,18 +155,14 @@ def main() -> None:
     print("t-SNE embedding (first 5 points):")
     print(embedding[:5])
 
-    # Optional visualization ( Ruff/mypy compliant)
-    import matplotlib.pyplot as plt
-    plt.scatter(
-        y_emb[:, 0],
-        y_emb[:, 1],
-        c=labels,
-        cmap="viridis"
-    )
-    plt.title("t-SNE Visualization of Iris Dataset")
-    plt.xlabel("Dimension 1")
-    plt.ylabel("Dimension 2")
-    plt.show()
+    # Optional visualization (Ruff/mypy compliant)
+
+    # import matplotlib.pyplot as plt
+    # plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
+    # plt.title("t-SNE Visualization of the Iris Dataset")
+    # plt.xlabel("Dimension 1")
+    # plt.ylabel("Dimension 2")
+    # plt.show()
 
 
 if __name__ == "__main__":

From ef68a5f37af56f28d8d2231acad230914a6de0e4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 Oct 2025 09:44:35 +0000
Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/t_stochastic_neighbour_embedding.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 5de9c58636bd..d81845d4bfaf 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -41,7 +41,9 @@ def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> nda
     """
     n_samples = data_matrix.shape[0]
     squared_sum = np.sum(np.square(data_matrix), axis=1)
-    squared_distance = np.add(np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum)
+    squared_distance = np.add(
+        np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum
+    )
 
     affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
     np.fill_diagonal(affinity_matrix, 0)
@@ -125,7 +127,10 @@ def apply_tsne(
 
         gradient = 4 * (
             np.dot((affinity_diff * numerator_matrix), embedding)
-            - np.multiply(np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis], embedding)
+            - np.multiply(
+                np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
+                embedding,
+            )
         )
 
         embedding_increment = momentum * embedding_increment - learning_rate * gradient
@@ -168,4 +173,3 @@ def main() -> None:
 if __name__ == "__main__":
     doctest.testmod()
     main()
-        
\ No newline at end of file

From d1a552db867aa94b9d2029c37bff18f8ed28c135 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 14:52:21 +0500
Subject: [PATCH 07/14] Prefix unused labels variable in main() with an underscore

---
 machine_learning/t_stochastic_neighbour_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 5de9c58636bd..c57ac072314d 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -146,7 +146,7 @@ def main() -> None:
         t-SNE embedding (first 5 points):
         [[...
     """
-    features, labels = collect_dataset()
+    features, _labels = collect_dataset()
     embedding = apply_tsne(features, n_components=2, n_iter=300)
 
     if not isinstance(embedding, np.ndarray):

From 6a495bd035f4b96187e6669dc6fd522f317d77d5 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 15:08:09 +0500
Subject: [PATCH 08/14] Add blank line between stdlib and third-party imports

---
 machine_learning/t_stochastic_neighbour_embedding.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index c5dd124f889f..7139152ff30c 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -1,4 +1,5 @@
 import doctest
+
 import numpy as np
 from numpy import ndarray
 from sklearn.datasets import load_iris

From 7e396757bab3cebd30cf98d8624cd4143032e41a Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 15:09:57 +0500
Subject: [PATCH 09/14] Update
 machine_learning/t_stochastic_neighbour_embedding.py

Co-authored-by: Christian Clauss <cclauss@me.com>
---
 machine_learning/t_stochastic_neighbour_embedding.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 7139152ff30c..424178097221 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -12,7 +12,6 @@ def collect_dataset() -> tuple[ndarray, ndarray]:
     Returns:
         tuple[ndarray, ndarray]: Feature matrix and target labels.
 
-    Example:
         >>> features, targets = collect_dataset()
         >>> features.shape
         (150, 4)

From 5e60cf2a020b2b3e253f7b6a7bca39e2992ceae2 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 15:29:46 +0500
Subject: [PATCH 10/14] Update
 machine_learning/t_stochastic_neighbour_embedding.py

Co-authored-by: Christian Clauss <cclauss@me.com>
---
 machine_learning/t_stochastic_neighbour_embedding.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 424178097221..81576b595f8c 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -12,11 +12,11 @@ def collect_dataset() -> tuple[ndarray, ndarray]:
     Returns:
         tuple[ndarray, ndarray]: Feature matrix and target labels.
 
-        >>> features, targets = collect_dataset()
-        >>> features.shape
-        (150, 4)
-        >>> targets.shape
-        (150,)
+    >>> features, targets = collect_dataset()
+    >>> features.shape
+    (150, 4)
+    >>> targets.shape
+    (150,)
     """
     iris_dataset = load_iris()
     return np.array(iris_dataset.data), np.array(iris_dataset.target)

From 498c137aec6ab5e8a2303693bb5de241f3e5d2ea Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 15:31:54 +0500
Subject: [PATCH 11/14] Improved line formatting

---
 .../t_stochastic_neighbour_embedding.py       | 45 +++++++++----------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 7139152ff30c..5ade8e6cae24 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -12,12 +12,11 @@ def collect_dataset() -> tuple[ndarray, ndarray]:
     Returns:
         tuple[ndarray, ndarray]: Feature matrix and target labels.
 
-    Example:
-        >>> features, targets = collect_dataset()
-        >>> features.shape
-        (150, 4)
-        >>> targets.shape
-        (150,)
+    >>> features, targets = collect_dataset()
+    >>> features.shape
+    (150, 4)
+    >>> targets.shape
+    (150,)
     """
     iris_dataset = load_iris()
     return np.array(iris_dataset.data), np.array(iris_dataset.target)
@@ -34,11 +33,10 @@ def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> nda
     Returns:
         ndarray: Symmetrized probability matrix.
 
-    Example:
-        >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
-        >>> probabilities = compute_pairwise_affinities(x)
-        >>> float(round(probabilities[0, 1], 3))
-        0.25
+    >>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
+    >>> probabilities = compute_pairwise_affinities(x)
+    >>> float(round(probabilities[0, 1], 3))
+    0.25
     """
     n_samples = data_matrix.shape[0]
     squared_sum = np.sum(np.square(data_matrix), axis=1)
@@ -63,11 +61,10 @@ def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndar
     Returns:
         tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
 
-    Example:
-        >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
-        >>> q_matrix, numerators = compute_low_dim_affinities(y)
-        >>> q_matrix.shape
-        (2, 2)
+    >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
+    >>> q_matrix, numerators = compute_low_dim_affinities(y)
+    >>> q_matrix.shape
+    (2, 2)
     """
     squared_sum = np.sum(np.square(embedding_matrix), axis=1)
     numerator_matrix = 1 / (
@@ -101,11 +98,10 @@ def apply_tsne(
     Returns:
         ndarray: Low-dimensional embedding of the data.
 
-    Example:
-        >>> features, _ = collect_dataset()
-        >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
-        >>> embedding.shape
-        (150, 2)
+    >>> features, _ = collect_dataset()
+    >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
+    >>> embedding.shape
+    (150, 2)
     """
     if n_components < 1 or n_iter < 1:
         raise ValueError("n_components and n_iter must be >= 1")
@@ -147,10 +143,9 @@ def main() -> None:
     """
     Run t-SNE on the Iris dataset and display the first 5 embeddings.
 
-    Example:
-        >>> main()  # doctest: +ELLIPSIS
-        t-SNE embedding (first 5 points):
-        [[...
+    >>> main()  # doctest: +ELLIPSIS
+    t-SNE embedding (first 5 points):
+    [[...
     """
     features, _labels = collect_dataset()
     embedding = apply_tsne(features, n_components=2, n_iter=300)

From da78c477fb697d74d6cc71634b247c4a6f7b0057 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 15:39:09 +0500
Subject: [PATCH 12/14] Adding URL for t-SNE Wikipedia

---
 machine_learning/t_stochastic_neighbour_embedding.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index 5ade8e6cae24..cd3475adc352 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -1,3 +1,10 @@
+"""
+t-SNE (t-distributed Stochastic Neighbor Embedding) implementation.
+
+For more details, see:
+https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
+"""
+
 import doctest
 
 import numpy as np

From ff974add8ef963bfa1887e3df66eb71d4e8b8222 Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Tue, 14 Oct 2025 13:14:07 +0200
Subject: [PATCH 13/14] Apply suggestion from @cclauss

---
 machine_learning/t_stochastic_neighbour_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py
index cd3475adc352..d6f630149087 100644
--- a/machine_learning/t_stochastic_neighbour_embedding.py
+++ b/machine_learning/t_stochastic_neighbour_embedding.py
@@ -1,5 +1,5 @@
 """
-t-SNE (t-distributed Stochastic Neighbor Embedding) implementation.
+t-distributed stochastic neighbor embedding (t-SNE)
 
 For more details, see:
 https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding

From f76020100d9abc7d50abc21d3062e0eb5d800599 Mon Sep 17 00:00:00 2001
From: Khansa435 <cs-2023-f-38@fcoe.edu.pk>
Date: Tue, 14 Oct 2025 22:38:14 +0500
Subject: [PATCH 14/14] Add t-SNE to DIRECTORY.md

---
 DIRECTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DIRECTORY.md b/DIRECTORY.md
index 6249b75c4231..0f9859577493 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -624,6 +624,7 @@
   * [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py)
   * [Similarity Search](machine_learning/similarity_search.py)
   * [Support Vector Machines](machine_learning/support_vector_machines.py)
+  * [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py)
   * [Word Frequency Functions](machine_learning/word_frequency_functions.py)
   * [Xgboost Classifier](machine_learning/xgboost_classifier.py)
   * [Xgboost Regressor](machine_learning/xgboost_regressor.py)