From 8c581f7366fc97140ec13b47e20b3bcc0df8a904 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Mon, 13 Oct 2025 22:56:28 +0500 Subject: [PATCH 01/14] Added t-SNE with Iris dataset example --- .../t_stochastic_neighbour_embedding.py | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 machine_learning/t_stochastic_neighbour_embedding.py diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py new file mode 100644 index 000000000000..bcf177567c46 --- /dev/null +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -0,0 +1,158 @@ +import doctest + +import numpy as np +from numpy import ndarray +from sklearn.datasets import load_iris + + +def collect_dataset() -> tuple[ndarray, ndarray]: + """ + Load Iris dataset and return features and labels. + Returns: + tuple[ndarray, ndarray]: feature matrix and target labels + Example: + >>> x, y = collect_dataset() + >>> x.shape + (150, 4) + >>> y.shape + (150,) + """ + data = load_iris() + return np.array(data.data), np.array(data.target) + + +def compute_pairwise_affinities(data_x: ndarray, sigma: float = 1.0) -> ndarray: + """ + Compute high-dimensional affinities (P matrix) using Gaussian kernel. + Args: + data_x: Input data of shape (n_samples, n_features) + sigma: Gaussian kernel bandwidth + Returns: + ndarray: Symmetrized probability matrix + Example: + >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> p = compute_pairwise_affinities(x) + >>> float(round(p[0, 1], 3)) + 0.25 + """ + n_samples = data_x.shape[0] + sum_x = np.sum(np.square(data_x), axis=1) + dist_sq = np.add(np.add(-2 * np.dot(data_x, data_x.T), sum_x).T, sum_x) + p = np.exp(-dist_sq / (2 * sigma**2)) + np.fill_diagonal(p, 0) + p /= np.sum(p) + return (p + p.T) / (2 * n_samples) + + +def compute_low_dim_affinities(low_dim_embedding: ndarray) -> tuple[ndarray, ndarray]: + """ + Compute low-dimensional affinities (Q matrix) using Student-t distribution. + Args: + low_dim_embedding: shape (n_samples, n_components) + Returns: + tuple[ndarray, ndarray]: Q probability matrix and numerator + Example: + >>> y = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> q, num = compute_low_dim_affinities(y) + >>> q.shape + (2, 2) + """ + sum_y = np.sum(np.square(low_dim_embedding), axis=1) + numerator = 1 / ( + 1 + + np.add( + np.add(-2 * np.dot(low_dim_embedding, low_dim_embedding.T), sum_y).T, + sum_y, + ) + ) + np.fill_diagonal(numerator, 0) + q = numerator / np.sum(numerator) + return q, numerator + + +def apply_tsne( + data_x: ndarray, + n_components: int = 2, + learning_rate: float = 200.0, + n_iter: int = 500, +) -> ndarray: + """ + Apply t-SNE for dimensionality reduction. + Args: + data_x: Original dataset (features) + n_components: Target dimension (2D or 3D) + learning_rate: Step size for gradient descent + n_iter: Number of iterations + Returns: + ndarray: Low-dimensional embedding of the data + Example: + >>> x, _ = collect_dataset() + >>> y_emb = apply_tsne(x, n_components=2, n_iter=50) + >>> y_emb.shape + (150, 2) + """ + if n_components < 1 or n_iter < 1: + raise ValueError("n_components and n_iter must be >= 1") + + n_samples = data_x.shape[0] + rng = np.random.default_rng() + y = rng.standard_normal((n_samples, n_components)) * 1e-4 + + p = compute_pairwise_affinities(data_x) + p = np.maximum(p, 1e-12) + + y_inc = np.zeros_like(y) + momentum = 0.5 + + for i in range(n_iter): + q, num = compute_low_dim_affinities(y) + q = np.maximum(q, 1e-12) + + pq = p - q + d_y = 4 * ( + np.dot((pq * num), y) + - np.multiply(np.sum(pq * num, axis=1)[:, np.newaxis], y) + ) + + y_inc = momentum * y_inc - learning_rate * d_y + y += y_inc + + if i == int(n_iter / 4): + momentum = 0.8 + + return y + + +def main() -> None: + """ + Run t-SNE on Iris dataset and display the first 5 embeddings. + Example: + >>> main() # doctest: +ELLIPSIS + t-SNE embedding (first 5 points): + [[... + """ + data_x,labels = collect_dataset() + y_emb = apply_tsne(data_x, n_components=2, n_iter=300) + + if not isinstance(y_emb, np.ndarray): + raise TypeError("t-SNE embedding must be an ndarray") + + print("t-SNE embedding (first 5 points):") + print(y_emb[:5]) + + # Optional visualization ( Ruff/mypy compliant) + import matplotlib.pyplot as plt + plt.scatter( + y_emb[:, 0], + y_emb[:, 1], + c=labels, + cmap="viridis" + ) + plt.title("t-SNE Visualization of Iris Dataset") + plt.xlabel("Dimension 1") + plt.ylabel("Dimension 2") + plt.show() + +if __name__ == "__main__": + # doctest.testmod() + main() From a394193c651085546f8b1c1dbcaa26b6334e6295 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Mon, 13 Oct 2025 23:03:22 +0500 Subject: [PATCH 02/14] Added t-SNE with Iris dataset example --- machine_learning/t_stochastic_neighbour_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index bcf177567c46..0ba534719918 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -154,5 +154,5 @@ def main() -> None: plt.show() if __name__ == "__main__": - # doctest.testmod() + doctest.testmod() main() From 165f516346f82367a2b438b2093ed1fcf22ef59b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 18:29:09 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/t_stochastic_neighbour_embedding.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 0ba534719918..76281d74e2ac 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -131,7 +131,7 @@ def main() -> None: t-SNE embedding (first 5 points): [[... """ - data_x,labels = collect_dataset() + data_x, labels = collect_dataset() y_emb = apply_tsne(data_x, n_components=2, n_iter=300) if not isinstance(y_emb, np.ndarray): @@ -142,17 +142,14 @@ def main() -> None: # Optional visualization ( Ruff/mypy compliant) import matplotlib.pyplot as plt - plt.scatter( - y_emb[:, 0], - y_emb[:, 1], - c=labels, - cmap="viridis" - ) + + plt.scatter(y_emb[:, 0], y_emb[:, 1], c=labels, cmap="viridis") plt.title("t-SNE Visualization of Iris Dataset") plt.xlabel("Dimension 1") plt.ylabel("Dimension 2") plt.show() + if __name__ == "__main__": doctest.testmod() main() From fb0fdb4a7521ca7830a21af330644ac005be6876 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 14:32:17 +0500 Subject: [PATCH 04/14] Updated with descriptive variables --- .../t_stochastic_neighbour_embedding.py | 172 ++++++++++-------- 1 file changed, 96 insertions(+), 76 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 76281d74e2ac..5712e4b386d0 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -1,5 +1,4 @@ import doctest - import numpy as np from numpy import ndarray from sklearn.datasets import load_iris @@ -7,143 +6,163 @@ def collect_dataset() -> tuple[ndarray, ndarray]: """ - Load Iris dataset and return features and labels. + Load the Iris dataset and return features and labels. + Returns: - tuple[ndarray, ndarray]: feature matrix and target labels + tuple[ndarray, ndarray]: Feature matrix and target labels. + Example: - >>> x, y = collect_dataset() - >>> x.shape - (150, 4) - >>> y.shape - (150,) + >>> features, targets = collect_dataset() + >>> features.shape + (150, 4) + >>> targets.shape + (150,) """ - data = load_iris() - return np.array(data.data), np.array(data.target) + iris_dataset = load_iris() + return np.array(iris_dataset.data), np.array(iris_dataset.target) -def compute_pairwise_affinities(data_x: ndarray, sigma: float = 1.0) -> ndarray: +def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray: """ - Compute high-dimensional affinities (P matrix) using Gaussian kernel. + Compute high-dimensional affinities (P matrix) using a Gaussian kernel. + Args: - data_x: Input data of shape (n_samples, n_features) - sigma: Gaussian kernel bandwidth + data_matrix: Input data of shape (n_samples, n_features). + sigma: Gaussian kernel bandwidth. + Returns: - ndarray: Symmetrized probability matrix + ndarray: Symmetrized probability matrix. + Example: - >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) - >>> p = compute_pairwise_affinities(x) - >>> float(round(p[0, 1], 3)) - 0.25 + >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> probabilities = compute_pairwise_affinities(x) + >>> float(round(probabilities[0, 1], 3)) + 0.25 """ - n_samples = data_x.shape[0] - sum_x = np.sum(np.square(data_x), axis=1) - dist_sq = np.add(np.add(-2 * np.dot(data_x, data_x.T), sum_x).T, sum_x) - p = np.exp(-dist_sq / (2 * sigma**2)) - np.fill_diagonal(p, 0) - p /= np.sum(p) - return (p + p.T) / (2 * n_samples) + n_samples = data_matrix.shape[0] + squared_sum = np.sum(np.square(data_matrix), axis=1) + squared_distance = np.add(np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum) + + affinity_matrix = np.exp(-squared_distance / (2 * sigma**2)) + np.fill_diagonal(affinity_matrix, 0) + + affinity_matrix /= np.sum(affinity_matrix) + return (affinity_matrix + affinity_matrix.T) / (2 * n_samples) -def compute_low_dim_affinities(low_dim_embedding: ndarray) -> tuple[ndarray, ndarray]: +def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]: """ - Compute low-dimensional affinities (Q matrix) using Student-t distribution. + Compute low-dimensional affinities (Q matrix) using a Student-t distribution. + Args: - low_dim_embedding: shape (n_samples, n_components) + embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components). + Returns: - tuple[ndarray, ndarray]: Q probability matrix and numerator + tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix). + Example: - >>> y = np.array([[0.0, 0.0], [1.0, 0.0]]) - >>> q, num = compute_low_dim_affinities(y) - >>> q.shape - (2, 2) + >>> y = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> q_matrix, numerators = compute_low_dim_affinities(y) + >>> q_matrix.shape + (2, 2) """ - sum_y = np.sum(np.square(low_dim_embedding), axis=1) - numerator = 1 / ( + squared_sum = np.sum(np.square(embedding_matrix), axis=1) + numerator_matrix = 1 / ( 1 + np.add( - np.add(-2 * np.dot(low_dim_embedding, low_dim_embedding.T), sum_y).T, - sum_y, + np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T, + squared_sum, ) ) - np.fill_diagonal(numerator, 0) - q = numerator / np.sum(numerator) - return q, numerator + np.fill_diagonal(numerator_matrix, 0) + + q_matrix = numerator_matrix / np.sum(numerator_matrix) + return q_matrix, numerator_matrix def apply_tsne( - data_x: ndarray, + data_matrix: ndarray, n_components: int = 2, learning_rate: float = 200.0, n_iter: int = 500, ) -> ndarray: """ Apply t-SNE for dimensionality reduction. + Args: - data_x: Original dataset (features) - n_components: Target dimension (2D or 3D) - learning_rate: Step size for gradient descent - n_iter: Number of iterations + data_matrix: Original dataset (features). + n_components: Target dimension (2D or 3D). + learning_rate: Step size for gradient descent. + n_iter: Number of iterations. + Returns: - ndarray: Low-dimensional embedding of the data + ndarray: Low-dimensional embedding of the data. + Example: - >>> x, _ = collect_dataset() - >>> y_emb = apply_tsne(x, n_components=2, n_iter=50) - >>> y_emb.shape - (150, 2) + >>> features, _ = collect_dataset() + >>> embedding = apply_tsne(features, n_components=2, n_iter=50) + >>> embedding.shape + (150, 2) """ if n_components < 1 or n_iter < 1: raise ValueError("n_components and n_iter must be >= 1") - n_samples = data_x.shape[0] + n_samples = data_matrix.shape[0] rng = np.random.default_rng() - y = rng.standard_normal((n_samples, n_components)) * 1e-4 + embedding = rng.standard_normal((n_samples, n_components)) * 1e-4 - p = compute_pairwise_affinities(data_x) - p = np.maximum(p, 1e-12) + high_dim_affinities = compute_pairwise_affinities(data_matrix) + high_dim_affinities = np.maximum(high_dim_affinities, 1e-12) - y_inc = np.zeros_like(y) + embedding_increment = np.zeros_like(embedding) momentum = 0.5 - for i in range(n_iter): - q, num = compute_low_dim_affinities(y) - q = np.maximum(q, 1e-12) + for iteration in range(n_iter): + low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding) + low_dim_affinities = np.maximum(low_dim_affinities, 1e-12) - pq = p - q - d_y = 4 * ( - np.dot((pq * num), y) - - np.multiply(np.sum(pq * num, axis=1)[:, np.newaxis], y) + affinity_diff = high_dim_affinities - low_dim_affinities + + gradient = 4 * ( + np.dot((affinity_diff * numerator_matrix), embedding) + - np.multiply(np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis], embedding) ) - y_inc = momentum * y_inc - learning_rate * d_y - y += y_inc + embedding_increment = momentum * embedding_increment - learning_rate * gradient + embedding += embedding_increment - if i == int(n_iter / 4): + if iteration == int(n_iter / 4): momentum = 0.8 - return y + return embedding def main() -> None: """ - Run t-SNE on Iris dataset and display the first 5 embeddings. + Run t-SNE on the Iris dataset and display the first 5 embeddings. + Example: - >>> main() # doctest: +ELLIPSIS - t-SNE embedding (first 5 points): - [[... + >>> main() # doctest: +ELLIPSIS + t-SNE embedding (first 5 points): + [[... """ - data_x, labels = collect_dataset() + data_x,labels = collect_dataset() y_emb = apply_tsne(data_x, n_components=2, n_iter=300) - if not isinstance(y_emb, np.ndarray): + if not isinstance(embedding, np.ndarray): raise TypeError("t-SNE embedding must be an ndarray") print("t-SNE embedding (first 5 points):") - print(y_emb[:5]) + print(embedding[:5]) # Optional visualization ( Ruff/mypy compliant) import matplotlib.pyplot as plt - - plt.scatter(y_emb[:, 0], y_emb[:, 1], c=labels, cmap="viridis") + plt.scatter( + y_emb[:, 0], + y_emb[:, 1], + c=labels, + cmap="viridis" + ) plt.title("t-SNE Visualization of Iris Dataset") plt.xlabel("Dimension 1") plt.ylabel("Dimension 2") @@ -153,3 +172,4 @@ def main() -> None: if __name__ == "__main__": doctest.testmod() main() + \ No newline at end of file From 18c5d96fe72a37564e9fcbfe868ea6d1eb2c9c32 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 14:43:51 +0500 Subject: [PATCH 05/14] Add descriptive variable names --- .../t_stochastic_neighbour_embedding.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 5712e4b386d0..5de9c58636bd 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -146,8 +146,8 @@ def main() -> None: t-SNE embedding (first 5 points): [[... """ - data_x,labels = collect_dataset() - y_emb = apply_tsne(data_x, n_components=2, n_iter=300) + features, labels = collect_dataset() + embedding = apply_tsne(features, n_components=2, n_iter=300) if not isinstance(embedding, np.ndarray): raise TypeError("t-SNE embedding must be an ndarray") @@ -155,18 +155,14 @@ def main() -> None: print("t-SNE embedding (first 5 points):") print(embedding[:5]) - # Optional visualization ( Ruff/mypy compliant) - import matplotlib.pyplot as plt - plt.scatter( - y_emb[:, 0], - y_emb[:, 1], - c=labels, - cmap="viridis" - ) - plt.title("t-SNE Visualization of Iris Dataset") - plt.xlabel("Dimension 1") - plt.ylabel("Dimension 2") - plt.show() + # Optional visualization (Ruff/mypy compliant) + + # import matplotlib.pyplot as plt + # plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis") + # plt.title("t-SNE Visualization of the Iris Dataset") + # plt.xlabel("Dimension 1") + # plt.ylabel("Dimension 2") + # plt.show() if __name__ == "__main__": From ef68a5f37af56f28d8d2231acad230914a6de0e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 09:44:35 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/t_stochastic_neighbour_embedding.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 5de9c58636bd..d81845d4bfaf 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -41,7 +41,9 @@ def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> nda """ n_samples = data_matrix.shape[0] squared_sum = np.sum(np.square(data_matrix), axis=1) - squared_distance = np.add(np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum) + squared_distance = np.add( + np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum + ) affinity_matrix = np.exp(-squared_distance / (2 * sigma**2)) np.fill_diagonal(affinity_matrix, 0) @@ -125,7 +127,10 @@ def apply_tsne( gradient = 4 * ( np.dot((affinity_diff * numerator_matrix), embedding) - - np.multiply(np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis], embedding) + - np.multiply( + np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis], + embedding, + ) ) embedding_increment = momentum * embedding_increment - learning_rate * gradient @@ -168,4 +173,3 @@ def main() -> None: if __name__ == "__main__": doctest.testmod() main() - \ No newline at end of file From d1a552db867aa94b9d2029c37bff18f8ed28c135 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 14:52:21 +0500 Subject: [PATCH 07/14] Add Descriptive Variable names --- machine_learning/t_stochastic_neighbour_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 5de9c58636bd..c57ac072314d 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -146,7 +146,7 @@ def main() -> None: t-SNE embedding (first 5 points): [[... """ - features, labels = collect_dataset() + features, _labels = collect_dataset() embedding = apply_tsne(features, n_components=2, n_iter=300) if not isinstance(embedding, np.ndarray): From 6a495bd035f4b96187e6669dc6fd522f317d77d5 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 15:08:09 +0500 Subject: [PATCH 08/14] Adding Descriptive variable names --- machine_learning/t_stochastic_neighbour_embedding.py | 1 + 1 file changed, 1 insertion(+) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index c5dd124f889f..7139152ff30c 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -1,4 +1,5 @@ import doctest + import numpy as np from numpy import ndarray from sklearn.datasets import load_iris From 7e396757bab3cebd30cf98d8624cd4143032e41a Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 15:09:57 +0500 Subject: [PATCH 09/14] Update machine_learning/t_stochastic_neighbour_embedding.py Co-authored-by: Christian Clauss --- machine_learning/t_stochastic_neighbour_embedding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 7139152ff30c..424178097221 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -12,7 +12,6 @@ def collect_dataset() -> tuple[ndarray, ndarray]: Returns: tuple[ndarray, ndarray]: Feature matrix and target labels. - Example: >>> features, targets = collect_dataset() >>> features.shape (150, 4) From 5e60cf2a020b2b3e253f7b6a7bca39e2992ceae2 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 15:29:46 +0500 Subject: [PATCH 10/14] Update machine_learning/t_stochastic_neighbour_embedding.py Co-authored-by: Christian Clauss --- machine_learning/t_stochastic_neighbour_embedding.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 424178097221..81576b595f8c 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -12,11 +12,11 @@ def collect_dataset() -> tuple[ndarray, ndarray]: Returns: tuple[ndarray, ndarray]: Feature matrix and target labels. - >>> features, targets = collect_dataset() - >>> features.shape - (150, 4) - >>> targets.shape - (150,) + >>> features, targets = collect_dataset() + >>> features.shape + (150, 4) + >>> targets.shape + (150,) """ iris_dataset = load_iris() return np.array(iris_dataset.data), np.array(iris_dataset.target) From 498c137aec6ab5e8a2303693bb5de241f3e5d2ea Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 15:31:54 +0500 Subject: [PATCH 11/14] Improved line formatting --- .../t_stochastic_neighbour_embedding.py | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 7139152ff30c..5ade8e6cae24 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -12,12 +12,11 @@ def collect_dataset() -> tuple[ndarray, ndarray]: Returns: tuple[ndarray, ndarray]: Feature matrix and target labels. - Example: - >>> features, targets = collect_dataset() - >>> features.shape - (150, 4) - >>> targets.shape - (150,) + >>> features, targets = collect_dataset() + >>> features.shape + (150, 4) + >>> targets.shape + (150,) """ iris_dataset = load_iris() return np.array(iris_dataset.data), np.array(iris_dataset.target) @@ -34,11 +33,10 @@ def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> nda Returns: ndarray: Symmetrized probability matrix. - Example: - >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) - >>> probabilities = compute_pairwise_affinities(x) - >>> float(round(probabilities[0, 1], 3)) - 0.25 + >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> probabilities = compute_pairwise_affinities(x) + >>> float(round(probabilities[0, 1], 3)) + 0.25 """ n_samples = data_matrix.shape[0] squared_sum = np.sum(np.square(data_matrix), axis=1) @@ -63,11 +61,10 @@ def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndar Returns: tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix). - Example: - >>> y = np.array([[0.0, 0.0], [1.0, 0.0]]) - >>> q_matrix, numerators = compute_low_dim_affinities(y) - >>> q_matrix.shape - (2, 2) + >>> y = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> q_matrix, numerators = compute_low_dim_affinities(y) + >>> q_matrix.shape + (2, 2) """ squared_sum = np.sum(np.square(embedding_matrix), axis=1) numerator_matrix = 1 / ( @@ -101,11 +98,10 @@ def apply_tsne( Returns: ndarray: Low-dimensional embedding of the data. - Example: - >>> features, _ = collect_dataset() - >>> embedding = apply_tsne(features, n_components=2, n_iter=50) - >>> embedding.shape - (150, 2) + >>> features, _ = collect_dataset() + >>> embedding = apply_tsne(features, n_components=2, n_iter=50) + >>> embedding.shape + (150, 2) """ if n_components < 1 or n_iter < 1: raise ValueError("n_components and n_iter must be >= 1") @@ -147,10 +143,9 @@ def main() -> None: """ Run t-SNE on the Iris dataset and display the first 5 embeddings. - Example: - >>> main() # doctest: +ELLIPSIS - t-SNE embedding (first 5 points): - [[... + >>> main() # doctest: +ELLIPSIS + t-SNE embedding (first 5 points): + [[... """ features, _labels = collect_dataset() embedding = apply_tsne(features, n_components=2, n_iter=300) From da78c477fb697d74d6cc71634b247c4a6f7b0057 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 15:39:09 +0500 Subject: [PATCH 12/14] Adding URL for t-SNE Wikipedia --- machine_learning/t_stochastic_neighbour_embedding.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index 5ade8e6cae24..cd3475adc352 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -1,3 +1,10 @@ +""" +t-SNE (t-distributed Stochastic Neighbor Embedding) implementation. + +For more details, see: +https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding +""" + import doctest import numpy as np From ff974add8ef963bfa1887e3df66eb71d4e8b8222 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Tue, 14 Oct 2025 13:14:07 +0200 Subject: [PATCH 13/14] Apply suggestion from @cclauss --- machine_learning/t_stochastic_neighbour_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py index cd3475adc352..d6f630149087 100644 --- a/machine_learning/t_stochastic_neighbour_embedding.py +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -1,5 +1,5 @@ """ -t-SNE (t-distributed Stochastic Neighbor Embedding) implementation. +t-distributed stochastic neighbor embedding (t-SNE) For more details, see: https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding From f76020100d9abc7d50abc21d3062e0eb5d800599 Mon Sep 17 00:00:00 2001 From: Khansa435 Date: Tue, 14 Oct 2025 22:38:14 +0500 Subject: [PATCH 14/14] Add t-SNE to DIRECTORY.md --- DIRECTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 6249b75c4231..0f9859577493 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -624,6 +624,7 @@ * [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py) * [Similarity Search](machine_learning/similarity_search.py) * [Support Vector Machines](machine_learning/support_vector_machines.py) + * [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py) * [Word Frequency Functions](machine_learning/word_frequency_functions.py) * [Xgboost Classifier](machine_learning/xgboost_classifier.py) * [Xgboost Regressor](machine_learning/xgboost_regressor.py)