In [None]:
import numpy as np

class DatasetGenerator:
    """Generate synthetic point clouds that lie on (or near) low-dimensional linear manifolds.

    Attributes:
    - dim_of_space: Dimension of the ambient space the points are embedded in.
    - dim_of_manifold: Dimension of the line/plane/subspace the points are drawn from.

    All randomness comes from NumPy's global random state (``np.random``).
    """

    def __init__(self, dim_of_space, dim_of_manifold):
        self.dim_of_space = dim_of_space
        self.dim_of_manifold = dim_of_manifold

    def generate_dataset_in_subspace(self, n_samples):
        """
        Generate a dataset that lies along a low-dimensional linear subspace
        (through the origin) within a high-dimensional space.

        Parameters:
        - n_samples: Number of data points to generate.

        Returns:
        - dataset: NumPy array with shape (n_samples, dim_of_space)

        Raises:
        - ValueError: If dim_of_manifold is larger than dim_of_space.
        """
        if self.dim_of_manifold > self.dim_of_space:
            raise ValueError("dim_of_manifold must not exceed dim_of_space.")

        # Orthonormal basis of a random subspace: the Q factor of a random
        # Gaussian matrix, shape (dim_of_space, dim_of_manifold).
        basis, _ = np.linalg.qr(np.random.randn(self.dim_of_space, self.dim_of_manifold))
        # Random coordinates of every sample inside the subspace.
        coefficients = np.random.randn(n_samples, self.dim_of_manifold)
        # Map subspace coordinates into the ambient space.
        return coefficients @ basis.T

    def generate_lines_planes(self, n_samples, scale, noise):
        """
        Generate noisy points on a random straight line (in 2D or 3D) or on a
        random plane (in 3D).

        The geometry is selected from the instance attributes:
        - dim_of_space == 2: a line y = k * x + b (dim_of_manifold is not consulted).
        - dim_of_space == 3, dim_of_manifold == 2: a plane z = a * x + b * y + c.
        - dim_of_space == 3, dim_of_manifold == 1: a parametric line origin + t * direction.

        Parameters:
        - n_samples: Number of samples in the dataset.
        - scale: Free coordinates (x, (x, y) or t) are drawn uniformly from [0, scale).
        - noise: Standard deviation of the Gaussian noise. It is added to the
          dependent coordinate for the 2D line and the plane, and to every
          coordinate for the 3D line.

        Returns:
        - dataset: NumPy array with shape (n_samples, dim_of_space)

        Raises:
        - ValueError: If dim_of_space is not 2 or 3, or if dim_of_space is 3 and
          dim_of_manifold is not 1 or 2.
        """
        if self.dim_of_space not in (2, 3):
            raise ValueError("dim_of_space must be either 2 or 3.")
        # Without this guard an unsupported manifold dimension in 3D would fall
        # through every branch and silently produce an all-zero dataset.
        if self.dim_of_space == 3 and self.dim_of_manifold not in (1, 2):
            raise ValueError("dim_of_manifold must be either 1 or 2 when dim_of_space is 3.")

        if self.dim_of_space == 2:
            # Line in the plane with random slope and intercept in [0, 1).
            slope, intercept = np.random.rand(2)
            x = scale * np.random.rand(n_samples)
            y = slope * x + intercept + noise * np.random.randn(n_samples)
            return np.column_stack((x, y))

        if self.dim_of_manifold == 2:
            # Plane in 3D with random coefficients in [0, 1).
            a, b, c = np.random.rand(3)
            xy = scale * np.random.rand(n_samples, 2)
            z = a * xy[:, 0] + b * xy[:, 1] + c + noise * np.random.randn(n_samples)
            return np.column_stack((xy, z))

        # Line in 3D: random direction and origin, each with components in [0, 1).
        direction = np.random.rand(3)
        origin = np.random.rand(3)
        t = scale * np.random.rand(n_samples, 1)
        return origin + t * direction + noise * np.random.randn(n_samples, 3)


### 1 Example

In [None]:
# Example usage: sample a noisy straight line in the 2-D plane and plot it.

import matplotlib.pyplot as plt

dim_of_space = 2
dim_of_manifold = 1
n_samples = 100
line_generator = DatasetGenerator(dim_of_space, dim_of_manifold)
dataset = line_generator.generate_lines_planes(n_samples, scale=10, noise=0.1)
print(dataset.shape)

# Draw on an explicit Axes object rather than the implicit pyplot state.
fig, ax = plt.subplots()
ax.scatter(dataset[:, 0], dataset[:, 1], marker='o', label='Generated Points')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.legend()

plt.show()

### 2 Example

In [None]:
# Example usage: sample a noisy plane embedded in 3-D space.
dim_of_space, dim_of_manifold = 3, 2
n_samples = 10_000
plane_generator = DatasetGenerator(
    dim_of_space=dim_of_space,
    dim_of_manifold=dim_of_manifold,
)
dataset = plane_generator.generate_lines_planes(n_samples, scale=10, noise=1)
print(dataset.shape)

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): this import is only needed on older Matplotlib releases, where
# it registers the '3d' projection; it is harmless on newer ones.
from mpl_toolkits.mplot3d import Axes3D

# 3-D scatter of the points held in `dataset` (one column per axis).
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(*dataset.T[:3], marker='o', label='Generated Points')

for set_label, text in (
    (ax.set_xlabel, 'X-axis'),
    (ax.set_ylabel, 'Y-axis'),
    (ax.set_zlabel, 'Z-axis'),
):
    set_label(text)
ax.legend()

plt.show()


### 3 Example

In [None]:
# Example usage: sample a slightly noisy straight line in 3-D space and plot it.
dim_of_space, dim_of_manifold = 3, 1
n_samples = 1000

plane_generator = DatasetGenerator(dim_of_space, dim_of_manifold)
dataset = plane_generator.generate_lines_planes(n_samples, scale=10, noise=0.01)
print(dataset.shape)

# 3-D scatter with one dataset column per axis.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(*dataset.T[:3], marker='o', label='Generated Points')
for set_label, text in (
    (ax.set_xlabel, 'X-axis'),
    (ax.set_ylabel, 'Y-axis'),
    (ax.set_zlabel, 'Z-axis'),
):
    set_label(text)
ax.legend()
plt.show()

### 4 Example

In [None]:
# Example usage: 100 points on a random 5-dimensional subspace of a
# 50-dimensional space.
n_samples, dim_of_space, dim_of_manifold = 100, 50, 5

plane_generator = DatasetGenerator(dim_of_space, dim_of_manifold)
dataset = plane_generator.generate_dataset_in_subspace(n_samples)

print(dataset.shape)  # (100, 50)

# The rank of the data matrix is compared with the subspace dimension.
rank_check = (np.linalg.matrix_rank(dataset) == dim_of_manifold)
print("Dimensionality Check:", rank_check)

### 5 Example

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class KSquareDataset:
    """Generate square grayscale images containing randomly placed bright squares.

    Attributes:
    - ambient_dim: Side length of every generated image, in pixels.
    - num_squares: Number of squares added to every image.
    - square_size: Side length of every square, in pixels.
    """

    def __init__(self, ambient_dim=32, num_squares=1, square_size=3):
        self.ambient_dim = ambient_dim
        self.num_squares = num_squares
        self.square_size = square_size

    def generate_dataset(self, num_samples):
        """
        Generate `num_samples` images, each with `num_squares` random squares.

        Every square has a brightness drawn uniformly from [0, 1) and is added
        onto the image; overlapping squares accumulate and the result is
        clipped to [0, 1].

        Parameters:
        - num_samples: Number of images to generate.

        Returns:
        - dataset: NumPy array with shape (num_samples, ambient_dim, ambient_dim).
          The shape is preserved when num_samples is 0.

        Raises:
        - ValueError: Raised by np.random.randint when a square has to be drawn
          and ambient_dim <= 2 * square_size (the sampling range is empty).
        """
        side = self.ambient_dim
        size = self.square_size

        # Preallocating keeps the (num_samples, side, side) shape even for an
        # empty request; stacking a list of images would collapse it to (0,).
        images = np.zeros((num_samples, side, side))

        for image in images:  # each `image` is a writable view into `images`
            for _square in range(self.num_squares):
                # Top-left corner of the square. The range [size, side - size)
                # guarantees the square fits inside the image.
                # NOTE(review): this range also keeps squares out of the first
                # `size` rows/columns and the last row/column - confirm that
                # this margin is intended.
                top, left = np.random.randint(size, side - size, size=2)
                brightness = np.random.uniform(0, 1)
                image[top:top + size, left:left + size] += brightness

        # Overlapping squares can sum above 1; clamp to the valid range.
        np.clip(images, 0, 1, out=images)
        return images


In [None]:
# Example usage:
ambient_dim = 32
num_squares = 30
square_size = 3
num_samples = 5

ksquare_dataset = KSquareDataset(ambient_dim=ambient_dim, num_squares=num_squares, square_size=square_size)
dataset = ksquare_dataset.generate_dataset(num_samples)

# Visualize a few examples
for i in range(num_samples):
    plt.subplot(1, num_samples, i + 1)
    plt.imshow(dataset[i], cmap='gray')
    plt.axis('off')

plt.show()

In [None]:
# Show the sample at index 4 on its own, without axis ticks.
single_image = dataset[4]
plt.imshow(single_image, cmap='gray')
plt.axis('off')

In [None]:
# Display the shape of the `dataset` array as the cell output.
dataset.shape