In [None]:
import sys
import os
from time import time
import random
import torch
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LassoLars
from sklearn.preprocessing import StandardScaler

sys.path.append(os.pardir)
sys.path.append(os.path.join(os.pardir, os.pardir))
notebook_dir = os.path.dirname(os.path.abspath("__file__"))

from src.dataset.dataloaders import get_dataset
from src.dictionary.basic_dictionary_learning import base_algo1, batched_algo1


In [9]:
def generate_synthetic(dictionary, nb_elt_per_comb_lin, n, coef_generator = None):
    """
    Generate a synthetic dataset of sparse linear combinations of dictionary atoms.

    Each sample is built by drawing `nb_elt_per_comb_lin` distinct atoms (rows of
    `dictionary`) without replacement, drawing one coefficient per selected atom,
    and summing the weighted atoms.

    Note on reproducibility: atoms are selected with Python's `random` module,
    while the default coefficients come from NumPy's global generator
    (`np.random.randn`). Both must be seeded to obtain repeatable data.

    Parameters:
    - dictionary (np.ndarray): A matrix of shape (nb_atom, d) where each row is an atom and columns are features.
    - nb_elt_per_comb_lin (int): The number of atoms to select for each linear combination.
    - n (int): The number of synthetic data points to generate.
    - coef_generator (callable, optional): Called as `coef_generator(nb_elt_per_comb_lin)`;
      must return an iterable of exactly `nb_elt_per_comb_lin` numbers. Defaults to `np.random.randn`.

    Returns:
    - np.ndarray: A synthetic dataset of shape (n, d), where each row is a linear combination of the selected atoms.
    - list: One entry per sample; each entry is a list of `(atom_index, coefficient)` tuples
      (coefficient as a Python float) describing how that sample was built.

    Raises:
    - ValueError: If `dictionary` is not a 2D numpy array, if `nb_elt_per_comb_lin` or `n`
      is not strictly positive, if more atoms are requested than the dictionary holds,
      or if `coef_generator` does not return exactly `nb_elt_per_comb_lin` coefficients.
    """
    if coef_generator is None:
        coef_generator = np.random.randn
    if not isinstance(dictionary, np.ndarray):
        raise ValueError("The dictionary parameter must be a numpy array.")
    if len(dictionary.shape) != 2:
        raise ValueError("The dictionary must be a 2D matrix.")
    if nb_elt_per_comb_lin <= 0:
        raise ValueError("nb_elt_per_comb_lin must be greater than 0.")
    if n <= 0:
        raise ValueError("n must be greater than 0.")

    nb_atom = dictionary.shape[0]
    if nb_atom < nb_elt_per_comb_lin:
        raise ValueError("nb_elt_per_comb_lin cannot be greater than the number of atoms in the dictionary.")

    synthetic_data = []
    combinations_used = []

    for _ in range(n):
        # Randomly select distinct atoms for this linear combination.
        selected_indices = random.sample(range(nb_atom), nb_elt_per_comb_lin)

        # Draw one coefficient per selected atom, stored as plain Python floats.
        coefficients = [float(c) for c in coef_generator(nb_elt_per_comb_lin)]
        if len(coefficients) != nb_elt_per_comb_lin:
            # zip() would silently drop atoms or coefficients on a length mismatch,
            # yielding samples that do not have the requested sparsity.
            raise ValueError(
                "coef_generator must return exactly nb_elt_per_comb_lin coefficients "
                f"(expected {nb_elt_per_comb_lin}, got {len(coefficients)})."
            )

        combination = list(zip(selected_indices, coefficients))
        combinations_used.append(combination)

        # Weighted sum of the selected atoms gives one synthetic sample of shape (d,).
        synthetic_data.append(sum(coef * dictionary[idx] for idx, coef in combination))

    return np.array(synthetic_data), combinations_used
    

# Toy ground-truth dictionary: 2 atoms living in a 9-dimensional feature space,
# atom 0 being the first canonical basis vector and atom 1 the second.
dico = np.eye(2, 9, dtype=int)
# Quick smoke test of the generator: 10 samples, each mixing both atoms.
# Note that `data` holds the full (samples, combinations_used) tuple.
data = generate_synthetic(dico, nb_elt_per_comb_lin=2, n=10)
def train_loader():
    """
    Endless generator of freshly sampled synthetic mini-batches.

    Every iteration calls `generate_synthetic(dico, 2, 2)`, i.e. draws 2 new
    samples that each combine 2 atoms of the module-level `dico`, discards the
    combination metadata, and yields the samples as a torch tensor created with
    `torch.from_numpy`.

    The generator never terminates on its own; the consumer decides when to stop.
    """
    while True:
        batch, _ = generate_synthetic(dico, 2, 2)
        yield torch.from_numpy(batch)

In [10]:
# Seed every random source this run draws from, so that re-executing the cell
# (or Restart & Run All) reproduces the same learned dictionary.
SEED = 0
random.seed(SEED)        # atom selection inside generate_synthetic (random.sample)
np.random.seed(SEED)     # default coefficient generator (np.random.randn)
torch.manual_seed(SEED)  # any torch-side randomness in batched_algo1 — presumably its initialisation; confirm

m = dico.shape[-1]  # feature dimension of the samples (number of columns of dico)
k = 3  # number of atoms to learn — presumably; confirm against batched_algo1
# lbd is presumably the sparsity/regularisation weight (0 disables it) — confirm.
# tmax matches the 10000 iterations reported by the progress bar.
D = batched_algo1(train_loader(), m=m, k=k, lbd=0, tmax=int(1e4))

100%|██████████| 10000/10000 [00:14<00:00, 707.50it/s]


In [15]:
# Display the dictionary returned by batched_algo1 in the training cell.
print(D)

tensor([[-0.8818, -0.0710, -0.8005],
        [ 0.3758,  0.9316,  0.5376],
        [ 0.1344, -0.1515, -0.0079],
        [ 0.0335, -0.1050,  0.0660],
        [ 0.0170, -0.0975,  0.0780],
        [ 0.1591, -0.1629, -0.0259],
        [ 0.0402, -0.1082,  0.0610],
        [ 0.1756, -0.1705, -0.0381],
        [ 0.0624, -0.1184,  0.0448]])
