In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import sys

sys.path.append("..")

# Pipeline

In [2]:
CALCULATE_RAW_EPSILONS = False
GENERATE_AVERAGE_NOISE = False
GENERATE_FOURIER_NOISE = False
GENERATE_AVERAGE_IMAGES = False
GENERATE_FOURIER_IMAGES = True
GENERATE_ALL_IMAGES = True

In [2]:
NOISE_TYPE = "ceramic"
NUM_RAW_IMAGES_FOR_NOISE_GENERATION = 100
NUM_NOISE_IMAGES = 500

## Lets make some noise

In [2]:
from src.data_generation.datasets.generator_noise_average import (
    generate_average_noise_dataset,
)

In [4]:
generate_average_noise_dataset(
    path="../data/noise/ceramic/",
    num_images=1000,
    path_to_raw="../data/raw/ceramic/1channel/",
)

100%|██████████| 1000/1000 [06:51<00:00,  2.43it/s]


In [5]:
path_repo = os.path.dirname(os.getcwd())
path_raw = os.path.join(path_repo, "data/raw/", NOISE_TYPE, "/1channel")
path_average_noise = os.path.join(path_repo, "data/noise/", NOISE_TYPE)
# path_fourier_noise = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE)
# path_fourier_noise_freq = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE, "freq")
# path_fourier_noise_ampl = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE, "ampl")

## Artificial images generation

In [3]:
from src.data_generation.datasets.generator import generate_dataset

In [6]:
params = {
    # # params for blackbox
    # "width": (30,150),
    # "height": (30,100),
    # "x": (0, 640),
    # "y": (0, 480),
    # # params for pizza
    # "nr_of_pizzas": (10,20),
    # "center_point": (320, 240),
    # "channels": 1,
    # "strength": (10,20),
    # params for average
    "path_average_noise": path_average_noise,
    # # params for fourier
    # "path_fourier_noise_freq": path_fourier_noise_freq,
    # "path_fourier_noise_ampl": path_fourier_noise_ampl,
    # "domain": "ampl",
    # "pass_value": 4,
    # "noise_proportion": 0.6,
    # # params for bubble
    # "spray_particles": 800,
    # "spray_diameter": 8,
    # "fringes_color": None,
    # "range_of_blobs": (30,40)
}

### Fourier + Average

#### Images

In [14]:
path_generated_fourier_freq = os.path.join(
    path_repo, "data/generated/average", NOISE_TYPE, "moved"
)
generate_dataset(
    noise_type=["average"],
    path=path_generated_fourier_freq,
    name_prefix=f"moved_{NOISE_TYPE}",
    n_copies=30,
    epsilon_range=(0.0, 1.0),
    epsilon_step=0.001,
    # seed=23,
    zipfile=False,
    filename=f"fourier_freq_{NOISE_TYPE}_10k.zip",
    parameters_filename="moved_parameters.csv",
    **params,
)

100%|██████████| 1000/1000 [14:22<00:00,  1.16it/s]


#### Params

In [12]:
if GENERATE_FOURIER_IMAGES:
    fourier_csv = pd.read_csv(
        os.path.join(path_generated_fourier_freq, "freq_parameters.csv"),
        index_col=False,
    )
    fourier_train, fourier_test = train_test_split(
        fourier_csv,
        test_size=3000,
        random_state=12,
        shuffle=True,
        stratify=fourier_csv["epsilon"],
    )

    fourier_train.to_csv(
        os.path.join(
            path_generated_fourier_freq, f"fourier_freq_{NOISE_TYPE}_train.csv"
        )
    )
    fourier_test.to_csv(
        os.path.join(path_generated_fourier_freq, f"fourier_freq_{NOISE_TYPE}_test.csv")
    )

    fourier_csv = pd.read_csv(
        os.path.join(path_generated_fourier_ampl, "ampl_parameters.csv"),
        index_col=False,
    )
    fourier_train, fourier_test = train_test_split(
        fourier_csv,
        test_size=3000,
        random_state=12,
        shuffle=True,
        stratify=fourier_csv["epsilon"],
    )

    fourier_train.to_csv(
        os.path.join(
            path_generated_fourier_ampl, f"fourier_ampl_{NOISE_TYPE}_train.csv"
        )
    )
    fourier_test.to_csv(
        os.path.join(path_generated_fourier_ampl, f"fourier_ampl_{NOISE_TYPE}_test.csv")
    )

ValueError: test_size=3000 should be either positive and smaller than the number of samples 1 or a float in the (0, 1) range

### All

#### Images

In [None]:
if GENERATE_ALL_IMAGES:
    path_generated_all = os.path.join(path_repo, "data/generated/all", NOISE_TYPE)
    generate_dataset(
        noise_type=["bubble", "pizza", "average", "blackbox"],
        path=path_generated_all,
        name_prefix=f"all_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"all_{NOISE_TYPE}_10k.zip",
        **params,
    )

 61%|██████    | 612/1000 [20:57<13:17,  2.05s/it]


KeyboardInterrupt: 

#### Params

In [None]:
if GENERATE_ALL_IMAGES:
    all_csv = pd.read_csv(
        os.path.join(path_generated_all, "parameters.csv"), index_col=False
    )
    all_train, all_test = train_test_split(
        all_csv,
        test_size=3000,
        random_state=12,
        shuffle=True,
        stratify=all_csv["epsilon"],
    )

    all_train.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_train.csv"))
    all_test.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_test.csv"))

### Average

#### Images

In [7]:
# if GENERATE_AVERAGE_IMAGES:
path_generated_average = os.path.join(
    path_repo, "data/generated/perlin_ceramic_moved_test/pure_average/"
)
generate_dataset(
    noise_type=["average"],
    path=path_generated_average,
    name_prefix=f"average_{NOISE_TYPE}",
    n_copies=15,
    epsilon_range=(0.0, 1.0),
    epsilon_step=0.001,
    # seed=23,
    seed=32,
    zipfile=False,
    # filename=f"average_{NOISE_TYPE}_10k.zip",
    **params,
)

100%|██████████| 1000/1000 [06:22<00:00,  2.61it/s]


#### Params

In [None]:
if GENERATE_AVERAGE_IMAGES:
    average_csv = pd.read_csv(
        os.path.join(path_generated_average, "parameters.csv"), index_col=False
    )
    average_train, average_test = train_test_split(
        average_csv,
        test_size=3000,
        random_state=12,
        shuffle=True,
        stratify=average_csv["epsilon"],
    )

    average_train.to_csv(
        os.path.join(path_generated_average, f"average_{NOISE_TYPE}_train.csv")
    )
    average_test.to_csv(
        os.path.join(path_generated_average, f"average_{NOISE_TYPE}_test.csv")
    )