In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import glob 
from sklearn.model_selection import train_test_split
import sys
sys.path.append('..')

# Pipeline

In [2]:
CALCULATE_RAW_EPSILONS = False
GENERATE_AVERAGE_NOISE = False
GENERATE_FOURIER_NOISE = True
GENERATE_AVERAGE_IMAGES = False
GENERATE_FOURIER_IMAGES = True
GENERATE_ALL_IMAGES = False

In [3]:
NOISE_TYPE = "steel"
NUM_RAW_IMAGES_FOR_NOISE_GENERATION = 100
NUM_NOISE_IMAGES = 500

## Lets make some noise

In [4]:
from src.data_generation.datasets.generator_noise_average import generate_average_noise_dataset
from src.data_generation.datasets.generator_noise_fourier import generate_fourier_noise_dataset

In [5]:
path_repo = os.path.dirname(os.getcwd())
path_raw = os.path.join(path_repo ,"data/raw/", NOISE_TYPE)
path_average_noise = os.path.join(path_repo ,"data/average_noise/", NOISE_TYPE)
path_fourier_noise = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE)
path_fourier_noise_freq = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE, "freq")
path_fourier_noise_ampl = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE, "ampl")

In [6]:
if CALCULATE_RAW_EPSILONS:
    import sys
    sys.path.append('../src')
    from methods.analytic.exact import AnalyticalMathodOld

    paths = glob.glob(os.path.join(path_raw, "*.png"))
    method = AnalyticalMathodOld()
    eps_est = pd.DataFrame(np.array(paths), columns=['path'])
    eps_est["epsilon"] = eps_est["path"].apply(lambda x: method.calculate_epsilon(cv2.imread(x, cv2.IMREAD_GRAYSCALE)))
    eps_est["epsilon"] = eps_est["epsilon"].apply(lambda x: x[0])
    eps_est["epsilon"] = eps_est["epsilon"].apply(lambda x: round(x, 3))
    eps_est.to_csv(os.path.join(path_fourier_noise, "raw_epsilons.csv"))

In [7]:
if GENERATE_AVERAGE_NOISE:
    generate_average_noise_dataset(
        path_to_raw=path_raw,
        path=path_average_noise,
        num_images=NUM_NOISE_IMAGES,
        num_used_raw_images=NUM_RAW_IMAGES_FOR_NOISE_GENERATION,
        seed=23)

In [8]:
if GENERATE_FOURIER_NOISE:
    generate_fourier_noise_dataset(
        path=path_fourier_noise_ampl,
        raw_epsilons_path=path_fourier_noise,
        num_images=NUM_NOISE_IMAGES,
        seed=23,
        pass_value=4,
        domain="amplitude"
    )
    generate_fourier_noise_dataset(
        path=path_fourier_noise_freq,
        raw_epsilons_path=path_fourier_noise,
        num_images=NUM_NOISE_IMAGES,
        seed=23,
        pass_value=6,
        domain="freq"
    )

100%|██████████| 500/500 [00:20<00:00, 24.35it/s]
100%|██████████| 500/500 [00:06<00:00, 73.78it/s]


## Artificial images generation

In [9]:
from src.data_generation.datasets.generator import generate_dataset

In [10]:
params = {
    # params for blackbox
    "width": (30,150),
    "height": (30,100),
    "x": (0, 640),
    "y": (0, 480),
    # params for pizza
    "nr_of_pizzas": (10,20),
    "center_point": (320, 240),
    "channels": 1,
    "strength": (10,20),
    # params for average
    "path_average_noise": path_average_noise,
    # params for fourier
    "path_fourier_noise_freq": path_fourier_noise_freq,
    "path_fourier_noise_ampl": path_fourier_noise_ampl,
    "domain": "ampl",
    "pass_value": 6,
    # params for bubble
    "spray_particles": 800,
    "spray_diameter": 8,
    "fringes_color": None,
    "range_of_blobs": (30,40)
}

### Fourier + Average

#### Images

In [11]:
if GENERATE_FOURIER_IMAGES:
    path_generated_fourier_freq = os.path.join(path_repo ,"data/generated/fourier", NOISE_TYPE, "freq")
    params["domain"] = "freq"
    generate_dataset(
        noise_type=["fourier", "average"],
        path=path_generated_fourier_freq,
        name_prefix=f"fourier_{NOISE_TYPE}_freq",
        n_copies=1,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.1,
        seed=23,
        zipfile=False,
        filename=f"fourier_freq_{NOISE_TYPE}_10k.zip",
        parameters_filename="freq_parameters.csv",
        **params
    )
    path_generated_fourier_ampl = os.path.join(path_repo ,"data/generated/fourier", NOISE_TYPE, "ampl")
    params["domain"] = "ampl"
    generate_dataset(
        noise_type=["fourier", "average"],
        path=path_generated_fourier_ampl,
        name_prefix=f"fourier_{NOISE_TYPE}_ampl",
        n_copies=1,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.1,
        seed=23,
        zipfile=False,
        filename=f"fourier_ampl_{NOISE_TYPE}_10k.zip",
        parameters_filename="ampl_parameters.csv",
        **params
    )

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:00<00:00, 16.66it/s]
100%|██████████| 10/10 [00:00<00:00, 23.46it/s]


#### Params

In [12]:
if GENERATE_FOURIER_IMAGES:
    fourier_csv = pd.read_csv(os.path.join(path_generated_fourier_freq, "freq_parameters.csv"), index_col=False)
    fourier_train, fourier_test = train_test_split(fourier_csv, test_size=3000, random_state=12, shuffle=True, stratify=fourier_csv['epsilon'])

    fourier_train.to_csv(os.path.join(path_generated_fourier_freq, f"fourier_freq_{NOISE_TYPE}_train.csv"))
    fourier_test.to_csv(os.path.join(path_generated_fourier_freq, f"fourier_freq_{NOISE_TYPE}_test.csv"))

    fourier_csv = pd.read_csv(os.path.join(path_generated_fourier_ampl, "ampl_parameters.csv"), index_col=False)
    fourier_train, fourier_test = train_test_split(fourier_csv, test_size=3000, random_state=12, shuffle=True, stratify=fourier_csv['epsilon'])

    fourier_train.to_csv(os.path.join(path_generated_fourier_ampl, f"fourier_ampl_{NOISE_TYPE}_train.csv"))
    fourier_test.to_csv(os.path.join(path_generated_fourier_ampl, f"fourier_ampl_{NOISE_TYPE}_test.csv"))

ValueError: test_size=3000 should be either positive and smaller than the number of samples 10 or a float in the (0, 1) range

### All

#### Images

In [None]:
if GENERATE_ALL_IMAGES:
    path_generated_all = os.path.join(path_repo ,"data/generated/all", NOISE_TYPE)
    generate_dataset(
        noise_type=["bubble", "pizza", "average", "blackbox"],
        path=path_generated_all,
        name_prefix=f"all_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"all_{NOISE_TYPE}_10k.zip",
        **params
    )

#### Params

In [None]:
if GENERATE_ALL_IMAGES:
    all_csv = pd.read_csv(os.path.join(path_generated_all, "parameters.csv"), index_col=False)
    all_train, all_test = train_test_split(all_csv, test_size=3000, random_state=12, shuffle=True, stratify=all_csv['epsilon'])

    all_train.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_train.csv"))
    all_test.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_test.csv"))

### Average

#### Images

In [None]:
if GENERATE_AVERAGE_IMAGES:
    path_generated_average = os.path.join(path_repo ,"data/generated/average", NOISE_TYPE)
    generate_dataset(
        noise_type=["average"],
        path=path_generated_average,
        name_prefix=f"average_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"average_{NOISE_TYPE}_10k.zip",
        **params
    )

#### Params

In [None]:
if GENERATE_AVERAGE_IMAGES:
    average_csv = pd.read_csv(os.path.join(path_generated_average, "parameters.csv"), index_col=False)
    average_train, average_test = train_test_split(average_csv, test_size=3000, random_state=12, shuffle=True, stratify=average_csv['epsilon'])

    average_train.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_train.csv"))
    average_test.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_test.csv"))