In [37]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
sys.path.append('..')

# Pipeline

In [38]:
# Pipeline switches: each one gates an `if` cell further down.
# Set to True to (re)run the corresponding generation step.
GENERATE_NOISE: bool = False   # gates the generate_noise_dataset(...) cell
GENERATE_IMAGES: bool = False  # gates the generate_dataset(...) cells

In [39]:
# Name of the data sub-folder used below (data/raw/<NOISE_TYPE>, data/noise/<NOISE_TYPE>, ...)
# and embedded in the generated file names.
NOISE_TYPE: str = "steel"
# Passed to generate_noise_dataset as `num_used_raw_images`.
NUM_RAW_IMAGES_FOR_NOISE_GENERATION: int = 100
# Passed to generate_noise_dataset as `num_images`.
NUM_NOISE_IMAGES: int = 500

## Let's make some noise

In [40]:
from src.data_generation.datasets.generator_noise_average import generate_noise_dataset

In [41]:
# The repository root is taken to be the parent of the current working directory,
# i.e. the notebook is expected to be started from a direct sub-folder of the repo.
path_repo = os.path.dirname(os.getcwd())

# Raw input images and generated noise images live side by side under data/,
# each in a sub-folder named after NOISE_TYPE.
path_raw, path_noise = (
    os.path.join(path_repo, data_subdir, NOISE_TYPE)
    for data_subdir in ("data/raw/", "data/noise/")
)

In [42]:
# Optional step: build the noise dataset from the raw images.
# Skipped unless GENERATE_NOISE is switched on in the configuration cell.
if GENERATE_NOISE:
    noise_generation_kwargs = dict(
        path_to_raw=path_raw,                                   # where the raw images are read from
        path=path_noise,                                        # where the noise images are written
        num_images=NUM_NOISE_IMAGES,
        num_used_raw_images=NUM_RAW_IMAGES_FOR_NOISE_GENERATION,
        seed=23,                                                # fixed seed for reproducibility
    )
    generate_noise_dataset(**noise_generation_kwargs)

## Artificial image generation

In [43]:
from src.data_generation.datasets.generator import generate_dataset

In [44]:
# Extra keyword arguments forwarded to generate_dataset(**params).
# They are grouped by the noise type they belong to; the exact meaning of each
# value is defined by the generator implementation (not visible from this notebook).
params = dict(
    # --- blackbox ---
    width=(30, 150),
    height=(30, 100),
    x=(0, 640),
    y=(0, 480),
    # --- pizza ---
    nr_of_pizzas=(10, 20),
    center_point=(320, 240),
    channels=1,
    strength=(10, 20),
    # --- average ---
    noise_path=path_noise,
    # --- bubble ---
    spray_particles=800,
    spray_diameter=8,
    fringes_color=None,
    range_of_blobs=(30, 40),
)

### All

#### Images

In [45]:
# Output folder for the dataset that mixes all noise types.
# Defined unconditionally: the "Params" cell below reads parameters.csv from this
# folder even when image generation is skipped (GENERATE_IMAGES = False), so the
# path must exist on a fresh kernel regardless of the flag.
path_generated_all = os.path.join(path_repo ,"data/generated/all", NOISE_TYPE)

# Optional step: generate the images (and, presumably, parameters.csv — confirm in
# generate_dataset) for every noise type. Skipped unless GENERATE_IMAGES is True.
if GENERATE_IMAGES:
    generate_dataset(
        noise_type=["bubble", "pizza", "average", "blackbox"],
        path=path_generated_all,
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"all_{NOISE_TYPE}_10k.zip",
        **params
    )

#### Params

In [46]:
# Load the parameter table that accompanies the generated "all" dataset.
all_csv = pd.read_csv(
    os.path.join(path_generated_all, "parameters.csv"),
    index_col=False,
)

# Hold out 3000 rows for testing; stratifying on 'epsilon' keeps the epsilon
# distribution comparable between the two splits. Fixed random_state => reproducible split.
all_train, all_test = train_test_split(
    all_csv,
    test_size=3000,
    random_state=12,
    shuffle=True,
    stratify=all_csv['epsilon'],
)

# Persist both splits next to the generated data (the DataFrame index is written too,
# which is the pandas default).
all_train.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_train.csv"))
all_test.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_test.csv"))

### Average

#### Images

In [47]:
# Output folder for the dataset that only uses the "average" noise type.
# Defined unconditionally: the "Params" cell below reads parameters.csv from this
# folder even when image generation is skipped (GENERATE_IMAGES = False), so the
# path must exist on a fresh kernel regardless of the flag.
path_generated_average = os.path.join(path_repo ,"data/generated/average", NOISE_TYPE)

# Optional step: generate the images (and, presumably, parameters.csv — confirm in
# generate_dataset) for the "average" noise type. Skipped unless GENERATE_IMAGES is True.
if GENERATE_IMAGES:
    generate_dataset(
        noise_type=["average"],
        path=path_generated_average,
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"average_{NOISE_TYPE}_10k.zip",
        **params
    )

#### Params

In [48]:
# Load the parameter table that accompanies the generated "average" dataset.
average_csv = pd.read_csv(os.path.join(path_generated_average, "parameters.csv"), index_col=False)

# Hold out 3000 rows for testing, stratified on this dataset's own 'epsilon' column.
# The stratification labels must come from the frame being split: labels taken from
# a different frame are matched by position, so they would either fail on a length
# mismatch or silently stratify on the wrong values.
average_train, average_test = train_test_split(
    average_csv,
    test_size=3000,
    random_state=12,
    shuffle=True,
    stratify=average_csv['epsilon'],
)

# Persist both splits next to the generated data (the DataFrame index is written too,
# which is the pandas default).
average_train.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_train.csv"))
average_test.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_test.csv"))