In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
sys.path.append('..')

# Pipeline

In [2]:
GENERATE_AVERAGE_NOISE = False
GENERATE_FOURIER_NOISE = False
GENERATE_AVERAGE_IMAGES = False
GENERATE_FOURIER_IMAGES = True
GENERATE_ALL_IMAGES = False

In [3]:
NOISE_TYPE = "steel"
NUM_RAW_IMAGES_FOR_NOISE_GENERATION = 100
NUM_NOISE_IMAGES = 500

## Lets make some noise

In [4]:
from src.data_generation.datasets.generator_noise_average import generate_average_noise_dataset
from src.data_generation.datasets.generator_noise_fourier import generate_fourier_noise_dataset

In [5]:
path_repo = os.path.dirname(os.getcwd())
path_raw = os.path.join(path_repo ,"data/raw/steel/1channel")
path_average_noise = os.path.join(path_repo ,"data/average_noise/", NOISE_TYPE)
path_fourier_noise = os.path.join(path_repo ,"data/fourier_noise/", NOISE_TYPE)

In [15]:
generate_average_noise_dataset(
        path_to_raw=path_raw,
        path=path_average_noise,
        num_images=NUM_NOISE_IMAGES,
        num_used_raw_images=NUM_RAW_IMAGES_FOR_NOISE_GENERATION,
        seed=23)

100%|██████████| 500/500 [03:19<00:00,  2.51it/s]


In [6]:
generate_fourier_noise_dataset(
        path=path_fourier_noise,
        num_images=NUM_NOISE_IMAGES,
        path_to_raw=path_raw,
        seed=25,
        pass_value=4 
    )

Available raw images:  100000


100%|██████████| 500/500 [00:18<00:00, 26.79it/s]


## Artificial images generation

In [6]:
from src.data_generation.datasets.generator import generate_dataset

In [7]:
params = {
    # params for blackbox
    "width": (30,150),
    "height": (30,100),
    "x": (0, 640),
    "y": (0, 480),
    # params for pizza
    "nr_of_pizzas": (10,20),
    "center_point": (320, 240),
    "channels": 1,
    "strength": (10,20),
    # params for average
    "path_average_noise": path_average_noise,
    "path_fourier_noise": path_fourier_noise,
    # params for bubble
    "spray_particles": 800,
    "spray_diameter": 8,
    "fringes_color": None,
    "range_of_blobs": (30,40)
}

### Fourier + Average

#### Images

In [8]:
path_generated_fourier = os.path.join(path_repo ,"data/generated/fourier/single")
generate_dataset(
        noise_type=["fourier"],
        path=path_generated_fourier,
        name_prefix=f"fourier_single_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=25,
        zipfile=False,
        **params
    )

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [07:45<00:00,  2.15it/s]


#### Params

In [16]:
if GENERATE_FOURIER_IMAGES:
    fourier_csv = pd.read_csv(os.path.join(path_generated_fourier, "parameters.csv"), index_col=False)
    fourier_train, fourier_test = train_test_split(fourier_csv, test_size=3000, random_state=12, shuffle=True, stratify=fourier_csv['epsilon'])

    fourier_train.to_csv(os.path.join(path_generated_fourier, f"fourier_{NOISE_TYPE}_train.csv"))
    fourier_test.to_csv(os.path.join(path_generated_fourier, f"fourier_{NOISE_TYPE}_test.csv"))

### All

#### Images

In [11]:
if GENERATE_ALL_IMAGES:
    path_generated_all = os.path.join(path_repo ,"data/generated/all", NOISE_TYPE)
    generate_dataset(
        noise_type=["bubble", "pizza", "average", "blackbox"],
        path=path_generated_all,
        name_prefix=f"all_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"all_{NOISE_TYPE}_10k.zip",
        **params
    )

#### Params

In [12]:
if GENERATE_ALL_IMAGES:
    all_csv = pd.read_csv(os.path.join(path_generated_all, "parameters.csv"), index_col=False)
    all_train, all_test = train_test_split(all_csv, test_size=3000, random_state=12, shuffle=True, stratify=all_csv['epsilon'])

    all_train.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_train.csv"))
    all_test.to_csv(os.path.join(path_generated_all, f"all_{NOISE_TYPE}_test.csv"))

NameError: name 'path_generated_all' is not defined

### Average

#### Images

In [None]:
if GENERATE_AVERAGE_IMAGES:
    path_generated_average = os.path.join(path_repo ,"data/generated/average", NOISE_TYPE)
    generate_dataset(
        noise_type=["average"],
        path=path_generated_average,
        name_prefix=f"average_{NOISE_TYPE}",
        n_copies=10,
        epsilon_range=(0.0, 1.0),
        epsilon_step=0.001,
        seed=23,
        zipfile=True,
        filename=f"average_{NOISE_TYPE}_10k.zip",
        **params
    )

100%|██████████| 1000/1000 [08:43<00:00,  1.91it/s]


#### Params

In [None]:
if GENERATE_AVERAGE_IMAGES:
    average_csv = pd.read_csv(os.path.join(path_generated_average, "parameters.csv"), index_col=False)
    average_train, average_test = train_test_split(average_csv, test_size=3000, random_state=12, shuffle=True, stratify=average_csv['epsilon'])

    average_train.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_train.csv"))
    average_test.to_csv(os.path.join(path_generated_average, f"average_{NOISE_TYPE}_test.csv"))