### A script for generating frames with early-type galaxies, disk galaxies, and galaxies with polar structure.

#### Contents
1. [Utilities.](#utils)
2. [Generation of frames with random galaxies of early types using 2D Sersic profile with Gaussian noise background.](#ETypeGalaxies)
3. [Generation of frames with random galaxies with a polar structure using two Sersic profiles with Gaussian noise background.](#PSGalaxies)
4. [Generation of frames with early-type galaxies, disk galaxies and galaxies with a polar structure, superimposed on Gaussian noise.](#YOLOField)

In [78]:
import cv2
import json
import math
import matplotlib.pyplot as plt
import numpy as np


from os.path import basename, exists, join
from pathlib import Path

<a id="utils"></a>
#### Utilities

In [88]:
def read_json(json_path: str) -> dict:
    """
    Reads a json file and returns a dictionary with its contents
    :param json_path: the path to the json file
    :return: the parsed contents of the file
    :raises ValueError: if the file does not contain valid JSON; the underlying
                        json.JSONDecodeError is attached as the cause
    """
    # JSON text is UTF-8, so do not depend on the platform default encoding
    with open(json_path, encoding="utf-8") as json_file:
        try:
            return json.load(json_file)
        except json.JSONDecodeError as err:
            # keep the parser diagnostics (line/column) reachable through __cause__
            raise ValueError(f"{json_path} is incorrect .json file") from err


def save_fits(image, output_path: str, header=None):
    """
    Saves the image to a fits file, replacing the file if it already exists
    :param image: image in the form of a numpy array
    :param output_path: the path to the fits file where the image will be saved
    :param header: fits file header that will be saved along with the image;
                   a falsy value (None, empty header) leaves the default header in place
    """
    hdu = pyfits.PrimaryHDU(image)
    if header:
        hdu.header = header
    # overwrite=True handles both a new and an already existing file, so a separate
    # existence check (and the check-then-write race it implies) is not needed
    hdu.writeto(output_path, overwrite=True)
        
        
def int_to_suffix(i: int, n_digit: int) -> str:
    """
    Generates a zero-padded suffix of at least n_digit characters for a non-negative integer i:
    (200, 4) -> "0200", (19, 3) -> "019"
    A number with more than n_digit digits is returned in full: (1234, 2) -> "1234"
    :param i: non-negative integer to format
    :param n_digit: minimal length of the suffix
    :return: the decimal representation of i, left-padded with zeros
    :raises ValueError: if i is negative
    """
    if i < 0:
        # a sign has no place in a file-name suffix; fail loudly instead of looping forever
        raise ValueError(f"i must be non-negative, got {i}")
    return str(i).zfill(n_digit)

In [4]:
def mag_to_flux(mu: float, mu_0=0) -> float:
    """
    Converts a magnitude into a flux on the logarithmic magnitude scale
    (a difference of 2.5 mag corresponds to a factor of 10 in flux)
    :param mu: magnitude to convert
    :param mu_0: zero-point magnitude, i.e. the magnitude that maps to unit flux
    :return: the flux 10**((mu_0 - mu) / 2.5)
    """
    exponent = (mu_0 - mu) / 2.5
    return 10**exponent


def flux_to_mag(f: float, mu_0=0) -> float:
    """
    Converts a flux into a magnitude on the logarithmic magnitude scale
    (a factor of 10 in flux corresponds to a difference of 2.5 mag)
    :param f: flux to convert, must be positive
    :param mu_0: zero-point magnitude, i.e. the magnitude of unit flux
    :return: the magnitude mu_0 - 2.5 * log10(f)
    """
    magnitude_offset = 2.5 * math.log10(f)
    return mu_0 - magnitude_offset


def nu_n(n: float) -> float:
    """
    Analytical fit for the nu_{n} Sersic parameter as a series in 1/n
    :param n: Sersic index, must be non-zero
    :return: 2n - 1/3 + 4/(405 n) + 46/(25515 n^2)
    """
    leading = 2 * n - 1 / 3
    first_correction = 4 / (405 * n)
    second_correction = 46 / (25515 * n**2)
    return leading + first_correction + second_correction


def sersic_Lt_to_I0(Lt: float, n: float, re: float) -> float:
    """
    Calculation of the central surface brightness I0 from the total luminosity of the Sersic profile
    :param Lt: total luminosity (flux) of the profile
    :param n: Sersic index
    :param re: effective radius
    :return: Lt * nu^(2n) / (2 * pi * n * Gamma(2n) * re^2), where nu = nu_n(n)
    """
    nu = nu_n(n)
    numerator = Lt * nu**(2 * n)
    denominator = 2 * np.pi * n * math.gamma(2 * n) * re**2
    return numerator / denominator


def sersic_mu_to_Ie(mu: float, n: float, re: float, mu_0=0) -> float:
    """
    Calculation of the effective surface brightness Ie from the profile apparent magnitude
    :param mu: apparent magnitude of the whole profile
    :param n: Sersic index
    :param re: effective radius
    :param mu_0: zero-point magnitude passed to mag_to_flux
    :return: surface brightness at the effective radius, Ie = I0 * exp(-nu_n(n))
    """
    # magnitude -> total flux -> central brightness -> brightness at re
    Lt = mag_to_flux(mu, mu_0)
    I0 = sersic_Lt_to_I0(Lt, n, re)
    nu = nu_n(n)
    Ie = I0 * math.exp(-nu)
    return Ie

In [None]:
def normalize(image, mult_factor: float):
    """
    Turns a single-channel image into a 3-channel uint8 image on a logarithmic scale
    :param image: image in the form of a numpy float32 array
    :param mult_factor: the multiplier applied to the entire image before normalization begins
    :return: image with three identical channels, min-max scaled to the range [0, 255]
    """
    scaled = mult_factor * image

    # negative pixels are zeroed by the mask; the +1 shift maps zero to log(1) = 0
    shifted = (scaled >= 0) * scaled + 1

    # logarithmic scale, then min-max stretch of a single channel to 8 bits
    log_scaled = np.log(shifted)
    channel = cv2.normalize(log_scaled, None, alpha=0, beta=255,
                            norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    return cv2.merge((channel, channel, channel))

In [None]:
def is_collision(new_annotation: list, old_annotations: list) -> bool:
    """
    Tells whether the new profile overlaps with any of the already placed ones
    An annotation is a list that starts with [x_0, y_0, re], where (x_0, y_0) is a profile center
    and re is an effective radius; any further elements are ignored
    Two profiles overlap when the distance between their centers is smaller than
    the larger of the two effective radii
    :param new_annotation: annotation of the candidate profile
    :param old_annotations: annotations of the profiles placed so far
    :return: True if the candidate overlaps with at least one placed profile
    """
    x_0 = new_annotation[0]
    y_0 = new_annotation[1]
    re = new_annotation[2]
    return any(
        (placed[0] - x_0)**2 + (placed[1] - y_0)**2 < max(re, placed[2])**2
        for placed in old_annotations
    )

<a id="ETypeGalaxies"></a>
#### Generation of frames with random galaxies of early types using 2D Sersic profile with Gaussian noise background

In [55]:
from astropy.io import fits as pyfits
from astropy.modeling.models import Sersic2D
from photutils.datasets import make_noise_image


class SersicGenerator:
    """
    Generates a random Sersic profile according to a given distribution.
    The position angle theta is uniformly distributed in the range of [0, pi].
    The ellipticity parameter (ellip = 1 - b/a) is uniformly distributed in the interval
    [ellip_min, ellip_max].
    The profile center is uniformly distributed within [0, x_max] x [0, y_max], where
    x_max and y_max are the integer sizes of the image in pixels.
    The apparent magnitude, the Sersic index and the effective radius are drawn from a mixture:
    a component is chosen with probability p, then each of the three values is drawn uniformly
    from the intervals [m_min, m_max], [n_min, n_max] and [re_min, re_max] of that component.
    The generated image is a numpy array.
    """
    def __init__(self, config: dict):
        """
        :param config: dictionary with the keys "ellip_min", "ellip_max", "x_max", "y_max" and
                       "mixture" (a list of components with the keys "p", "m_min", "m_max",
                       "n_min", "n_max", "re_min", "re_max")
        :raises ValueError: if the component probabilities do not sum to 1 (within 1e-9)
        """
        self.ellip_min = config["ellip_min"]
        self.ellip_max = config["ellip_max"]
        self.x_max = config["x_max"]
        self.y_max = config["y_max"]
        columns = np.arange(self.x_max)
        rows = np.arange(self.y_max)
        self.x, self.y = np.meshgrid(columns, rows)

        # every mixture key becomes an attribute holding one value per component
        mixture_keys = ("p", "m_min", "m_max", "n_min", "n_max", "re_min", "re_max")
        for key in mixture_keys:
            setattr(self, key, [])
        for component in config["mixture"]:
            for key in mixture_keys:
                getattr(self, key).append(component[key])

        total_p = sum(self.p)
        if total_p < 1.0 - 1.0e-9 or total_p > 1.0 + 1.0e-9:
            raise ValueError(f"Mixture is incorrect, because probability sum ({sum(self.p)}) is not equal to 1")

    def get_profile(self, annotate=False):
        """
        Generates a random Sersic profile
        :param annotate: whether to return the annotation of the profile
        :return: a pair (image, annotation); the annotation is the list [x_0, y_0, re], where
                 (x_0, y_0) is the profile center and re is the effective radius,
                 or None if annotate is False
        """
        theta = np.random.uniform(0, np.pi)
        ellip = np.random.uniform(self.ellip_min, self.ellip_max)
        x_0 = np.random.uniform(0, self.x_max)
        y_0 = np.random.uniform(0, self.y_max)

        component = np.random.choice(len(self.p), p=self.p)
        m = np.random.uniform(self.m_min[component], self.m_max[component])
        n = np.random.uniform(self.n_min[component], self.n_max[component])
        re = np.random.uniform(self.re_min[component], self.re_max[component])

        profile = Sersic2D(amplitude=sersic_mu_to_Ie(m, n, re), r_eff=re, n=n,
                           x_0=x_0, y_0=y_0, ellip=ellip, theta=theta)
        image = profile(self.x, self.y)
        return image, ([x_0, y_0, re] if annotate else None)
    
    
def make_field(config_path: str, n: int, noise_level: float, output_path: str, seed=43):
    """
    Creates an image with n random galaxies of early types and Gaussian noise
    :param config_path: the path to the json configuration for SersicGenerator
    :param n: the number of galaxies placed on the image; 0 gives a pure-noise image
    :param noise_level: standard deviation of Gaussian noise
    :param output_path: the file where the image will be stored.
    :param seed: seed of the numpy random generator used for the galaxy parameters;
                 the noise itself is generated with the fixed seed 123
    """
    config = read_json(config_path)
    sersic_generator = SersicGenerator(config)

    np.random.seed(seed)
    # start from an empty frame so that n == 0 does not fail on a missing array
    result = np.zeros(sersic_generator.x.shape)
    for _ in range(n):
        img, _ = sersic_generator.get_profile()
        result += img

    result += make_noise_image(result.shape, mean=0.0, stddev=noise_level, seed=123)

    save_fits(result, output_path)

**Example of a configuration for a SersicGenerator**
```json
{
   "ellip_min":0.0,
   "ellip_max":0.6,
   "x_max":1000,
   "y_max":1000,
   "mixture":[
      {
         "p":0.02,
         "m_min":11,
         "m_max":14,
         "n_min":3,
         "n_max":5,
         "re_min":25,
         "re_max":50
      },
      {
         "p":0.23,
         "m_min":14,
         "m_max":16,
         "n_min":1.5,
         "n_max":3,
         "re_min":10,
         "re_max":25
      },
      {
         "p":0.75,
         "m_min":16,
         "m_max":19,
         "n_min":0.8,
         "n_max":1.5,
         "re_min":5,
         "re_max":10
      }
   ]
}
```

<a id="PSGalaxies"></a>
#### Generation of frames with random galaxies with a polar structure using two Sersic profiles with Gaussian noise background.

In [15]:
class PSGalaxyGenerator:
    """
    Generates a random galaxy with a polar structure according to a given distribution.
    The host galaxy is a single Sersic profile generated like in the SersicGenerator class,
    except that its Sersic index is drawn from a normal distribution (n_m, n_sd) of the chosen
    mixture component; non-positive draws are rejected and redrawn.
    The polar structure is a second Sersic profile, some of whose parameters are linked
    to those of the host galaxy:
    the ellipticity is drawn independently from [ellip_min, ellip_max] of ps_config,
    the center coincides with the center of the host galaxy,
    the apparent magnitude is m_ps = m + c_m with c_m uniform between c_m_min and c_m_max,
    the Sersic index is drawn independently from [n_min, n_max] of ps_config,
    the effective radius is ps_re = c_re * re with c_re uniform in [c_re_min, c_re_max],
    the positional angle is theta_ps = theta + phi with phi uniform in [phi_min, phi_max].
    The generated image is a numpy array.
    """
    # upper bound on redraws of the host Sersic index before giving up
    _MAX_N_DRAWS = 1000

    def __init__(self, host_config: dict, ps_config: dict):
        """
        :param host_config: host configuration with the keys "ellip_min", "ellip_max", "x_max",
                            "y_max" and "mixture" (components with the keys "p", "m_min",
                            "m_max", "n_m", "n_sd", "re_min", "re_max")
        :param ps_config: polar structure configuration with the keys "ellip_min", "ellip_max",
                          "c_m_min", "c_m_max", "n_min", "n_max", "c_re_min", "c_re_max",
                          "phi_min", "phi_max"
        :raises ValueError: if the component probabilities do not sum to 1 (within 1e-9)
        """
        self.ellip_min = host_config["ellip_min"]
        self.ellip_max = host_config["ellip_max"]
        self.x_max = host_config["x_max"]
        self.y_max = host_config["y_max"]
        self.x, self.y = np.meshgrid(np.arange(self.x_max), np.arange(self.y_max))
        self.p = []
        self.m_min = []
        self.m_max = []
        self.n_m = []
        self.n_sd = []
        self.re_min = []
        self.re_max = []
        for elem in host_config["mixture"]:
            self.p.append(elem["p"])
            self.m_min.append(elem["m_min"])
            self.m_max.append(elem["m_max"])
            self.n_m.append(elem["n_m"])
            self.n_sd.append(elem["n_sd"])
            self.re_min.append(elem["re_min"])
            self.re_max.append(elem["re_max"])
        if sum(self.p) < 1.0 - 1.0e-9 or sum(self.p) > 1.0 + 1.0e-9:
            raise ValueError(f"Mixture is incorrect, because probability sum ({sum(self.p)}) is not equal to 1")

        self.ps_ellip_min = ps_config["ellip_min"]
        self.ps_ellip_max = ps_config["ellip_max"]
        self.c_m_min = ps_config["c_m_min"]
        self.c_m_max = ps_config["c_m_max"]
        self.ps_n_min = ps_config["n_min"]
        self.ps_n_max = ps_config["n_max"]
        self.c_re_min = ps_config["c_re_min"]
        self.c_re_max = ps_config["c_re_max"]
        self.phi_min = ps_config["phi_min"]
        self.phi_max = ps_config["phi_max"]

    def _draw_host_n(self, mix_id) -> float:
        """
        Draws the host Sersic index from the normal distribution of the component mix_id,
        redrawing until the value is positive
        :raises ValueError: if no positive value was drawn in _MAX_N_DRAWS attempts
        """
        for _ in range(self._MAX_N_DRAWS):
            n = np.random.normal(self.n_m[mix_id], self.n_sd[mix_id])
            # a Sersic index <= 0 breaks the flux normalisation (division by n, nu**(2n))
            if n > 0:
                return n
        raise ValueError(
            f"Could not draw a positive Sersic index from the normal distribution "
            f"(mean={self.n_m[mix_id]}, sd={self.n_sd[mix_id]})"
        )

    def get_profile(self):
        """
        Generates a random galaxy with a polar structure using two Sersic profiles
        The parameters of both profiles are printed to the standard output
        :return: the sum of the host image and the polar structure image
        """
        # The host galaxy is generated first
        theta = np.random.uniform(0, np.pi)
        ellip = np.random.uniform(self.ellip_min, self.ellip_max)
        x_0 = np.random.uniform(0, self.x_max)
        y_0 = np.random.uniform(0, self.y_max)

        mix_id = np.random.choice(len(self.p), p=self.p)
        m = np.random.uniform(self.m_min[mix_id], self.m_max[mix_id])
        n = self._draw_host_n(mix_id)
        re = np.random.uniform(self.re_min[mix_id], self.re_max[mix_id])

        Ie = sersic_mu_to_Ie(m, n, re)
        host_mod = Sersic2D(amplitude=Ie, r_eff=re, n=n, x_0=x_0, y_0=y_0, ellip=ellip, theta=theta)
        print(f"Host: m:{m:.4f}, re:{re:.4f}, n:{n:.4f}, ellip:{ellip:.4f}, theta:{theta:.4f}, x,y:{x_0:.4f},{y_0:.4f}")
        host = host_mod(self.x, self.y)

        # Then the polar structure is generated
        ps_theta = theta + np.random.uniform(self.phi_min, self.phi_max)
        ps_ellip = np.random.uniform(self.ps_ellip_min, self.ps_ellip_max)
        ps_m = m + np.random.uniform(self.c_m_min, self.c_m_max)
        ps_n = np.random.uniform(self.ps_n_min, self.ps_n_max)
        ps_re = re * np.random.uniform(self.c_re_min, self.c_re_max)

        ps_Ie = sersic_mu_to_Ie(ps_m, ps_n, ps_re)
        ps_mod = Sersic2D(amplitude=ps_Ie, r_eff=ps_re, n=ps_n, x_0=x_0, y_0=y_0, ellip=ps_ellip, theta=ps_theta)
        print(f"PS:   m:{ps_m:.4f}, re:{ps_re:.4f}, n:{ps_n:.4f}, ellip:{ps_ellip:.4f}, theta:{ps_theta:.4f}, x,y:{x_0:.4f},{y_0:.4f}")
        ps = ps_mod(self.x, self.y)

        return host + ps

**Example of a polar structure configuration (ps_config) for a PSGalaxyGenerator**
```json
{
   "ellip_min":0.6,
   "ellip_max":0.9,
   "c_m_min":0.3,
   "c_m_max":1.3,
   "n_min":0.1,
   "n_max":0.5,
   "c_re_min":1,
   "c_re_max":3,
   "phi_min":1.22,
   "phi_max":1.92	
}
```

<a id="YOLOField"></a>
#### Generation of frames with early-type galaxies, disk galaxies and galaxies with a polar structure, superimposed on Gaussian noise

In [146]:
class LateGalaxyGenerator:
    """
    Generates a random disk galaxy (disk + bulge) according to a given distribution of parameters.
    The center of the galaxy is uniformly distributed over the image; the bulge shares the center
    and the positional angle theta of the disk, theta is uniformly distributed from 0 to pi.
    The disk is determined by its apparent magnitude m, ellipticity ellip, Sersic index n
    and effective radius re, each drawn uniformly from its [min, max] interval.
    The bulge has independent uniform distributions of ellipticity and Sersic index,
    its apparent magnitude is b_m = m + c_m with c_m uniform in [c_m_min, c_m_max],
    its effective radius is b_re = re * c_re with c_re drawn from a normal distribution
    (c_re_mean, c_re_sd); non-positive values of c_re are rejected and redrawn.
    """
    # upper bound on redraws of the bulge radius coefficient before giving up
    _MAX_SCALE_DRAWS = 1000

    def __init__(self, config: dict):
        """
        :param config: dictionary with the keys "x_max", "y_max", the disk keys "m_min", "m_max",
                       "re_min", "re_max", "n_min", "n_max", "ellip_min", "ellip_max" and
                       the bulge keys "c_m_min", "c_m_max", "c_re_mean", "c_re_sd",
                       "b_n_min", "b_n_max", "b_ellip_min", "b_ellip_max"
        """
        # common parameters
        self.x_max = config["x_max"]
        self.y_max = config["y_max"]
        self.x, self.y = np.meshgrid(np.arange(self.x_max), np.arange(self.y_max))

        # disk parameters
        self.m_min = config["m_min"]
        self.m_max = config["m_max"]
        self.re_min = config["re_min"]
        self.re_max = config["re_max"]
        self.n_min = config["n_min"]
        self.n_max = config["n_max"]
        self.ellip_min = config["ellip_min"]
        self.ellip_max = config["ellip_max"]

        # bulge parameters
        self.c_m_min = config["c_m_min"]
        self.c_m_max = config["c_m_max"]
        self.c_re_mean = config["c_re_mean"]
        self.c_re_sd = config["c_re_sd"]
        self.b_n_min = config["b_n_min"]
        self.b_n_max = config["b_n_max"]
        self.b_ellip_min = config["b_ellip_min"]
        self.b_ellip_max = config["b_ellip_max"]

    def _draw_bulge_scale(self) -> float:
        """
        Draws the bulge radius coefficient c_re from the normal distribution (c_re_mean, c_re_sd),
        redrawing until the value is positive
        :raises ValueError: if no positive value was drawn in _MAX_SCALE_DRAWS attempts
        """
        for _ in range(self._MAX_SCALE_DRAWS):
            c_re = np.random.normal(self.c_re_mean, self.c_re_sd)
            # a non-positive coefficient would give a non-positive effective radius
            if c_re > 0:
                return c_re
        raise ValueError(
            f"Could not draw a positive bulge radius coefficient from the normal distribution "
            f"(mean={self.c_re_mean}, sd={self.c_re_sd})"
        )

    def get_profile(self, annotate=False):
        """
        Generates a random disk galaxy
        :param annotate: whether to return the annotation of the galaxy
        :return: a pair (image, annotation); the annotation is the list [x_0, y_0, re], where
                 (x_0, y_0) is the galaxy center and re is the effective radius of the disk,
                 or None if annotate is False
        """
        x_0 = np.random.uniform(0, self.x_max)
        y_0 = np.random.uniform(0, self.y_max)

        # disk parameters
        m = np.random.uniform(self.m_min, self.m_max)
        re = np.random.uniform(self.re_min, self.re_max)
        n = np.random.uniform(self.n_min, self.n_max)
        ellip = np.random.uniform(self.ellip_min, self.ellip_max)
        theta = np.random.uniform(0, np.pi)
        Ie = sersic_mu_to_Ie(m, n, re)
        mod = Sersic2D(amplitude=Ie, r_eff=re, n=n, x_0=x_0, y_0=y_0, ellip=ellip, theta=theta)

        # bulge parameters
        b_m = m + np.random.uniform(self.c_m_min, self.c_m_max)
        b_re = re * self._draw_bulge_scale()
        b_n = np.random.uniform(self.b_n_min, self.b_n_max)
        b_ellip = np.random.uniform(self.b_ellip_min, self.b_ellip_max)
        b_Ie = sersic_mu_to_Ie(b_m, b_n, b_re)
        b_mod = Sersic2D(amplitude=b_Ie, r_eff=b_re, n=b_n, x_0=x_0, y_0=y_0, ellip=b_ellip, theta=theta)

        image = mod(self.x, self.y) + b_mod(self.x, self.y)
        if annotate:
            return image, [x_0, y_0, re]
        return image, None

**Example of a disk galaxy configuration (disk_config) for a LateGalaxyGenerator**
```json
{
   "x_max":416,
   "y_max":416,
   "m_min":14,
   "m_max":16,
   "re_min":10,
   "re_max":20,
   "n_min":0.9,
   "n_max":1.1,
   "ellip_min":0.0,
   "ellip_max":0.7,
   "c_m_min":0.1,
   "c_m_max":1.1,
   "c_re_mean":0.4,
   "c_re_sd":0.2,
   "b_n_min":1.0,
   "b_n_max":3.0,
   "b_ellip_min":0.0,
   "b_ellip_max":0.5
}
```

In [143]:
class SpecialPSGalaxyGenerator:
    """
    Generates a random galaxy with a polar structure according to a given distribution of parameters.
    The center of the galaxy is uniformly distributed over the image and is shared by the host
    and the polar structure; the positional angle theta of the host is uniform from 0 to pi.
    The host is defined by its apparent magnitude, Sersic index, effective radius and ellipticity.
    The polar structure is defined by its own set of the same parameters, and its positional
    angle is given through the phase phi: ps_theta = theta + phi.
    The parameter distribution is a mixture: a component is chosen with probability p and
    every parameter is then drawn uniformly from the [min, max] interval of that component.
    """
    def __init__(self, config: dict):
        """
        :param config: dictionary with the keys "x_max", "y_max" and "mixture"; every mixture
                       component has the key "p", the host keys "m_*", "n_*", "re_*", "ellip_*"
                       and the polar structure keys "ps_m_*", "ps_n_*", "ps_re_*", "ps_ellip_*",
                       "ps_phi_*", where * is "min" or "max"
        :raises ValueError: if the component probabilities do not sum to 1 (within 1e-9)
        """
        # common parameters
        self.x_max = config["x_max"]
        self.y_max = config["y_max"]
        columns = np.arange(self.x_max)
        rows = np.arange(self.y_max)
        self.x, self.y = np.meshgrid(columns, rows)

        # every mixture key becomes an attribute holding one value per component
        mixture_keys = (
            # probability of the component
            "p",
            # host parameters
            "m_min", "m_max", "n_min", "n_max",
            "re_min", "re_max", "ellip_min", "ellip_max",
            # polar structure parameters
            "ps_m_min", "ps_m_max", "ps_n_min", "ps_n_max",
            "ps_re_min", "ps_re_max", "ps_ellip_min", "ps_ellip_max",
            "ps_phi_min", "ps_phi_max",
        )
        for key in mixture_keys:
            setattr(self, key, [])
        for component in config["mixture"]:
            for key in mixture_keys:
                getattr(self, key).append(component[key])

        total_p = sum(self.p)
        if total_p < 1.0 - 1.0e-9 or total_p > 1.0 + 1.0e-9:
            raise ValueError(f"Mixture is incorrect, because probability sum ({sum(self.p)}) is not equal to 1")

    def get_profile(self, annotate=False):
        """
        Generates a random galaxy with a polar structure using two Sersic profiles
        :param annotate: whether to return the annotation of the galaxy
        :return: a pair (image, annotation); the annotation is the list [x_0, y_0, ps_re], where
                 (x_0, y_0) is the galaxy center and ps_re is the effective radius of
                 the polar structure, or None if annotate is False
        """
        # common center of both components
        x_0 = np.random.uniform(0, self.x_max)
        y_0 = np.random.uniform(0, self.y_max)

        # component of the mixture
        k = np.random.choice(len(self.p), p=self.p)

        # host galaxy
        m = np.random.uniform(self.m_min[k], self.m_max[k])
        re = np.random.uniform(self.re_min[k], self.re_max[k])
        n = np.random.uniform(self.n_min[k], self.n_max[k])
        ellip = np.random.uniform(self.ellip_min[k], self.ellip_max[k])
        theta = np.random.uniform(0, np.pi)
        host_model = Sersic2D(amplitude=sersic_mu_to_Ie(m, n, re), r_eff=re, n=n,
                              x_0=x_0, y_0=y_0, ellip=ellip, theta=theta)
        host = host_model(self.x, self.y)

        # polar structure
        ps_m = np.random.uniform(self.ps_m_min[k], self.ps_m_max[k])
        ps_re = np.random.uniform(self.ps_re_min[k], self.ps_re_max[k])
        ps_n = np.random.uniform(self.ps_n_min[k], self.ps_n_max[k])
        ps_ellip = np.random.uniform(self.ps_ellip_min[k], self.ps_ellip_max[k])
        ps_theta = theta + np.random.uniform(self.ps_phi_min[k], self.ps_phi_max[k])
        ps_model = Sersic2D(amplitude=sersic_mu_to_Ie(ps_m, ps_n, ps_re), r_eff=ps_re, n=ps_n,
                            x_0=x_0, y_0=y_0, ellip=ps_ellip, theta=ps_theta)
        ps = ps_model(self.x, self.y)

        return (host + ps), ([x_0, y_0, ps_re] if annotate else None)

    
class YOLOFieldGenerator:
    """
    Generates training images with early-type galaxies, disk galaxies and galaxies with a polar structure
    superimposed on Gaussian noise.
    Galaxies of early types are generated using SersicGenerator.
    Disk galaxies are generated using LateGalaxyGenerator.
    Galaxies with polar structures are generated using SpecialPSGalaxyGenerator.
    """
    def __init__(self, common_config:dict, early_config: dict, disk_config: dict, ps_early_config: dict):
        """
        :param common_config: dictionary with the keys "n_min", "n_max" (bounds for the number of
                              early-type galaxies), "d_n_min", "d_n_max" (bounds for the number of
                              disk galaxies), "ps_ns" and "ps_ps" (possible numbers of galaxies with
                              a polar structure and their probabilities) and "noise_level"
                              (standard deviation of the Gaussian noise)
        :param early_config: configuration for SersicGenerator
        :param disk_config: configuration for LateGalaxyGenerator
        :param ps_early_config: configuration for SpecialPSGalaxyGenerator
        """
        self.n_min = common_config["n_min"]
        self.n_max = common_config["n_max"]
        self.d_n_min = common_config["d_n_min"]
        self.d_n_max = common_config["d_n_max"]
        self.ps_ns = common_config["ps_ns"]
        self.ps_ps = common_config["ps_ps"]
        self.noise_level = common_config["noise_level"]

        self.early_generator = SersicGenerator(early_config)
        self.disk_generator = LateGalaxyGenerator(disk_config)
        self.ps_early_generator = SpecialPSGalaxyGenerator(ps_early_config)

    def _annotations_to_string(self, galaxy_annotations: list) -> str:
        """
        Converts the list of extended galaxy annotations into a single string
        An extended galaxy annotation is a list of 4 elements: [x_0, y_0, re (or ps_re), label].
        Every galaxy gets a square box with a side of 3.5 effective radii around its center,
        clipped to the image. The output string looks like
        "x_min_1,y_min_1,x_max_1,y_max_1,label_1 ... x_min_k,y_min_k,x_max_k,y_max_k,label_k"
        This annotation string is used to train the YOLO detection model
        """
        # NOTE(review): the boxes are clipped to the frame size of the early-type generator,
        # which assumes that all three generators share the same x_max / y_max - confirm
        width = self.early_generator.x_max
        height = self.early_generator.y_max
        str_annotations = []
        for annotation in galaxy_annotations:
            x_0 = int(annotation[0])
            y_0 = int(annotation[1])
            half_box = int(3.5 * annotation[2]) // 2
            x_min = max(0, x_0 - half_box)
            x_max = min(width - 1, x_0 + half_box)
            y_min = max(0, y_0 - half_box)
            y_max = min(height - 1, y_0 + half_box)
            str_annotations.append(f"{x_min},{y_min},{x_max},{y_max},{annotation[3]}")
        return " ".join(str_annotations)

    def _add_galaxies(self, generator, count, label, result, galaxy_annotations, collision):
        """
        Adds count galaxies produced by generator to the image result (in place) and appends
        their extended annotations [x_0, y_0, re, label] to galaxy_annotations
        If collision is False, a galaxy is regenerated until it does not overlap
        with the galaxies placed so far
        """
        for _ in range(count):
            img, annotation = generator.get_profile(annotate=True)
            if not collision:
                # NOTE(review): this loop has no attempt limit, so it may spin for a long
                # time if the frame is too crowded to fit one more galaxy
                while is_collision(annotation, galaxy_annotations):
                    img, annotation = generator.get_profile(annotate=True)
            annotation.append(label)
            galaxy_annotations.append(annotation)
            result += img

    def get_field(self, annotate=False, collision=True):
        """
        Generates an image with early-type and disk galaxies and galaxies with polar structures
        superimposed on Gaussian noise
        :param annotate: if True, a string annotation of the galaxies of the field is returned
                         along with the image. Each galaxy is annotated with five numbers:
                         x_min, y_min, x_max, y_max and label. The label is 1 if the galaxy
                         has a polar structure, 2 for disk galaxies and 0 otherwise
        :param collision: if False, the generated galaxies will not overlap (up to r_e)
        :return: a pair (image, annotation string); the second element is None if annotate is False
        """
        n = np.random.randint(self.n_min, self.n_max + 1)
        d_n = np.random.randint(self.d_n_min, self.d_n_max + 1)
        ps_n = np.random.choice(self.ps_ns, p=self.ps_ps)

        # start from an empty frame so that a field without early-type galaxies
        # does not fail on a missing array
        result = np.zeros(self.early_generator.x.shape)
        galaxy_annotations = []

        # first, we generate galaxies of early types and apply noise
        self._add_galaxies(self.early_generator, n, 0, result, galaxy_annotations, collision)
        # NOTE(review): the noise seed is one of only 100 values, so noise realizations
        # repeat across a large dataset
        result += make_noise_image(result.shape, mean=0.0, stddev=self.noise_level,
                                   seed=np.random.randint(100))

        # generating disk galaxies
        self._add_galaxies(self.disk_generator, d_n, 2, result, galaxy_annotations, collision)

        # generating galaxies with polar rings
        self._add_galaxies(self.ps_early_generator, ps_n, 1, result, galaxy_annotations, collision)

        # if an annotation is needed, then convert it to a string
        if annotate:
            return result, self._annotations_to_string(galaxy_annotations)

        return result, None
    
    
def _indexed_file_path(directory: str, suffix: str, extension: str) -> str:
    """
    Builds the path "<directory>/<directory name>_<suffix><extension>"
    :param directory: the directory in which the file will be placed
    :param suffix: the zero-padded index of the file
    :param extension: the file extension including the leading dot
    """
    # basename() of a path with a trailing separator is an empty string, which would
    # produce files named "_<suffix><extension>"; Path ignores the trailing separator.
    # basename() is kept as a fallback for paths where Path gives no name (e.g. ".")
    dir_name = Path(directory).name or basename(directory)
    return join(directory, dir_name) + "_" + suffix + extension


def generate_dataset(n_field: int, common_config_path: str, early_config_path: str,
                     disk_config_path: str, ps_early_config_path: str,
                     png_dir: str, fits_dir: str, annotation_path: str, base_path: str,
                     seed: int, collision=False, mult_factor=1.0e12):
    """
    Generates a dataset of .png images with early-type galaxies, disk galaxies and galaxies with polar rings
    with superimposed white noise
    :param n_field: the number of images in the dataset
    :param common_config_path: the path to the general configuration
    :param early_config_path: the path to the configuration defining early-type galaxies
    :param disk_config_path: the path to the configuration defining disk galaxies
    :param ps_early_config_path: the path to the configuration defining galaxies with polar rings
    :param png_dir: the directory where the .png images will be saved
    :param fits_dir: the directory where the fits images will be saved, if fits_dir==None,
                     then the fits images will not be recorded
    :param annotation_path: the path where the file with the annotation of all fields will be written
    :param base_path: the path to the directory where the images for training will be stored
                      (relevant for the system in which the training will take place)
    :param seed: the seed of the numpy random number generator, set before the fields are generated
    :param collision: forwarded to the field generator; if False, the generator re-draws a galaxy
                      until it does not collide with the galaxies already placed in the field
    :param mult_factor: the multiplier by which the synthetic image will be multiplied
                        before preprocessing is performed to create png files
    :raises IOError: if a .png image could not be written
    """
    # create the necessary directories, if there are none; the directory of the annotation
    # file is included so that the final write does not fail on a missing directory
    for dir_path in (png_dir, fits_dir, Path(annotation_path).parent):
        if dir_path:
            Path(dir_path).mkdir(parents=True, exist_ok=True)

    # initializing the generators
    common_config = read_json(common_config_path)
    early_config = read_json(early_config_path)
    disk_config = read_json(disk_config_path)
    ps_early_config = read_json(ps_early_config_path)

    field_generator = YOLOFieldGenerator(common_config, early_config, disk_config, ps_early_config)

    np.random.seed(seed)

    # every file of the dataset gets an index padded to the same width
    n_digit = len(str(n_field))

    field_annotations = []
    # image generation
    for i in range(n_field):
        field, annotation = field_generator.get_field(annotate=True, collision=collision)
        suffix = int_to_suffix(i, n_digit)

        # saving fits
        if fits_dir:
            save_fits(field, _indexed_file_path(fits_dir, suffix, ".fits"))

        # creating and saving .png images
        normalize_image = normalize(field, mult_factor)
        path = _indexed_file_path(png_dir, suffix, ".png")
        # cv2.imwrite reports a failed write through its return value; without this check
        # the annotation file would reference an image that does not exist
        if not cv2.imwrite(path, normalize_image):
            raise IOError(f"failed to write image {path}")

        # one annotation line per field: the image path on the training system, then the boxes
        field_annotations.append(join(base_path, basename(path)) + " " + annotation)

    # writing annotations
    with open(annotation_path, "w") as wf:
        wf.write("\n".join(field_annotations))

**Example of a common configuration for a YOLOFieldGenerator**
```json
{
   "n_min":2,
   "n_max":8,
   "ps_ns":[
      0,
      1
   ],
   "ps_ps":[
      0.85,
      0.15
   ],
   "noise_level":5e-12
}
```

In [147]:
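# generating a training sample of 800 images
# NOTE: the commented-out calls for samples 1 and 2 below omit disk_config_path and
# no longer match the generate_dataset signature; add it before re-enabling them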

# n_field = 800
# common_config_path = "./train_configs/train_common_1.json"
# early_config_path = "./train_configs/train_sersic_mixture_1.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_1.json"
# png_dir = "./galaxies_train_1/galaxies_train_1_png"
# fits_dir = None
# annotation_path = "./galaxies_train_1/galaxies_train_1.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_train_1/galaxies_train_1_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=43)

# n_field = 800
# common_config_path = "./train_configs/train_common_2.json"
# early_config_path = "./train_configs/train_sersic_mixture_2.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_2.json"
# png_dir = "./galaxies_train_2/galaxies_train_2_png"
# fits_dir = None
# annotation_path = "./galaxies_train_2/galaxies_train_2.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_train_2/galaxies_train_2_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=43)

# generation of a training sample of 1200 images with the addition of disk galaxies

# n_field = 1200
# common_config_path = "./train_configs/train_common_3.json"
# early_config_path = "./train_configs/train_sersic_mixture_3.json"
# disk_config_path = "./train_configs/train_disk_3.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_3.json"
# png_dir = "./galaxies_train_3/galaxies_train_3_png"
# fits_dir = None
# annotation_path = "./galaxies_train_3/galaxies_train_3.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_train_3/galaxies_train_3_png"
# generate_dataset(n_field, common_config_path, early_config_path, disk_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=43)

In [149]:
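# generating a validation sample of 200 images
# NOTE: the commented-out calls for samples 1 and 2 below omit disk_config_path and
# no longer match the generate_dataset signature; add it before re-enabling them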

# n_field = 200
# common_config_path = "./train_configs/train_common_1.json"
# early_config_path = "./train_configs/train_sersic_mixture_1.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_1.json"
# png_dir = "./galaxies_test_1/galaxies_test_1_png"
# fits_dir = None
# annotation_path = "./galaxies_test_1/galaxies_test_1.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_test_1/galaxies_test_1_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=51)

# n_field = 200
# common_config_path = "./train_configs/train_common_2.json"
# early_config_path = "./train_configs/train_sersic_mixture_2.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_2.json"
# png_dir = "./galaxies_test_2/galaxies_test_2_png"
# fits_dir = None
# annotation_path = "./galaxies_test_2/galaxies_test_2.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_test_2/galaxies_test_2_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=51)

# generation of a validation sample of 400 images with the addition of disk galaxies
# n_field = 400
# common_config_path = "./train_configs/train_common_3.json"
# early_config_path = "./train_configs/train_sersic_mixture_3.json"
# disk_config_path = "./train_configs/train_disk_3.json"
# ps_early_config_path = "./train_configs/train_ps_mixture_3.json"
# png_dir = "./galaxies_test_3/galaxies_test_3_png"
# fits_dir = None
# annotation_path = "./galaxies_test_3/galaxies_test_3.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_test_3/galaxies_test_3_png"
# generate_dataset(n_field, common_config_path, early_config_path, disk_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=51)

In [150]:
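# generating a demo sample of 15 images 1500x1500
# NOTE: the commented-out calls for demo samples 1 and 2 below omit disk_config_path and
# no longer match the generate_dataset signature; add it before re-enabling them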

# n_field = 15
# common_config_path = "./train_configs/demo_common_1.json"
# early_config_path = "./train_configs/demo_sersic_mixture_1.json"
# ps_early_config_path = "./train_configs/demo_ps_mixture_1.json"
# png_dir = "./galaxies_demo_1/galaxies_demo_1_png"
# fits_dir = "./galaxies_demo_1/galaxies_demo_1_fits"
# annotation_path = "./galaxies_demo_1/galaxies_demo_1.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_demo_1/galaxies_demo_1_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=51)

# generating a demo sample of 15 images 800x800
# n_field = 15
# common_config_path = "./train_configs/demo_common_2.json"
# early_config_path = "./train_configs/demo_sersic_mixture_2.json"
# ps_early_config_path = "./train_configs/demo_ps_mixture_2.json"
# png_dir = "./galaxies_demo_2/galaxies_demo_2_png"
# fits_dir = "./galaxies_demo_2/galaxies_demo_2_fits"
# annotation_path = "./galaxies_demo_2/galaxies_demo_2.txt"
# base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_demo_2/galaxies_demo_2_png"
# generate_dataset(n_field, common_config_path, early_config_path, ps_early_config_path, 
#                  png_dir, fits_dir, annotation_path, base_path, seed=51)

# generation of a demo sample of 15 800x800 fields with the addition of disk galaxies

# input configurations
common_config_path = "./train_configs/demo_common_3.json"
early_config_path = "./train_configs/demo_sersic_mixture_3.json"
disk_config_path = "./train_configs/demo_disk_3.json"
ps_early_config_path = "./train_configs/demo_ps_mixture_3.json"

# output locations: images, fits files and the annotation file
png_dir = "./galaxies_demo_3/galaxies_demo_3_png"
fits_dir = "./galaxies_demo_3/galaxies_demo_3_fits"
annotation_path = "./galaxies_demo_3/galaxies_demo_3.txt"
# prefix written in front of every image name in the annotation file
base_path = "/content/TensorFlow-2.x-YOLOv3/galaxies/galaxies_demo_3/galaxies_demo_3_png"

# number of fields to generate
n_field = 15

generate_dataset(
    n_field=n_field,
    common_config_path=common_config_path,
    early_config_path=early_config_path,
    disk_config_path=disk_config_path,
    ps_early_config_path=ps_early_config_path,
    png_dir=png_dir,
    fits_dir=fits_dir,
    annotation_path=annotation_path,
    base_path=base_path,
    seed=51,
)