# Spracovanie a analýza dát galaxií s využitím hlbokého učenia

## Príprava pracovného prostredia na spracovanie dát

Inštalácia knižníc

In [None]:
!pip install opencv-python 
!pip install python_utils
!pip install python-csv
!pip install pandas
!pip install jsonlib
!pip install aplpy

Import potrebných knižníc

In [None]:
import csv
import json
import os

import aplpy
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Príprava dát 

Konvertovanie FIT súborov do jpg súborov

In [None]:
# Folder with the input FITS files and folder receiving the rendered JPEGs.
# Both are concatenated with file names below, so keep the trailing slash.
path = './data/'
destination = './pictures2/'


def convert_data(start_index=12200):
    """Render the FITS files in ``path`` as grayscale JPEGs in ``destination``.

    Every entry of ``os.listdir(path)`` from position ``start_index`` onwards
    is drawn with APLpy (power stretch, exponent 0.5) and saved as
    ``<name>.jpg``, where ``<name>`` is the file name up to its first dot.

    Args:
        start_index: Number of leading directory entries to skip, e.g. to
            resume an interrupted run. The default of 12200 keeps the offset
            that used to be hard-coded here; pass 0 to convert everything.

    NOTE(review): ``os.listdir`` returns entries in arbitrary order, so the
    skipped entries are only the "already converted" ones if the listing order
    is the same as in the previous run - confirm before relying on it.
    """
    for index, file in enumerate(os.listdir(path)):
        if index < start_index:
            continue

        file_name = file.split(".")[0]

        figure = aplpy.FITSFigure(path + file)
        try:
            figure.show_grayscale(invert=False, stretch='power', exponent=0.5)
            plt.savefig(destination + '{}.jpg'.format(file_name), format='jpg', bbox_inches="tight")
        finally:
            # Without this every converted file leaves an open matplotlib
            # figure behind and memory grows with each iteration.
            plt.close()


Vytvorenie textových súborov so súradnicami pre neurónovú sieť YOLO

In [None]:
# Convert the classification export into one YOLO label file per image.
# Each label line is "0 x_center y_center width height", all four values
# normalised by the image size and rounded to 5 decimals.
data_path = "detekcia-edge-on-galaxii-classifications.csv"
data = pd.read_csv(data_path)
destination = f"{os.getcwd()}/yolo/galaxies/labels"
# open() below does not create missing folders.
os.makedirs(destination, exist_ok=True)

# Names of the images whose size metadata could not be read.
null_size_data = []

# Columns are addressed by position: 10 = metadata JSON, 11 = annotation JSON,
# 12 = subject data JSON, 13 = subject id.
for row in data.itertuples(index=False, name=None):
    tasks = json.loads(row[11])
    is_galaxy = tasks[0]["value"]
    if is_galaxy != "Áno":
        continue

    annotations = tasks[1]["value"]
    subject_data = json.loads(row[12])
    metadata = json.loads(row[10])
    subject_id = str(row[13])

    image_name = None
    try:
        image_name = subject_data[subject_id]["Filename"].split(".")[0]
        image_data = metadata["subject_dimensions"][0]
        image_width = image_data["naturalWidth"]
        image_height = image_data["naturalHeight"]
    except TypeError:
        # Size information is missing (a null where a dict was expected).
        # Record the image and skip it; carrying on would normalise the boxes
        # with the size of the previously processed image.
        null_size_data.append(image_name if image_name is not None else subject_id)
        continue

    # newline='' as required by the csv module; '\n' keeps the labels as plain
    # one-box-per-line text on every platform.
    with open(os.path.join(destination, f"{image_name}.txt"), mode='w', newline='') as txt_file:
        writer = csv.writer(txt_file, delimiter=' ', lineterminator='\n')
        for annotation in annotations:
            x = annotation["x"]
            y = annotation["y"]
            width = annotation["width"]
            height = annotation["height"]

            # The annotation stores the top-left corner; YOLO wants the centre.
            writer.writerow([
                0,
                round((x + (width / 2)) / image_width, 5),
                round((y + (height / 2)) / image_height, 5),
                round(width / image_width, 5),
                round(height / image_height, 5),
            ])

    print(f"Just made normalized txt file for {image_name}")

print(f"Corrupted size data: {len(null_size_data)}")
print(null_size_data)

## Threshold

Vystrihnutie jednotlivých galaxií pomocou súradníc

In [None]:
# Folders used by the cropping step:
#   image_path  - full-size images, looked up as "<label name>.jpg"
#   labels_path - label text files, one per image (here a YOLOv5 detect run)
#   path        - output folder for the cropped galaxies
# Point image_path at the images that were fed INTO the detector, not at the
# detector's rendered output: those have a red rectangle drawn on them, which
# would end up inside the crops.
image_path = r'./testovacia'
labels_path = r'./yolov5/runs/detect/exp36/labels'
path = r'./exp36_cropped_galaxies'


def crop_image(coords: str, img: np.ndarray, image_name: str):
    """Cut one bounding box out of ``img`` and save it into the ``path`` folder.

    Args:
        coords: One line of a YOLO label file,
            ``class x_center y_center width height [confidence]``, with the
            four box values normalised by the image size. Anything after the
            fifth field (such as the confidence) is ignored.
        img: Image array whose first two dimensions are height and width.
        image_name: File name of the crop inside the global ``path`` folder.

    Raises:
        ValueError: If ``coords`` has fewer than five fields or a box value is
            not a number.
    """
    # Height and width of the full image.
    dh, dw = img.shape[:2]

    # Translate the normalised box into pixel units.
    _class_id, x_center, y_center, w, h, *_ = coords.split()
    x_center = round(float(x_center) * dw)
    y_center = round(float(y_center) * dh)
    w = round(float(w) * dw)
    h = round(float(h) * dh)
    x = round(x_center - w / 2)
    y = round(y_center - h / 2)

    # A box touching the left/top border can round to a negative offset, which
    # numpy would read as "count from the end"; clamp to the image instead.
    cropped_image = img[max(y, 0):max(y + h, 0), max(x, 0):max(x + w, 0)]
    if cropped_image.size == 0:
        print(f"Skipping {image_name}: bounding box is empty")
        return

    # cv2.imwrite does not create folders and only reports failure through its
    # return value, so make sure the target exists.
    os.makedirs(path, exist_ok=True)
    cv2.imwrite(os.path.join(path, image_name), cropped_image)


def crop_and_save():
    """Crop every labelled box out of its image.

    For each file in ``labels_path`` the image ``<label name>.jpg`` is read
    from ``image_path`` and every line of the label file is passed to
    ``crop_image``. Crops are named ``"<label name> - <line index>.jpg"``.
    Labels whose image cannot be read are reported and skipped.
    """
    print("Processing images...")
    for label in os.listdir(labels_path):
        label_name = label.split('.')[0]

        img = cv2.imread(fr"{image_path}/{label_name}.jpg")
        if img is None:
            # cv2.imread signals a missing or unreadable file by returning None.
            print(f"Skipping {label}: cannot read {image_path}/{label_name}.jpg")
            continue

        with open(fr"{labels_path}/{label}", 'r') as file:
            lines = file.readlines()

        for i, line in enumerate(lines):
            # Blank lines are skipped without renumbering, so the index in the
            # crop name still equals the line number inside the label file.
            if not line.strip():
                continue
            crop_image(line, img, f"{label_name} - {i}.jpg")


crop_and_save()

Thresholdovanie vystrihnutých galaxií

In [None]:
# Turn every cropped galaxy into a black/white mask: grayscale -> Otsu
# threshold -> erosion with a 2x2 kernel.
# NOTE(review): the cropping cell writes to './exp36_cropped_galaxies' while
# this cell reads './cropped_galaxies' - confirm which folder is intended.
image_path = r'./cropped_galaxies'
path = r'./exp36_masks'

images = os.listdir(image_path)

# cv2.imwrite does not create folders and only reports failure through its
# return value, so make sure the target exists.
os.makedirs(path, exist_ok=True)

# Structuring element for the erosion; identical for every image.
# (A morphological opening, cv2.MORPH_OPEN, was tried as an alternative.)
kernel = np.ones((2, 2), np.uint8)

for image_file in images:
    image_name = image_file.split(".")[0]
    image = cv2.imread(fr"{image_path}/{image_name}.jpg")
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        print(f"Skipping {image_file}: cannot read {image_path}/{image_name}.jpg")
        continue

    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold argument (0) is ignored; the value that
    # was actually chosen is returned as `thresh`.
    thresh, im_bw = cv2.threshold(image_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    print("Thresh: ", thresh)

    mask = cv2.erode(im_bw, kernel)

    cv2.imwrite(os.path.join(path, f"{image_name}.jpg"), mask)

## Vrátenie vystrihnutých masiek do fullsize fotky


In [None]:
# Folders used when pasting the masks back into full-size frames:
#   whole_galaxies_path - full-size images, looked up as "<mask name>.jpg"
#   labels_path         - label text files, looked up as "<mask name>.txt"
#   masks_path          - cropped masks named "<mask name> - <rank>.jpg"
#   dest_path           - output folder for the full-size mask images
# NOTE(review): these are absolute paths on one particular Windows machine;
# adjust them before running the cell anywhere else.
whole_galaxies_path = r'C:\Users\Administrator\Desktop\skola\3.rok\BP\codes\crop\test_set\test_whole_galaxies'
labels_path = r'C:\Users\Administrator\Desktop\skola\3.rok\BP\codes\crop\test_set\test_coords'
masks_path = r'C:\Users\Administrator\Desktop\skola\3.rok\BP\codes\crop\test_set\test_cropped_masks'
dest_path = r'C:\Users\Administrator\Desktop\skola\3.rok\BP\codes\crop\test_set\image_masks'


def paste_image(coords: str, img: np.ndarray, image_name: str, mask: np.ndarray):
    """Paste ``mask`` into ``img`` at the position of a bounding box and save it.

    ``img`` is modified in place and then written to ``dest_path``.

    Args:
        coords: One line of a YOLO label file,
            ``class x_center y_center width height [confidence]``, with the
            four box values normalised by the image size. Anything after the
            fifth field is ignored.
        img: Two-dimensional (single channel) full-size image.
        image_name: File name of the result inside the global ``dest_path``.
        mask: Two-dimensional mask to paste; its own shape decides how large
            the pasted area is.

    Raises:
        ValueError: If ``coords`` has fewer than five fields, a box value is
            not a number, or ``img`` is not two-dimensional.
    """
    dh, dw = img.shape

    # Translate the normalised box into pixel units to find its top-left corner.
    _class_id, x_center, y_center, w, h, *_ = coords.split()
    x_center = round(float(x_center) * dw)
    y_center = round(float(y_center) * dh)
    w = round(float(w) * dw)
    h = round(float(h) * dh)

    # A box touching the left/top border can round to a negative offset, which
    # numpy would read as "count from the end"; clamp to the image instead.
    x = max(round(x_center - w / 2), 0)
    y = max(round(y_center - h / 2), 0)

    # The slice is a view into img and is cut off at the image border; trim the
    # mask to the same size so an overhanging mask cannot raise a shape error.
    target = img[y:y + mask.shape[0], x:x + mask.shape[1]]
    target[...] = mask[:target.shape[0], :target.shape[1]]

    # cv2.imwrite does not create folders and only reports failure through its
    # return value, so make sure the target exists.
    os.makedirs(dest_path, exist_ok=True)
    cv2.imwrite(os.path.join(dest_path, image_name), img)


def paste_image_and_save():
    """Rebuild a full-size mask image for every cropped mask in ``masks_path``.

    Mask files are expected to be named ``"<image name> - <rank>.jpg"``, where
    ``rank`` is the line index of the matching box in
    ``labels_path/<image name>.txt``. For each mask the full-size image is
    loaded in grayscale, blanked to black, and the mask is pasted at the box
    position by ``paste_image``. Masks without a readable full-size image or
    without matching coordinates are collected and listed at the end.
    """
    print("Processing images...")
    missing_images = []
    missing_coords = []

    for mask in os.listdir(masks_path):
        print(mask)
        mask_image = cv2.imread(os.path.join(masks_path, mask))
        if mask_image is None:
            # cv2.imread signals a missing or unreadable file by returning None.
            print(f"Skipping {mask}: not a readable image")
            continue
        # The mask is loaded with three channels; keep only the first one.
        mask_image = mask_image[:, :, 0]

        mask_name, separator, rank = mask.split(".")[0].rpartition(' - ')
        if not separator or not rank.isdigit():
            print(f"Skipping {mask}: name is not '<image> - <rank>'")
            continue
        rank = int(rank)
        print(f"{mask}: {mask_name}, {rank}")

        img = cv2.imread(os.path.join(whole_galaxies_path, f"{mask_name}.jpg"), cv2.IMREAD_GRAYSCALE)
        if img is None:
            missing_images.append(mask_name)
            continue
        # Only the size of the full image is needed: paint it black.
        img[:] = 0

        try:
            with open(os.path.join(labels_path, f"{mask_name}.txt"), 'r') as file:
                coords = file.readlines()[rank]
        except (FileNotFoundError, IndexError):
            # No label file, or it has fewer lines than the rank asks for.
            missing_coords.append(mask_name)
            continue

        paste_image(coords, img, mask, mask_image)

    print("Printing missing images:")
    for image in missing_images:
        print(image)

    print("Printing missing coordinates:")
    for name in missing_coords:
        print(name)


paste_image_and_save()

# Extrakcia dát zo súborov FIT

In [None]:
import aplpy
import cv2
import numpy as np
from PIL import Image
from astropy.io import fits
from matplotlib import pyplot as plt

# Define the paths to the FITS file and the mask in JPG format
fits_file_path = 'fpC-005087-r2-0160.fit.gz'
mask_file_path = 'fpC-005087-r2-0160 - 1.jpg'

# Open the FITS file
with fits.open(fits_file_path) as hdul:
    # Access the data array of the primary HDU (assuming it's the first extension)
    data = hdul[0].data

    # Open the mask image
    mask_image = Image.open(mask_file_path)
    # mask_image = cv2.imread(mask_file_path)
    desired_width = data.shape[1]
    desired_height = data.shape[0]

    # Resize the mask image while maintaining the original shape
    resized_mask = mask_image.resize((desired_width, desired_height), resample=Image.NEAREST)
    # resized_mask.show()
    resized_mask.save("fpC-005087-r2-0160_resized_mask.jpg")

    resized_mask = cv2.imread("fpC-005087-r2-0160_resized_mask.jpg")
    resized_mask = resized_mask[:, :, 0]

    
    resized_mask = resized_mask[::-1]
    bin_mask = (resized_mask < 50)
    new_image = np.copy(data)
    new_image[bin_mask] = resized_mask[bin_mask]

    # Create a new FITS file to save the extracted data
    # hdu = fits.PrimaryHDU(new_image)
    # hdul_new = fits.HDUList([hdu])
    hdul_new = hdul
    hdul_new[0].data = new_image

    # Save the extracted data to a new FITS file
    output_file_path = 'fpC-005087-r2-0160_cropped.fit'
    hdul_new.writeto(output_file_path, overwrite=True)

    ### show before with aplpy
    gc = aplpy.FITSFigure(fits_file_path)
    gc.show_grayscale(invert=False, stretch='power', exponent=0.5)
    plt.show()

    ### show result with aplpy
    gc = aplpy.FITSFigure(output_file_path)
    gc.show_grayscale(invert=False, stretch='power', exponent=0.5)
    plt.show()
