In [7]:
# Import the relevant modules:

# Import the modules for vector calculations
import pandas as pd
import polars as pL
import numpy as np
import scipy as sp
import math

# Import the modules for data preprocessing
import os
from astroquery.sdss import SDSS
from astroquery.vizier import Vizier
from astropy import coordinates as coords
import asyncio 
import aiohttp
from astropy.io import fits
from GalaxyHelperFunc import *
from tqdm import tqdm
import time

# Import the modules for data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Import the modules for Deep-Learning
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data_utils
import torch.distributions as distributions
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint

In [8]:
# Download the galaxy data from the SDSS database

astro = """
SELECT TOP 10000
    p.objid, p.ra, p.dec, p.u, p.g, p.r, p.i, p.z,
    p.run, p.rerun, p.camcol, p.field, p.field, p.specobjid, 
    s.z AS redshift, s.plate, s.mjd, s.fiberid
FROM PhotoObj AS p
JOIN SpecObj AS s 
    ON s.bestobjid = p.objid
WHERE
    p.type = 3
    AND s.class = 'GALAXY'
    AND p.clean = 1
"""

Galaxy_Dataset = SDSS.query_sql(astro, timeout = 600)

# Convert the data into a pandas dataframe
Galaxy_Dataframe = Galaxy_Dataset.to_pandas()

# Convert data into a polars dataframe
Galaxy_Polars = pL.DataFrame(Galaxy_Dataframe)

# Display the first 5 rows of the dataframe
Galaxy_Polars.head()

objid,ra,dec,u,g,r,i,z,run,rerun,camcol,field,field1,specobjid,redshift,plate,mjd,fiberid
u64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,u64,f64,i32,i32,i32
1237648703509037704,196.428899,-0.630877,22.59034,22.28877,21.18809,20.0661,19.74049,752,301,2,353,353,4561064317748008960,0.6838333,4051,55337,159
1237648703509037971,196.389789,-0.694249,23.15419,22.82531,20.76846,19.84812,19.58682,752,301,2,353,353,4562238875456788480,0.4224373,4052,55600,336
1237648703509037997,196.417728,-0.667848,22.90089,22.56379,20.98486,19.68926,18.92513,752,301,2,353,353,4561063218236381184,0.6842009,4051,55337,155
1237648703509038045,196.479788,-0.718012,23.89636,22.43867,20.76097,19.67946,19.32264,752,301,2,353,353,4562228704789682176,0.5035014,4052,55589,299
1237648703509038048,196.482531,-0.745879,24.28602,23.22475,20.93204,19.74528,19.13798,752,301,2,353,353,4561052223120103424,0.6223917,4051,55337,115


In [3]:
Galaxy_Dataframe["run"]

0        250
1        250
2        259
3        259
4        259
        ... 
29995    756
29996    756
29997    756
29998    756
29999    756
Name: run, Length: 30000, dtype: int32

In [14]:
# Once the coordinates, ra and dec, of the galaxies have been extracted, we can use these to extract the fits images of the galaxies.
def fits_Image_to_Tensor(fits_Image):
    """
    This function converts a fits image into a tensor
    """
    # Convert the fits image into a numpy array
    fits_Image = fits_Image.astype(np.float32)
    
    # Convert the numpy array into a tensor
    tensor = torch.from_numpy(fits_Image)
    
    return tensor

def Dataset_Generator(Galaxy_Dataframe):
    """
    This function takes in the position of the galaxy, uses this to extract the fits 
    image of the galaxy and then returns it as a tensor.
    """
    # Create an empty list to store the galaxy images
    Galaxy_Images = []
    
    # Loop through the rows of the dataframe
    for index, row in tqdm(total = len(Galaxy_Dataframe), iterable = Galaxy_Dataframe.iterrows(), desc = "Processing Galaxies"):
        if index % 10 == 0:
            print(f"Processing galaxy {index} of {len(Galaxy_Dataframe)}")
        # Extract the position of the galaxy
        position = coords.SkyCoord(row['ra'], row['dec'], unit = 'deg')

        # Extract the fits image of the galaxy
        for attempt in range(5):
            try:
                fits_Image = SDSS.get_images(coordinates=position, band = 'r')[0][0].data
            except (ConnectionError, TimeoutError) as error:
                print(f" {error} at {position.ra.deg}, {position.dec.deg} at attempt {attempt}, trying again...")
                time.sleep(2)
            break
        if attempt == 4:
            print(f"Failed to download the image at {position.ra.deg}, {position.dec.deg}")
            continue
        # Convert the fits image into a tensor
        Galaxy = fits_Image_to_Tensor(fits_Image)
        print(Galaxy[0])
        break
        
        # Append the tensor to the list
        Galaxy_Images.append(Galaxy)

    return Galaxy_Images

# Generate the dataset
Galaxy_Images = Dataset_Generator(Galaxy_Dataframe)
Galaxy_Images = torch.stack(Galaxy_Images)
Galaxy_Images.shape

Processing Galaxies:   0%|          | 0/10000 [00:00<?, ?it/s]

Processing galaxy 0 of 10000


Processing Galaxies:   0%|          | 0/10000 [00:00<?, ?it/s]

tensor([ 0.0154, -0.0231,  0.0058,  ...,  0.0532,  0.0389,  0.0052])





RuntimeError: stack expects a non-empty TensorList

In [11]:
Galaxy_Images

NameError: name 'Galaxy_Images' is not defined

In [3]:
# Once the coordinates, ra and dec, of the galaxies have been extracted, we can use these to extract the fits images of the galaxies.
# Generate the dataset
positions = Galaxy_Dataframe[["ra", "dec"]].values.tolist()
with tqdm(total = len(positions), desc = "Total Progress") as pbar:
    fits_data = await fetch_images(positions, pbar)
    print("Fits Downloaded")
counter = 0
Galaxy_Dataset = []
for data in fits_data:
    Galaxy_Dataset.append(fits_to_tensor(data))
    counter+=1
    if counter % 100 != 0:
        print(counter)

Total Progress:   0%|          | 0/10000 [00:00<?, ?it/s]

Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




Error: object list can't be used in 'await' expression




In [5]:
dir(SDSS)

['AVAILABLE_TEMPLATES',
 'IMAGING_URL_SUFFIX',
 'MAX_CROSSID_RADIUS',
 'QUERY_URL_SUFFIX_DR_10',
 'QUERY_URL_SUFFIX_DR_NEW',
 'QUERY_URL_SUFFIX_DR_OLD',
 'SPECTRA_URL_SUFFIX',
 'TEMPLATES_URL',
 'TIMEOUT',
 'XID_URL_SUFFIX_DR_10',
 'XID_URL_SUFFIX_NEW',
 'XID_URL_SUFFIX_OLD',
 '_SDSSClass__sanitize_query',
 '__abstractmethods__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_args_to_payload',
 '_cache_location',
 '_download_file',
 '_get_crossid_url',
 '_get_query_url',
 '_last_query',
 '_last_url',
 '_parse_result',
 '_rectangle_sql',
 '_request',
 '_response_hook',
 '_session',
 'cache_location',
 'clear_cache',
 'get_i