In [None]:
import pandas as pd
import numpy as np
from astropy.cosmology import Planck13
from astropy.io import fits
from astropy.table import Table
from tqdm import tqdm
%matplotlib inline
import matplotlib.pyplot as plt

import disperse

In [None]:
# Cosmological parameters handed to Disperse3D further down.
# NOTE(review): Ol and Ok are hard-coded rather than read from `cosmo` —
# confirm they are meant to stay in sync with the chosen cosmology.
cosmo = Planck13
H0, Om = cosmo.H0.value, cosmo.Om0
Ol, Ok = 0.69288, 0.0
print(f'H0 = {H0}')
print(f'Om = {Om}')
print(f'Ol = {Ol}')

In [None]:
import pickle

In [None]:
# Load the pickled source list.
# NOTE: pickle.load executes arbitrary code from the file — only use trusted dumps.
with open('srclist_lockman_032_wvsh.pkl', 'rb') as src_file:
    data = pickle.load(src_file)

In [None]:
# Display the source list loaded from the pickle.
data

In [None]:
# (min, max) of the RA column — the RA extent covered by the source list.
data['RA'].min(), data['RA'].max()

In [None]:
# (min, max) of the DEC column — the DEC extent covered by the source list.
data['DEC'].min(), data['DEC'].max()

In [None]:
# Rows with a positive EXT_LIKE — the same selection later used to build `clusters`.
data.loc[data['EXT_LIKE'] > 0]

In [None]:
# Read the full SDSS DR16 table (unfiltered) for inspection.
galaxies = pd.read_csv('SDSS/SDSS_DR16.csv')

In [None]:
# Display the galaxy table as currently bound to `galaxies`.
galaxies

In [None]:
# Build the galaxy catalogue used as DisPerSE input.
# The CSV is re-read here so the cell gives the same result on every run.
galaxies = pd.read_csv('SDSS/SDSS_DR16.csv')

# Keep spectroscopic galaxies inside the RA/DEC window and with 0 < z <= 1.7.
# `between` is inclusive on both ends, matching the original `<=` comparisons.
in_window = (
    (galaxies['class'] == 'GALAXY')
    & galaxies['ra'].between(150, 172)
    & galaxies['dec'].between(50, 65.5)
    & (galaxies['z'] > 0.00)
    & (galaxies['z'] <= 1.7)
)

galaxies = (
    galaxies.loc[in_window, ['ra', 'dec', 'z']]
    .rename(columns={'ra': 'RA', 'dec': 'DEC', 'z': 'Z'})
    # drop_duplicates returns a new frame; the previous version discarded the
    # result, so duplicated (RA, DEC, Z) rows were silently kept.
    .drop_duplicates(subset=['RA', 'DEC', 'Z'])
    .reset_index(drop=True)
)
galaxies

In [None]:
# Extended-source candidates: rows with EXT_LIKE > 0, reduced to id and sky
# position, with a fresh 0..n-1 index.
clusters = (
    data.loc[data['EXT_LIKE'] > 0, ['ID_SRC', 'RA', 'DEC']]
    .reset_index(drop=True)
)
clusters

In [None]:
# Trial redshift grid: 751 values from 0.05 to 1.55 in steps of 0.002.
z = [round(0.05 + 0.002 * i, 5) for i in range(0, 751)]
# Coarser alternative grid:
# z = [round(0.05 + 0.05 * i, 5) for i in range(0, 31)]

# Repeat every source once per trial redshift, producing four parallel lists
# of length n_sources * len(z).
# The lists are grown in place with `extend`; the previous `X = X + [...]`
# form re-copied the whole list on every iteration (quadratic in n_sources).
ID, RA, DEC, Z = [], [], [], []
n_z = len(z)
for i in range(clusters.shape[0]):
    row = clusters.iloc[i]
    ID.extend([int(row['ID_SRC'])] * n_z)
    RA.extend([row['RA']] * n_z)
    DEC.extend([row['DEC']] * n_z)
    Z.extend(z)
len(ID), len(RA), len(DEC), len(Z)

In [None]:
# Replace `clusters` with the expanded (source x trial redshift) table.
# NOTE: this rebinds `clusters`; the cell that builds ID/RA/DEC/Z expects the
# earlier per-source frame (with ID_SRC), so re-run from the selection cell.
clusters = pd.DataFrame(dict(ID=ID, RA=RA, DEC=DEC, Z=Z))
clusters

In [None]:
# Construct the Disperse3D object from the galaxy catalogue, the cosmology
# parameters (H0, Om, Ol, Ok) and the expanded cluster/redshift table.
# NOTE(review): '_disperse_03/bin/' is presumably the directory holding the
# DisPerSE executables — confirm against the `disperse` module.
DPS = disperse.Disperse3D(
    galaxies, '_disperse_03/bin/',
    H0, Om, Ol, Ok,
    clusters=clusters
)

In [None]:
# NOTE(review): presumably computes Cartesian coordinates for the catalogues
# held by DPS — confirm in the `disperse` module.
DPS.count_cart_coords()

In [None]:
# Default arguments for DPS.apply_disperse(SIGMA, SMOOTH, BOARD, ANGLE).
# NOTE(review): the exact meaning of each value is defined by `disperse` — confirm there.
SIGMA, SMOOTH = 5.0, 1
BOARD, ANGLE = 'smooth', 30

In [None]:
# Sigma values to scan: 0.2, 0.4, ..., 8.0 (40 values, ascending).
# round(..., 1) removes float noise so each value prints as e.g. '0.6',
# which matters because the values are used in dump directory names.
sigmas = [round(0.2 * step, 1) for step in range(1, 41)]

In [None]:
# Run DisPerSE once per sigma value and dump each result into its own
# directory under lockman_dumps/.
# The loop variable is passed directly: the previous version reassigned the
# SIGMA configuration constant on every iteration, leaving it at the last
# scanned value instead of its configured default.
for sigma in sigmas:
    print('### ', sigma)
    DPS.apply_disperse(sigma, SMOOTH, BOARD, ANGLE)
    DPS.save(f'lockman_dumps/{sigma}/')

In [None]:
# For every dumped sigma, reload the DisPerSE result and collect the third
# value returned by count_conn for each cluster entry (radius argument 3).
# NOTE: `DPS` is rebound here, so afterwards it refers to the last dump read.
cl_dists = []
for sigma in tqdm(sigmas):
    DPS = disperse.Disperse3D.read(f'lockman_dumps/{sigma}/')

    n_entries = DPS.clusters.shape[0]
    cl_conn, fil_conn, cl_min_dists = DPS.count_conn([3] * n_entries)
    # Keep each result as a (1, n_entries) row so the rows can be stacked.
    cl_dists.append(np.array(cl_min_dists)[np.newaxis, :])

# One row per sigma, one column per cluster entry.
dists = np.vstack(cl_dists)

In [None]:
# Flip to (cluster entries, sigmas) so column j corresponds to sigmas[j].
# NOTE: not idempotent — running this cell a second time flips the axes back.
dists = np.transpose(dists)

In [None]:
# Sanity check: expected (number of cluster entries, number of sigmas).
dists.shape

In [None]:
# Search radii 1..10 used to threshold the distances in `dists`.
rads = [*range(1, 11)]

In [None]:
# For each radius, record per cluster entry the last sigma (in `sigmas` order,
# i.e. the largest, since the list is ascending) whose distance in `dists` is
# within that radius; entries that never qualify stay at 0.
# The precomputed `dists` matrix is used instead of re-reading every dump and
# calling count_conn again for each radius.
true_sigmas = []

for rad in rads:
    t = np.zeros(clusters.shape[0])
    # tqdm wraps the list itself (not the enumerate object) so it knows the
    # total and can show a real progress bar.
    for j, sigma in enumerate(tqdm(sigmas)):
        t[dists[:, j] <= rad] = sigma

    true_sigmas.append(t)

# Shape at this point: (number of radii, number of cluster entries).
true_sigmas = np.array(true_sigmas)

In [None]:
# Flip to (cluster entries, radii) so column k corresponds to rads[k].
# NOTE: not idempotent — running this cell a second time flips the axes back.
true_sigmas = np.transpose(true_sigmas)

In [None]:
# Sanity check: expected (number of cluster entries, number of radii).
true_sigmas.shape

In [None]:
# Column for radius 4 (rads.index(4) == 3 because rads starts at 1).
true_sigmas_4 = true_sigmas[:, rads.index(4)]

In [None]:
# Load the pickled classifier.
# NOTE: pickle.load executes arbitrary code from the file — only use trusted dumps.
with open('coma_log_reg.pkl', 'rb') as model_file:
    log_reg = pickle.load(model_file)

In [None]:
# Score every cluster entry with the loaded model: take column 1 of
# predict_proba and flatten it to 1-D.
# NOTE(review): the model must have been trained on the same per-sigma
# distance features (same order and count as `sigmas`) — confirm.
class_probs = log_reg.predict_proba(dists)
log_reg_scores = class_probs[:, 1].reshape(-1)

In [None]:
# Display the per-entry model scores.
log_reg_scores

In [None]:
# Correlation between the radius-4 sigma score and the model score.
# The previous version indexed column 4 (radius 5), which disagreed with
# `true_sigmas_4` and with the `sigma_4_score` column used everywhere else.
np.corrcoef(true_sigmas_4, log_reg_scores)

In [None]:
# Display the expanded cluster/redshift table.
clusters

In [None]:
# Attach both scores to the cluster/redshift table: the radius-4 sigma score
# (column 3 of true_sigmas) and the model score.
t_01 = clusters.assign(
    sigma_4_score=true_sigmas[:, 3],
    log_reg_score=log_reg_scores,
)
t_01

In [None]:
# Per-source EXT_LIKE lookup, with ID_SRC renamed to ID so it can be joined
# against the score table.
t_02 = data[['ID_SRC', 'EXT_LIKE']].rename(columns={'ID_SRC': 'ID'})
t_02

In [None]:
# Join scores with EXT_LIKE on the source id, then order by source and
# redshift with a fresh 0..n-1 index.
res = (
    t_01.merge(t_02, left_on='ID', right_on='ID')
    .sort_values(by=['ID', 'Z'])
    .reset_index(drop=True)
)

In [None]:
# Display the merged score table.
res

In [None]:
# Source ids present in the merged table (candidates for the plots below).
res['ID'].unique()

In [None]:
# Compare the model score and the radius-4 sigma score along redshift for a
# single source.
font = dict(size=16)
plt.rc('font', **font)
fig, ax = plt.subplots(figsize=(24, 12))
ax.grid()

# NOTE: `ID` is rebound from the earlier list to a scalar source id here;
# the following cell reads it, so the name is kept.
ID = 8409
l = 300  # number of leading redshift samples to draw

# Filter the source once and keep its first `l` rows.
src_rows = res[res['ID'] == ID].iloc[:l]

# The model score is multiplied by 8 so both curves share one axis
# (presumably to match the sigma scale, which tops out at 8.0 — confirm).
ax.plot(
    src_rows['Z'],
    src_rows['log_reg_score'] * 8,
    label='log_reg',
)
ax.plot(
    src_rows['Z'],
    src_rows['sigma_4_score'],
    label='sigma',
    alpha=0.5,
    c='r',
)
ax.set_xlabel('Z')
ax.set_ylabel('score')
ax.legend()

In [None]:
# EXT_LIKE values of the source selected in the plotting cell.
res.loc[res['ID'] == ID, 'EXT_LIKE']

In [None]:
# Model score for source 29, plotted against the row index of `res`.
res.loc[res['ID'] == 29, 'log_reg_score'].plot()

In [None]:
# Radius-4 sigma score for source 29, plotted against the row index of `res`.
res.loc[res['ID'] == 29, 'sigma_4_score'].plot()

In [None]:
# Cluster table held by the current DPS object. If the dump-reading loop has
# run, DPS is the object read for the last sigma, not the one built above.
DPS.clusters

In [None]:
# Scratch check (disabled): Euclidean distance between two 3-D points.
# x1 = -115.432782
# y1 = 31.776151
# z1 = 182.911628
# x2 = -228.104405
# y2 = 62.792215
# z2 = 361.447997
# ((x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2)**0.5