### TESTING

 Functions that have been tested:

- stellar_locus(x, offset)
- classification(data, counterpart_id: str, j: str, k: str, g: str, i: str, pstar: str)
- blanks(data, r_limit, groupid: str, distance: str)
- TODO N(data, background, Ndata, Nback, r, band: str, bins=40, mag_min=9, mag_max=22)
- TODO q_div_n(n_real, n_normalized, q0, bincentres, window_size, mag_min=9, mag_max=22, n=100)
- TODO likelihood(data, counterpart_id: str, f250: str, e250: str, SG: str, distance: str, band: str, k, fwhm, qn_gal: tuple, qn_stars: tuple)
- reliability(data, counterpart_id: str, groupid: str, likelihood: str, q0)
- N_false(data, reliability: str, r_thresh)
- completeness(data, reliability: str, f250: str, e250: str, q0, r_thresh, snr_thresh=4)
- cleanness(data, reliability: str, r_thresh)
- TODO multiplicity_reliability(sources, groupid: str, groupsize: str, distance: str, reliability: str, r_thresh=0.8, max_counterparts=10)
- TODO euclidean_counts(data, flux: str, s_range: tuple, N, area)
- TODO dn_dz_domega(data, min_z, max_z, n, area)
- TODO lensing_probabilities(data, redshift_source: str, redshift_lens: str, redshift_error_source: str, redshift_error_lens: str)
- TODO optimal_lens_probability(data, reliability: str, lensing_probability: str, z_source: str, false_id_percent, reliability_thresh=0.8, minimum_z_source=2.5, n=100)
- TODO cumulative_counts(data, flux: str, s_range: tuple, N, area)
- TODO lensing_fraction(data, lensed_candidates, f500: str, s_range: tuple, N, area)
- TODO genuine_multiples(data, distance: str, groupid: str, redshift: str, redshift_errors: str, maximum_radius=8)

In [1]:
# =========================================================================================================
# Dependencies
# =========================================================================================================

import pytest
import ipytest
import utils
import os
import pandas as pd
from pandas import read_csv

In [2]:
# =========================================================================================================
# Import test data
# =========================================================================================================

@pytest.fixture(scope='module')
def test_data():
    """ RETURNS OUR TEST DATA """
    ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    file_path = ROOT + '/testing/test_data.csv'
    data = read_csv(file_path)
    return data

@pytest.fixture(scope='module')
def test_back():
    """ RETURNS OUR TEST DATA """
    ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    file_path = ROOT + '/testing/test_data.csv'
    back = read_csv(file_path)
    return back

In [3]:
# See our test data
file_path = 'test_data.csv'
data = read_csv(file_path)
print('Mock Herschel Data')
print(data)

file_path = 'test_back.csv'
back = read_csv(file_path)
print('Mock Background data')
print(back)

Mock Herschel Data
    counterpart_id  J     K    g     i  pstar  SG_flag  group_id  distance  \
0                1  0   0.0  0.0   0.0   0.98        0       1.0         4   
1                2  4   2.0  4.0   2.0   0.50        1       1.0         8   
2                3  4   3.5  4.0   2.0   0.50        2       1.0        12   
3                4  4   3.0  0.0  10.0   0.50        3       2.0         5   
4                5  4   3.9  0.0  10.0   0.50        4       2.0         3   
5                6  0  10.0  0.0  10.0   0.75        5       3.0         2   
6                7  0  10.0  0.0  10.0   0.50        6       3.0        10   
7                8  0   0.0  NaN   NaN   0.98        0       3.0         6   
8                9  4   2.0  4.0   2.0   0.50        1       4.0        13   
9               10  4   3.5  4.0   2.0   0.50        2       4.0        14   
10              11  4   3.0  NaN   NaN   0.50        3       NaN        10   
11              12  4   3.9  NaN   NaN   0.50

### Function Testing

In [4]:
# =========================================================================================================
# stellar_locus()
# =========================================================================================================

@pytest.mark.parametrize("x, offset, expected",
                         [[0, 0, 0.2228],
                          [2, 0, 0.76],
                          [1000, 0, 0.7768]])


def test_stellar_locus(x, offset, expected):
    result = utils.stellar_locus(x, offset)
    assert expected == result


### Star-Galaxy Classifier

Recall that our method is:

    0 - Star based on stellar probability > 0.95
    1 - Galaxy based on J-K/g-i colour-colour plot
    2 - Star based on J-K/g-i colour-colour plot
    3 - Galaxy based on J-K > something
    4 - Star based on J-K < something
    5 - Star based on stellar probability > 0.7
    6 - Galaxy based on all other

In [5]:
# =========================================================================================================
# classification()
# =========================================================================================================

print(data[['J', 'K', 'g', 'i', 'pstar', 'SG_flag']])

def test_classification(test_data):
    df = test_data
    expected = df['SG_flag']
    result = utils.classification(test_data, 'counterpart_id', 'J', 'K', 'g', 'i', 'pstar')
    assert all(expected == result)



    J     K    g     i  pstar  SG_flag
0   0   0.0  0.0   0.0   0.98        0
1   4   2.0  4.0   2.0   0.50        1
2   4   3.5  4.0   2.0   0.50        2
3   4   3.0  0.0  10.0   0.50        3
4   4   3.9  0.0  10.0   0.50        4
5   0  10.0  0.0  10.0   0.75        5
6   0  10.0  0.0  10.0   0.50        6
7   0   0.0  NaN   NaN   0.98        0
8   4   2.0  4.0   2.0   0.50        1
9   4   3.5  4.0   2.0   0.50        2
10  4   3.0  NaN   NaN   0.50        3
11  4   3.9  NaN   NaN   0.50        4
12  0  10.0  NaN   NaN   0.75        5
13  0  10.0  NaN   NaN   0.50        6


### Blanks

We test the blanks function at r = 0, 9 and 15 for the minimum, maximum and a mid-value. For r = 0 we should expect all
the groups to appear blank which equals 8. At the maximum radius, r = 15, only the no counterpart IDs will be blanks,
which equals 2, and for the mid-value only no counterpart sources and those where all counterparts have a larger
distance will be viewed as blanks, which is 4.

In [6]:
# =========================================================================================================
# blanks()
# =========================================================================================================

print(data[['group_id', 'distance']])

@pytest.mark.parametrize("r, expected",
                         [[0, 8],
                          [9, 4],
                          [15, 2]])

def test_blanks(test_data, r, expected):
    df = test_data
    result = utils.blanks(df, r, 'group_id', 'distance')
    assert expected == result

    group_id  distance
0        1.0         4
1        1.0         8
2        1.0        12
3        2.0         5
4        2.0         3
5        3.0         2
6        3.0        10
7        3.0         6
8        4.0        13
9        4.0        14
10       NaN        10
11       NaN         1
12       NaN         0
13       NaN         0


### N

In [7]:
# =========================================================================================================
# N()
# =========================================================================================================


### q/n

In [8]:
# =========================================================================================================
# q_div_n()
# =========================================================================================================

### Likelihood

In [9]:
# =========================================================================================================
# likelihood()
# =========================================================================================================

### Reliability

**** Explain what is going on ****

In [10]:
# =========================================================================================================
# reliability()
# =========================================================================================================

print(data[["counterpart_id", "group_id", "likelihood"]])

@pytest.mark.parametrize("q0, expected",
                         [[0.5, pd.Series([1/16.5, 5/16.5, 10/16.5, 2/6.5, 4/6.5, 2/12.5, 4/12.5, 6/12.5, 4/12.5, 8/12.5, 1/2, 5/6, 0, 0])],
                          [1, pd.Series([1/16, 5/16, 10/16, 2/6, 4/6, 2/12, 4/12, 6/12, 4/12, 8/12, 1/2, 5/6, 0, 0])]])
def test_reliability(test_data, q0, expected):
    df = test_data
    result = utils.reliability(df, 'counterpart_id', 'group_id', 'likelihood', q0)
    assert all(expected == result)


    counterpart_id  group_id  likelihood
0                1       1.0           1
1                2       1.0           5
2                3       1.0          10
3                4       2.0           2
4                5       2.0           4
5                6       3.0           2
6                7       3.0           4
7                8       3.0           6
8                9       4.0           4
9               10       4.0           8
10              11       NaN           1
11              12       NaN           5
12              13       NaN           0
13              14       NaN           0


### N False

**** Explain what is going on ****

In [11]:
# =========================================================================================================
# N_false()
# =========================================================================================================

print(data[["reliability"]])

@pytest.mark.parametrize("r_thresh, expected",
                         [[0, 9.3],
                          [0.5, 1.8],
                          [1, 0]])
def test_N_false(test_data, r_thresh, expected):
    df = test_data
    result, _ = utils.N_false(df, 'reliability', r_thresh)
    assert expected == result


    reliability
0          0.05
1          0.15
2          0.80
3          0.40
4          0.60
5          0.10
6          0.20
7          0.70
8          0.40
9          0.60
10         0.20
11         0.50
12         0.00
13         0.00


### Completeness

**** Explain what is going on ****

In [12]:
# =========================================================================================================
# completeness()
# =========================================================================================================

print(data[['reliability', 'f250', 'e250']])

@pytest.mark.parametrize("r_thresh, expected",
                         [[0, 2],
                          [0.5, 5/7],
                          [1, 0]])
def test_completeness(test_data, r_thresh, expected):
    df = test_data
    result = utils.completeness(df, 'reliability', 'f250', 'e250', q0=1, r_thresh=r_thresh)
    assert expected == result

    reliability  f250  e250
0          0.05    10     2
1          0.15    10     5
2          0.80    10     2
3          0.40    10     5
4          0.60    10     2
5          0.10    10     5
6          0.20    10     2
7          0.70    10     5
8          0.40    10     2
9          0.60    10     5
10         0.20    10     2
11         0.50    10     5
12         0.00    10     2
13         0.00    10     5


### Cleanness

**** Explain what is going on ****

In [13]:
# =========================================================================================================
# cleanness()
# =========================================================================================================

print(data[['reliability']])

@pytest.mark.parametrize("r_thresh, expected",
                         [[0, 9.3/2],
                          [0.5, 1.8/5/7]])
def test_cleanness(test_data, r_thresh, expected):
    df = test_data
    result = utils.cleanness(df, 'reliability', r_thresh=r_thresh)
    assert expected == result


    reliability
0          0.05
1          0.15
2          0.80
3          0.40
4          0.60
5          0.10
6          0.20
7          0.70
8          0.40
9          0.60
10         0.20
11         0.50
12         0.00
13         0.00


### Multiplicity Reliablity

In [14]:
# =========================================================================================================
# multiplicity_reliability()
# =========================================================================================================


In [15]:
# =========================================================================================================
# Test
# =========================================================================================================

ipytest.run('-vv')

platform win32 -- Python 3.9.5, pytest-6.2.5, py-1.10.0, pluggy-1.0.0 -- c:\users\bradl\desktop\venv\scripts\python.exe
cachedir: .pytest_cache
rootdir: C:\Users\bradl\Desktop\HerschelATLASDR3_2\testing
collecting ... collected 17 items

tmpccoc3i8n.py::test_cleanness[0-4.65] <- ..\..\..\AppData\Local\Temp\ipykernel_12680\1138122948.py FAILED [  5%]
tmpccoc3i8n.py::test_cleanness[0.5-0.05142857142857143] <- ..\..\..\AppData\Local\Temp\ipykernel_12680\1138122948.py FAILED [ 11%]
tmpccoc3i8n.py::test_blanks[0-8] <- ..\..\..\AppData\Local\Temp\ipykernel_12680\1413748118.py PASSED [ 17%]
tmpccoc3i8n.py::test_blanks[9-4] <- ..\..\..\AppData\Local\Temp\ipykernel_12680\1413748118.py PASSED [ 23%]
tmpccoc3i8n.py::test_blanks[15-2] <- ..\..\..\AppData\Local\Temp\ipykernel_12680\1413748118.py PASSED [ 29%]
tmpccoc3i8n.py::test_classification <- ..\..\..\AppData\Local\Temp\ipykernel_12680\2814409873.py PASSED [ 35%]
tmpccoc3i8n.py::test_N_false[0-9.3] <- ..\..\..\AppData\Local\Temp\ipykernel_1268