# Downloading and converting KiDS-450 data to Sacc

In [1]:
import sacc
import numpy as np
import urllib.request
import os
import subprocess


## First we download the data from the KiDS website

The old CFHTLenS data sets seem not to be up any more, but hopefully this will stay up longer.

In [2]:
# Name of the unpacked release directory, the tarball that contains it,
# and the URL the tarball is fetched from.
dirname = 'KiDS-450_COSMIC_SHEAR_DATA_RELEASE'
filename = dirname + '.tar.gz'
url = f'http://kids.strw.leidenuniv.nl/cs2016/{filename}'

# Check if already downloaded
if os.path.exists(dirname):
    print("Data directory already downloaded")
else:
    print("Downloading file")
    urllib.request.urlretrieve(url, filename)
    # Unpack and remove tgz file.
    # check=True raises CalledProcessError if tar exits with an error, so a
    # tarball that failed to unpack is left on disk rather than being deleted.
    print("Unpacking file")
    subprocess.run(['tar', '-xvf', filename], check=True)
    os.remove(filename)
    print("Done")

Data directory already downloaded


## Now we make a new empty Sacc and gradually fill it

In [3]:
# Create the empty Sacc container; the cells below add the n(z) tracers,
# the xi+/- data points and the covariance matrix to it.
S = sacc.Sacc()

## First with the n(z) data

These are fairly straightforward.  We use the extra_columns feature to save the n(z) error.
Note that this is the statistical error only, not the systematics.

There are 4 bins

In [4]:
# One n(z) file per tomographic bin, in bin order.
nz_files = [
    "Nz_DIR_z0.1t0.3.asc",
    "Nz_DIR_z0.3t0.5.asc",
    "Nz_DIR_z0.5t0.7.asc",
    "Nz_DIR_z0.7t0.9.asc",
]

# KiDS tomographic bins are numbered from 1, so the counter starts there.
for i, nz_file in enumerate(nz_files, start=1):
    # Each file is read as three columns: z, n(z) and the n(z) error.
    nz_path = f'KiDS-450_COSMIC_SHEAR_DATA_RELEASE/Nz_DIR/Nz_DIR_Mean/{nz_file}'
    z, nz, err_z = np.loadtxt(nz_path).T

    # Sacc wants bin centers, whereas this file has bin edges, so shift every
    # value by half of the spacing between the first two entries.
    half_width = 0.5 * (z[1] - z[0])
    z += half_width

    # The n(z) error rides along as an extra column on the tracer.
    S.add_tracer('NZ', f'bin_{i}', z, nz, extra_columns={'err_z': err_z})


## Now the data vector itself

The xi_plus and xi_minus data are indicated in the file by a type flag of 1 or 2, respectively.

Normally we would use astropy to read this, but the header line in the data file is missing a space, so the number of column names it declares does not match the number of columns in the data, which messes up the automatic parsing.

So we will parse it manually.

In [5]:
# Read the xi+/- data vector line by line and add each point to S.
# The file is opened in a `with` block so the handle is closed once parsing
# finishes (or if a line fails to parse).
with open('KiDS-450_COSMIC_SHEAR_DATA_RELEASE/DATA_VECTOR/KiDS-450_xi_pm_tomographic_data_vector.dat') as data_file:
    for line in data_file:
        line = line.strip()

        # skip blank and comment lines
        if line.startswith('#') or not line:
            continue

        # Parse the line: six whitespace-separated fields, the first of which
        # is not used here.
        _, theta, xi, type_indicator, i_bin, j_bin = line.split()

        theta = float(theta)
        xi = float(xi)

        # Use the standard types from sacc to indicate types
        if type_indicator == '1':
            data_type = sacc.standard_types.galaxy_shear_xi_plus
        elif type_indicator == '2':
            data_type = sacc.standard_types.galaxy_shear_xi_minus
        else:
            raise ValueError("Unknown type")

        # Tracer names follow the 'bin_<n>' convention used when the n(z)
        # tracers were added.
        tracer_i = f'bin_{i_bin}'
        tracer_j = f'bin_{j_bin}'

        # This is a helper function for 2pt data specifically
        S.add_theta_xi(data_type, tracer_i, tracer_j, theta, xi)

## And finally we add in the covariance

In [6]:
covmat_file = './KiDS-450_COSMIC_SHEAR_DATA_RELEASE/COV_MAT/xipmcutcov_KiDS-450_analytic_inc_m.dat'

# Number of data points currently in S; the covariance is an n x n matrix,
# started off as all zeros.
n = len(S)
C = np.zeros((n, n))

# Open the file in a `with` block so the handle is closed after reading.
with open(covmat_file) as cov_lines:
    for line in cov_lines:
        # Again, skip blank and comment lines
        line = line.strip()
        if line.startswith('#') or not line:
            continue

        # Parse the line: row index, column index, covariance value
        i, j, c_ij = line.split()

        # Oh, those wacky KiDS with their one-based arrays
        i = int(i) - 1
        j = int(j) - 1

        # This file does have both the (equal) i,j and j,i values included.
        # Convert explicitly instead of relying on numpy to parse the string.
        C[i, j] = float(c_ij)

# Check symmetric.  The third positional argument of np.allclose is rtol, so
# the original call left atol at its 1e-8 default, which would swamp any
# asymmetry smaller than that.  Tolerances are passed by keyword so that the
# tiny value is the absolute tolerance.
assert np.allclose(C.T, C, rtol=1e-5, atol=1e-18)

# And add.  The order matches the data vector order, according to the docs, so this should be fine.
S.add_covariance(C)

## And save the data

We say which n(z) version we are using in the file name, for clarity.

In [7]:
# Write the assembled data set to a FITS file; overwrite=True is passed so
# re-running the notebook does not stop on an existing file.
S.save_fits('kids_450_dir.sacc', overwrite=True)

## Concatenate with DES data

If the DES data has also been downloaded and converted (see the other notebook in this directory)
then we can try concatenating our files together

In [8]:
# Only attempt the merge when the converted DES file is present.
des_sacc_path = "./des-converted.sacc"
if os.path.exists(des_sacc_path):
    D = sacc.Sacc.load_fits(des_sacc_path)

    # Each input gets its own label so the two surveys' tracers stay distinct.
    survey_labels = ['kids', 'des']
    combo = sacc.concatenate_data_sets(S, D, labels=survey_labels)

    combo.save_fits("./des_kids_combined.sacc", overwrite=True)


In [9]:
# Read the combined KiDS + DES file back in.
# NOTE(review): this file is only written by the cell above when
# ./des-converted.sacc exists, so on a fresh run without the DES data this
# load presumably fails unless the file is left over from an earlier run.
xx=sacc.Sacc.load_fits("./des_kids_combined.sacc")

In [10]:
# Bins now include both DES and KiDS sources.
# Print the tracer names of the combined data set to confirm both surveys are present.
print(xx.tracers.keys())

dict_keys(['bin_1_kids', 'bin_2_kids', 'bin_3_kids', 'bin_4_kids', 'source_0_des', 'source_1_des', 'source_2_des', 'source_3_des', 'lens_0_des', 'lens_1_des', 'lens_2_des', 'lens_3_des', 'lens_4_des'])
