In [272]:
import numpy as np

# Names of the data types defined so far; add more as we develop them.
# NOTE(review): nothing in the visible code validates a data point's
# data_type against this list -- confirm whether that check is intended.
allowed_types = [
    "shear_xi_plus",
    "shear_xi_minus",
    "shear_ee",
    "shear_bb",
    "galaxy_density_cl",
    "galaxy_density_w",
    "ggl_gamma_t",
    "ggl_gamma_x",
    "ggl_cl",
]



class DataPoint:
    """
    A single data value together with the metadata that describes it.

    Parameters
    ----------
    data_type: str
        Name of the kind of measurement this value represents.
    tracers: tuple
        Names of the tracers the value was measured from.
    value:
        The measured value itself.
    **tags:
        Any further keyword arguments are stored as a dict of tags
        and can be read back with get_tag.
    """
    def __init__(self, data_type, tracers, value, **tags):
        self.data_type = data_type
        self.tracers = tracers
        self.value = value
        self.tags = tags

    def __repr__(self):
        # The closing bracket balances the opening "<Data"
        return f"<Data {self.data_type} {self.tracers} {self.value} {self.tags}>"

    def get_tag(self, tag):
        """
        Return the value stored under the named tag, or None if this
        point has no such tag.
        """
        return self.tags.get(tag)
    

class Tracer:
    """
    Base class for tracers, which also acts as a registry of tracer types.

    A subclass declared with a ``tracer_type`` class keyword, e.g.
    ``class Foo(Tracer, tracer_type='foo')``, is recorded under that name
    so that Tracer.make('foo', ...) can build it.
    """
    # Registry mapping tracer_type name -> subclass.  It lives on the base
    # class, so every subclass registers into the same dict.
    subclasses = {}

    def __init__(self, name):
        self.name = name

    def __init_subclass__(cls, tracer_type=None, **kwargs):
        """
        Register the new subclass under tracer_type.

        tracer_type is optional so that a registered tracer can itself be
        subclassed without repeating the keyword; such a subclass is simply
        not added to the registry.
        """
        super().__init_subclass__(**kwargs)
        if tracer_type is not None:
            cls.subclasses[tracer_type] = cls

    @classmethod
    def make(cls, tracer_type, name, *args, **kwargs):
        """
        Build a tracer of the registered type.

        Parameters
        ----------
        tracer_type: str
            Name under which the wanted subclass was registered.
        name: str
            Name given to the new tracer.
        *args, **kwargs:
            Passed on unchanged to the subclass constructor after name.

        Raises
        ------
        KeyError
            If no subclass is registered under tracer_type.
        """
        try:
            subclass = cls.subclasses[tracer_type]
        except KeyError:
            known = ", ".join(sorted(map(str, cls.subclasses)))
            raise KeyError(
                f"Unknown tracer type {tracer_type!r}; known types: {known}"
            ) from None
        return subclass(name, *args, **kwargs)
        
class MiscTracer(Tracer, tracer_type='misc'):
    """
    Tracer registered under the type name 'misc'.

    It carries nothing beyond the name stored by the base class.
    """
    def __init__(self, name):
        super().__init__(name=name)

        
class NZTracer(Tracer, tracer_type='NZ'):
    """
    Tracer registered under the type name 'NZ'.

    Besides its name it stores two attributes, z and nz, exactly as given.
    NOTE(review): presumably a redshift grid and the number density sampled
    on it -- confirm against the callers.
    """
    def __init__(self, name, z, nz):
        # The base initializer is what stores the name
        super().__init__(name)
        self.z, self.nz = z, nz

# Define other tracer types as they come along

                
                

        
class Sacc:
    """
    A class containing a selection of LSST summary statistic measurements,
    their covariance, and the metadata necessary to compute theoretical
    predictions for them.

    Attributes
    ----------
    data: list
        The data points, in the order they were added.
    tracers: dict
        Tracer objects keyed by tracer name.
    covariance:
        None until a covariance is set.
    """
    def __init__(self):
        self.data = []
        self.tracers = {}
        self.covariance = None
        # Cache for the `mean` property; reset to None whenever the data
        # points or their values change so it is rebuilt on next access.
        self._mean = None

    def __len__(self):
        """
        Return the number of data points.
        """
        return len(self.data)

    def add_tracer(self, tracer_type, name, *args, **kwargs):
        """
        Add a new tracer

        The tracer is built with Tracer.make, which receives tracer_type,
        name and any extra positional and keyword arguments, and is then
        stored in self.tracers under its name (replacing any tracer that
        already had that name).
        """
        # Keyword arguments must be forwarded as keywords (**), otherwise
        # only their names would be passed on, as positional arguments.
        T = Tracer.make(tracer_type, name, *args, **kwargs)
        self.tracers[name] = T

    def add_data_point(self, data_type, tracers, value, **tags):
        """
        Add a new data point

        Parameters
        ----------
        data_type: str
            Name of the kind of measurement.
        tracers: iterable
            Tracer names; stored as a tuple.
        value:
            The measured value.
        **tags:
            Extra metadata stored on the data point.

        Raises
        ------
        ValueError
            If a covariance has already been set.
        """
        if self.covariance is not None:
            raise ValueError("You cannot add a data point after setting the covariance")
        tracers = tuple(tracers)
        d = DataPoint(data_type, tracers, value, **tags)
        self.data.append(d)
        # The cached mean vector no longer matches the data
        self._mean = None

    def cut(self, mask):
        """
        Remove data points and corresponding covariance elements following mask.

        Mask must be either a boolean array or a list of indices to remove.

        True = cut data point
        False = keep data point

        indices = data points to cut

        Raises
        ------
        ValueError
            If a boolean mask does not have one entry per data point.
        """
        mask = np.array(mask)

        # Compare against the builtin bool: the np.bool alias was removed
        # from recent NumPy releases.
        if mask.dtype == bool:
            if len(mask) != len(self):
                raise ValueError("Mask passed in is wrong size")
            self.data = [d for d, cut_it in zip(self.data, mask) if not cut_it]
        else:
            # Build the set once so each membership test is cheap
            to_cut = set(mask.ravel().tolist())
            self.data = [d for i, d in enumerate(self.data) if i not in to_cut]
        # The cached mean vector no longer matches the data
        self._mean = None
        # TODO: the covariance is not masked yet; this message is the
        # placeholder reminder for that.
        print("Mask the covariane too!")

    @staticmethod
    def _tag_matches(point, key, val):
        """
        Test one selection criterion against one data point.

        A key ending in "__lt" or "__gt" compares the tag named by the rest
        of the key with val using < or >; any other key requires equality.
        A point without the tag never satisfies a < or > comparison.
        """
        if key.endswith("__lt"):
            tag = point.get_tag(key[:-4])
            return tag is not None and tag < val
        if key.endswith("__gt"):
            tag = point.get_tag(key[:-4])
            return tag is not None and tag > val
        return point.get_tag(key) == val

    def indices(self, data_type=None, tracers=None, **select):
        """
        Find the indices of all points matching the given selection

        Parameters
        ----------
        data_type: str, optional
            Keep only points of this data type.
        tracers: iterable, optional
            Keep only points whose tracer tuple equals tuple(tracers).
        **select:
            Tag criteria: name=value for equality, name__lt=value or
            name__gt=value for strict comparisons.

        Returns
        -------
        numpy integer array of matching positions in self.data, in
        increasing order (empty if nothing matches).
        """
        if tracers is not None:
            tracers = tuple(tracers)
        indices = []
        for i, d in enumerate(self.data):
            if tracers is not None and not d.tracers == tracers:
                continue
            if data_type is not None and not d.data_type == data_type:
                continue
            if all(self._tag_matches(d, key, val) for key, val in select.items()):
                indices.append(i)
        # An explicit integer dtype keeps an empty result usable as an index
        return np.array(indices, dtype=int)

    def get_tags(self, tags, data_type=None, tracers=None, **select):
        """
        Get the value of a one or more named tags for a subset of the data

        Returns a list with one entry per requested tag; each entry is the
        list of that tag's values over the selected points, in data order.
        """
        indices = self.indices(data_type=data_type, tracers=tracers, **select)
        return [[self.data[i].get_tag(tag) for i in indices] for tag in tags]

    def get_tag(self, tag, data_type=None, tracers=None, **select):
        """
        Get the value of a named tag for a subset of the data
        """
        return self.get_tags([tag], data_type=data_type, tracers=tracers, **select)[0]

    def get_data_points(self, data_type=None, tracers=None, **select):
        """
        Get data point objects for a subset of the data
        """
        indices = self.indices(data_type=data_type, tracers=tracers, **select)
        return [self.data[i] for i in indices]

    def get_mean(self, data_type=None, tracers=None, **select):
        """
        Get the vector of mean values for a selected subset of the data
        """
        indices = self.indices(data_type=data_type, tracers=tracers, **select)
        return self.mean[indices]

    def get_tracer_combinations(self, data_type=None):
        """
        Get all sets of tracers used (e.g. tomographic bin pairs)

        Returns a list of the distinct tracer tuples, in no particular order.
        """
        indices = self.indices(data_type=data_type)
        return list({self.data[i].tracers for i in indices})

    @property
    def mean(self):
        """
        Get the vector of mean values for the entire data set.
        """
        if self._mean is None:
            self._mean = np.array([d.value for d in self.data])
        return self._mean

    @mean.setter
    def mean(self, mu):
        """
        Set the vector of mean values for the entire data set.

        Raises ValueError if mu does not have one entry per data point.
        """
        if len(mu) != len(self.data):
            raise ValueError("Tried to set mean with thing of length {}"
                " but data is length {}".format(len(mu),len(self.data)))
        for m, d in zip(mu, self.data):
            d.value = m
        # Force the getter to rebuild the vector from the new values
        self._mean = None

    @classmethod
    def load(cls, filename):
        """
        Placeholder: not implemented yet; does nothing and returns None.
        """
        pass

    def save(self, filename):
        """
        Placeholder: not implemented yet; does nothing.
        """
        pass

    def add_covariance(self, covariance):
        """
        Placeholder: not implemented yet; the covariance is not stored.
        """
        pass




In [281]:
# We will need the two point library
!pip install twopoint



In [282]:
# Get an example data file
!wget http://desdr-server.ncsa.illinois.edu/despublic/y1a1_files/chains/2pt_NG_mcal_1110.fits

--2018-11-15 17:53:36--  http://desdr-server.ncsa.illinois.edu/despublic/y1a1_files/chains/2pt_NG_mcal_1110.fits
Resolving desdr-server.ncsa.illinois.edu... 141.142.161.38
Connecting to desdr-server.ncsa.illinois.edu|141.142.161.38|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6600960 (6.3M) [application/octet-stream]
Saving to: ‘2pt_NG_mcal_1110.fits’


2018-11-15 17:53:38 (3.67 MB/s) - ‘2pt_NG_mcal_1110.fits’ saved [6600960/6600960]



In [273]:
# Load the example data file fetched by the wget cell above.  The result is
# kept in the global `T`, which make_sacc() below reads.
import twopoint
T = twopoint.TwoPointFile.from_fits("./2pt_NG_mcal_1110.fits")

In [274]:
def make_sacc():
    """
    Build a Sacc object from the two-point file held in the global `T`.

    Every bin of every kernel in T becomes an 'NZ' tracer named
    "<kernel name>_<bin index>", with bin indices counted from 0.  Every
    entry of every spectrum in T becomes one data point tagged with its
    angle ('scale') and the spectrum's angle unit ('scale_unit').

    Returns
    -------
    The populated Sacc instance.
    """
    sacc = Sacc()

    # One tracer per (kernel, bin)
    for kernel in T.kernels:
        for bin_index in range(kernel.nbin):
            sacc.add_tracer(
                'NZ',
                f'{kernel.name}_{bin_index}',
                kernel.z,
                kernel.nzs[bin_index],
            )

    # Translate a pair of twopoint types into the data type name used here
    types = twopoint.Types
    type_names = {
        (types.galaxy_shear_plus_real, types.galaxy_shear_plus_real): 'xi_plus',
        (types.galaxy_shear_minus_real, types.galaxy_shear_minus_real): 'xi_minus',
        (types.galaxy_position_real, types.galaxy_position_real): 'w_theta',
        (types.galaxy_position_real, types.galaxy_shear_plus_real): 'gamma_t',
    }

    for spectrum in T.spectra:
        for row in range(len(spectrum)):
            # Bin numbers are shifted down by one so that they line up with
            # the 0-based tracer names created above (presumably the file
            # counts bins from 1 -- confirm against the twopoint format).
            first_bin = spectrum.bin1[row] - 1
            second_bin = spectrum.bin2[row] - 1
            pair = [
                f'{spectrum.kernel1}_{first_bin}',
                f'{spectrum.kernel2}_{second_bin}',
            ]
            value = spectrum.value[row]
            angle = spectrum.angle[row]
            angle_unit = spectrum.angle_unit
            data_type = type_names[(spectrum.type1, spectrum.type2)]
            sacc.add_data_point(
                data_type, pair, value, scale=angle, scale_unit=angle_unit
            )
    return sacc

In [280]:
# Build the Sacc object from the loaded two-point file
S = make_sacc()
# All xi_plus points for this pair of tracers (tracers may be a tuple or a list)
ind1 = S.indices(data_type='xi_plus', tracers=('nz_source_0', 'nz_source_2'))
# The same selection, restricted to points whose 'scale' tag is greater than 100.0
ind2 = S.indices(data_type='xi_plus', tracers=['nz_source_0', 'nz_source_2'], scale__gt=100.0)
print(ind1)
print(ind2)
# Remove the points selected in ind2, then repeat the first selection to
# check that they are gone
S.cut(ind2)
ind3 = S.indices(data_type='xi_plus', tracers=['nz_source_0', 'nz_source_2'])
print(ind3)
# Bare expressions: the notebook displays only the value of the last one
S.data[0]
S.tracers['nz_source_0'].z

[40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59]
[56 57 58 59]
[40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55]


array([0.0051, 0.0151, 0.0251, 0.0351, 0.0451, 0.0551, 0.0651, 0.0751,
       0.0851, 0.0951, 0.1051, 0.1151, 0.1251, 0.1351, 0.1451, 0.1551,
       0.1651, 0.1751, 0.1851, 0.1951, 0.2051, 0.2151, 0.2251, 0.2351,
       0.2451, 0.2551, 0.2651, 0.2751, 0.2851, 0.2951, 0.3051, 0.3151,
       0.3251, 0.3351, 0.3451, 0.3551, 0.3651, 0.3751, 0.3851, 0.3951,
       0.4051, 0.4151, 0.4251, 0.4351, 0.4451, 0.4551, 0.4651, 0.4751,
       0.4851, 0.4951, 0.5051, 0.5151, 0.5251, 0.5351, 0.5451, 0.5551,
       0.5651, 0.5751, 0.5851, 0.5951, 0.6051, 0.6151, 0.6251, 0.6351,
       0.6451, 0.6551, 0.6651, 0.6751, 0.6851, 0.6951, 0.7051, 0.7151,
       0.7251, 0.7351, 0.7451, 0.7551, 0.7651, 0.7751, 0.7851, 0.7951,
       0.8051, 0.8151, 0.8251, 0.8351, 0.8451, 0.8551, 0.8651, 0.8751,
       0.8851, 0.8951, 0.9051, 0.9151, 0.9251, 0.9351, 0.9451, 0.9551,
       0.9651, 0.9751, 0.9851, 0.9951, 1.0051, 1.0151, 1.0251, 1.0351,
       1.0451, 1.0551, 1.0651, 1.0751, 1.0851, 1.0951, 1.1051, 1.1151,
      

In [277]:
len(S.data)

896

In [271]:
ind2

array([56, 57, 58, 59])