In [1]:
import cupy as cp
import pandas as pd
import numpy as np
from numba import jit

# Disable if running on GPU
from numba import jit, config
config.DISABLE_JIT = True
import cProfile
import sys
sys.path.append("../gaia_tools/")
import transformation_constants
import transformation_functions
import data_analysis
import covariance_generation as cov

In [34]:
print('Grabbing needed columns')
icrs_data = pd.read_csv('/home/svenpoder/DATA/Gaia_2MASS Data_DR2/gaia_rv_data_bayes.csv', nrows = 10)
print('Importing DR3')
path = '/home/svenpoder/DATA/Gaia_DR3/GaiaDR3_RV_RGB_fidelity.csv'
gaia_dr3 = pd.read_csv(path)
icrs_data = gaia_dr3[icrs_data.columns]

Grabbing needed columns
Importing DR3


In [35]:
## TRANSFORMATION CONSTANTS
v_sun = transformation_constants.V_SUN

r_0 = 8277.0
z_0 = 25.0

v_sun[0][0] = 11.1
v_sun[1][0] = 251.5*(r_0/8277)
v_sun[2][0] = 8.59*(r_0/8277)


## APPLY INITIAL CUT
galcen_data = data_analysis.get_transformed_data(icrs_data,
                                       include_cylindrical = True,
                                       z_0 = z_0,
                                       r_0 = r_0,
                                       v_sun = v_sun,
                                       debug = True,
                                       is_bayes = True,
                                       is_source_included = True)

galcen_data = galcen_data[(galcen_data.r < 15000) & (galcen_data.r > 5000)]
galcen_data = galcen_data[(galcen_data.z < 200) & (galcen_data.z > -200)]
galcen_data.reset_index(inplace=True, drop=True)

galcen_data = galcen_data[::10]

## DECLARE FINAL INPUT DATA
icrs_data = icrs_data.merge(galcen_data, on='source_id')[icrs_data.columns]
print("Final size of sample {}".format(galcen_data.shape))

Starting galactocentric transformation loop over all data points.. 
Time elapsed for data coordinate transformation: 2.524829123984091 sec
Final size of sample (169498, 11)


In [5]:
# Generate covariance matrices for INPUT
C_icrs = cov.generate_covmat(icrs_data)

In [6]:
# Declare backend
NUMPY_LIB = cp
dtype = cp.float32

In [7]:
# Export INPUT to GPU with needed columns
trans_needed_columns = ['source_id', 'ra', 'dec', 'r_est', 'pmra', 'pmdec', 'radial_velocity',]
icrs_data = NUMPY_LIB.asarray(icrs_data[trans_needed_columns], dtype=cp.float32)
C_icrs = NUMPY_LIB.asarray(C_icrs, dtype=cp.float32)

In [20]:
def transform_all(icrs_data, C_icrs, r_0, z_0, v_sun):

    r_0 = r_0
    z_0 = z_0
    v_sun = v_sun

    galcen_data = transformation_functions.get_transformed_data(icrs_data,
                                        include_cylindrical = True,
                                        z_0 = z_0,
                                        r_0 = r_0,
                                        v_sun = v_sun,
                                        is_bayes = True,
                                        NUMPY_LIB = NUMPY_LIB,
                                        dtype = dtype)

    # ["ra", "dec","r_est","pmra","pmdec","radial_velocity"] -> [:,1::]
    galactocentric_cov = cov.transform_cov_matrix(C = C_icrs, 
                                        df = icrs_data[:,1::],
                                        coordinate_system = 'Cartesian',
                                        z_0 = z_0,
                                        r_0 = r_0,
                                        is_bayes = True,
                                        NUMPY_LIB = NUMPY_LIB,
                                        dtype = dtype)

    # ["x", "y","r","phi","v_r","v_phi"] -> [0,1,6,7,8,9]
    cyl_cov_gpu = cov.transform_cov_matrix(C = galactocentric_cov, 
                                        df = galcen_data[:,[0,1,6,7,8,9]],
                                        coordinate_system = 'Cylindrical',
                                        z_0 = z_0,
                                        r_0 = r_0,
                                        is_bayes = False,
                                        NUMPY_LIB = NUMPY_LIB,
                                        dtype = dtype)

    sig_vphi = NUMPY_LIB.array([cyl_cov_gpu[:,4,4]])
    sig_vr = NUMPY_LIB.array([cyl_cov_gpu[:,3,3]])
    source_id = NUMPY_LIB.array([icrs_data[:,0]])
    galcen_data = NUMPY_LIB.concatenate(([galcen_data, sig_vphi.T, sig_vr.T, source_id.T]), axis=1)

    final_data_columns = ['x', 'y', 'z', 'v_x', 'v_y', 'v_z', 'r', 'phi', 'v_r', 'v_phi',
                'sig_vphi', 'sig_vr', 'source_id']
    
    galcen_data = pd.DataFrame(galcen_data.get(), columns=final_data_columns)

    return galcen_data

In [105]:
r0_range = [x for x in np.linspace(7800, 8500, 4)]
r0_range.append(8277)
r0_range.sort()
r0_range
print('Old Binning Scheme')
print('\n')
for r_0 in r0_range:
    galcen_data = transform_all(icrs_data, C_icrs, r_0, z_0, v_sun)
    bin_collection = data_analysis.get_collapsed_bins(data = galcen_data,
                                                        theta = (0, 1),
                                                        BL_r_min = 5000,
                                                        BL_r_max = 15000,
                                                        BL_z_min = -200,
                                                        BL_z_max = 200,
                                                        N_bins = (10, 1),
                                                        r_drift = False,
                                                        debug = False)


    print('R0 = {}'.format(r_0))
    print('Total number of stars across bins: {}'.format(i))
    print('Bins of increasing r --->')                                                 
    i = 0
    for bin in bin_collection.bins:
        n_bin = len(bin.data)
        print('| {}'.format(n_bin), end=" ")
        i += n_bin
    print('\n')
    

Old Binning Scheme


R0 = 7800.0
Total number of stars across bins: 1694405
Bins of increasing r --->
| 421896 | 473551 | 301729 | 181148 | 127575 | 66097 | 24010 | 7420 | 2294 | 512 

R0 = 8033.333333333333
Total number of stars across bins: 1606232
Bins of increasing r --->
| 361744 | 497503 | 336750 | 198459 | 138791 | 80062 | 30642 | 9699 | 2939 | 829 

R0 = 8266.666666666666
Total number of stars across bins: 1657418
Bins of increasing r --->
| 289906 | 505365 | 378622 | 220442 | 148362 | 94167 | 38877 | 12873 | 3780 | 1223 

R0 = 8277
Total number of stars across bins: 1693617
Bins of increasing r --->
| 286717 | 505320 | 380277 | 221674 | 148818 | 94771 | 39324 | 12991 | 3829 | 1250 

R0 = 8500.0
Total number of stars across bins: 1694971
Bins of increasing r --->
| 194393 | 487644 | 421778 | 251837 | 157863 | 108469 | 49047 | 16710 | 4938 | 1583 



In [104]:
r0_range = [x for x in np.linspace(7800, 8500, 4)]
r0_range.append(8277)
r0_range.sort()

print('New Binning Scheme')
print('\n')
for r_0 in r0_range:
    galcen_data = transform_all(icrs_data, C_icrs, r_0, z_0, v_sun)

    galcen_data['r_orig'] = galcen_data.r
    galcen_data['r'] = galcen_data.r/r_0 

    r_min = 5000/8277
    r_max = 15000/8277


    bin_collection = data_analysis.get_collapsed_bins(data = galcen_data,
                                                        theta = (0, 1),
                                                        BL_r_min = r_min,
                                                        BL_r_max = r_max,
                                                        BL_z_min = -200,
                                                        BL_z_max = 200,
                                                        N_bins = (10, 1),
                                                        r_drift = False,
                                                        debug = False)
    print('R0 = {}'.format(r_0))
    print('Total number of stars across bins: {}'.format(i))                                                 
    print('Bins of increasing r/r_0 --->')
    i = 0
    for bin in bin_collection.bins:
        n_bin = len(bin.data)
        print('| {}'.format(n_bin), end=" ")
        i += n_bin
    print('\n')
    

New Binning Scheme


R0 = 7800.0
Total number of stars across bins: 1694959
Bins of increasing r/r_0 --->
| 315339 | 474126 | 352513 | 215600 | 144123 | 97850 | 45352 | 16419 | 5230 | 1787 

R0 = 8033.333333333333
Total number of stars across bins: 1668339
Bins of increasing r/r_0 --->
| 302118 | 489972 | 365885 | 218616 | 146431 | 96449 | 42272 | 14651 | 4517 | 1501 

R0 = 8266.666666666666
Total number of stars across bins: 1682412
Bins of increasing r/r_0 --->
| 287317 | 504700 | 379694 | 221508 | 148754 | 94813 | 39449 | 13068 | 3864 | 1262 

R0 = 8277
Total number of stars across bins: 1694429
Bins of increasing r/r_0 --->
| 286717 | 505320 | 380277 | 221674 | 148818 | 94771 | 39324 | 12991 | 3829 | 1250 

R0 = 8500.0
Total number of stars across bins: 1694971
Bins of increasing r/r_0 --->
| 261846 | 518004 | 393706 | 224622 | 150564 | 93132 | 36700 | 11592 | 3317 | 922 



In [29]:
r_0 = 8150
galcen_data = transform_all(icrs_data, C_icrs, r_0, z_0, v_sun)

In [30]:
bin_collection_old = data_analysis.get_collapsed_bins(data = galcen_data,
                                                        theta = (0, 1),
                                                        BL_r_min = 5000,
                                                        BL_r_max = 15000,
                                                        BL_z_min = -200,
                                                        BL_z_max = 200,
                                                        N_bins = (10, 1),
                                                        r_drift = False,
                                                        debug = False)

In [31]:
r_min = 5000/8277
r_max = 15000/8277

bin_collection_new = data_analysis.get_collapsed_bins(data = galcen_data,
                                                      theta = r_0,
                                                      BL_r_min = r_min,
                                                      BL_r_max = r_max,
                                                      BL_z_min = -200,
                                                      BL_z_max = 200,
                                                      N_bins = (10, 1),
                                                      r_drift = True,
                                                      debug = False)

In [32]:
for i, bin in enumerate(bin_collection_old.bins):
    print(i)
    print(len(bin_collection_old.bins[i].data))
    print(len(bin_collection_new.bins[i].data))
    print(np.mean(bin_collection_old.bins[i].data.v_phi))
    print(np.mean(bin_collection_new.bins[i].data.v_phi))

0
32693
29561
-223.21603
-222.43484
1
50335
49751
-228.18697
-227.93152
2
35936
37175
-229.06459
-229.07814
3
20630
21985
-227.68185
-228.01587
4
14467
14872
-224.52776
-224.9061
5
8696
9490
-221.92928
-222.48607
6
3439
4096
-220.65163
-220.43832
7
1123
1383
-219.75937
-220.40474
8
353
435
-215.89459
-215.68567
9
87
133
-215.23866
-218.41888
