# Likelihood Analysis with Asymmetric Drift



In [None]:
import sys
import matplotlib.pyplot as plt
import numpy as np
import time
from functools import reduce

sys.path.append("../gaia_tools/")
import data_analysis
import transformation_constants
import covariance_generation
from import_functions import import_data

In [None]:
# Location of the input CSV catalogue (absolute path on the cluster
# file system; adjust when running elsewhere).
path = "/hdfs/local/sven/gaia_tools_data/gaia_rv_data_bayes.csv"
# Load the catalogue with the project helper. The result is kept as the
# notebook-global `data_icrs`, which later cells read and overwrite.
# NOTE(review): presumably a DataFrame of ICRS-frame Gaia data with a
# 'source_id' column (a later cell merges on it) -- confirm against import_data.
data_icrs = import_data(path = path, debug = False)

---

In [None]:
# Transform the loaded sample with the project helper, requesting cylindrical
# coordinates as well. No frame parameters are passed here, so the helper's
# own defaults apply.
galcen_data = data_analysis.get_transformed_data(data_icrs,
                                       include_cylindrical = True,
                                       debug = True,
                                       is_bayes = True,
                                       is_source_included = True)

# Covariance matrices for the same sample, propagated through the same
# chain of transformations.
cov_df = covariance_generation.generate_covmatrices(df = data_icrs,
                                       df_crt = galcen_data,
                                       transform_to_galcen = True,
                                       transform_to_cylindrical = True,
                                       is_bayes = True,
                                       debug=False)

# append covariance information to galactocentric data
galcen_data['cov_mat'] = cov_df['cov_mat']

# Keep only sources with 5000 < r < 12000 and -200 < z < 200 (strict bounds).
# NOTE(review): units are presumably parsecs -- confirm.
in_radial_range = (galcen_data.r < 12000) & (galcen_data.r > 5000)
in_height_range = (galcen_data.z < 200) & (galcen_data.z > -200)
# Reassign instead of resetting the index in place on a filtered slice.
galcen_data = galcen_data[in_radial_range & in_height_range].reset_index(drop=True)

# Restrict the ICRS sample to the sources that survived the cuts.
# Only the key column is merged: joining the full frame would drag every
# galactocentric column (including the object-valued 'cov_mat') through the
# join, and any column name shared by both frames would be suffixed by pandas
# and break the selection of the original ICRS columns.
data_icrs = data_icrs.merge(galcen_data[['source_id']], on='source_id')

In [None]:
# LIKELIHOOD SUM FUNCTION

def _build_bin_collection(data_icrs, theta, num_r_bin, num_z_bin):
    """Transform the ICRS sample for ``theta`` and bin it (steps 1-3).

    NOTE(review): this re-enables the pipeline that used to be commented out
    inside ``get_likelihood_sum``. The keyword arguments are copied from that
    code -- confirm they still match the current ``data_analysis`` and
    ``covariance_generation`` APIs.

    Parameters
    ----------
    data_icrs : input sample passed through to the project helpers.
    theta : tuple ``(r, z, Usun, Vsun, Wsun)``.
    num_r_bin, num_z_bin : number of bins requested along r and z.

    Returns
    -------
    The object returned by ``data_analysis.get_collapsed_bins``.
    """
    v_sun = np.array([[theta[2]],
                      [theta[3]],
                      [theta[4]]])

    # 1
    galcen_data = data_analysis.get_transformed_data(data_icrs,
                                       include_cylindrical = True,
                                       r_0 = theta[0],
                                       v_sun = v_sun,
                                       debug = True,
                                       is_bayes = True,
                                       is_source_included = True)

    # 2
    cov_df = covariance_generation.generate_covmatrices(df = data_icrs,
                                           df_crt = galcen_data,
                                           transform_to_galcen = True,
                                           transform_to_cylindrical = True,
                                           z_0 = theta[1],
                                           r_0 = theta[0],
                                           is_bayes = True,
                                           debug=True)

    galcen_data['cov_mat'] = cov_df['cov_mat']

    # Same radial cut as in the commented-out original.
    radial_cut = (galcen_data.r < 12000) & (galcen_data.r > 5000)
    galcen_data = galcen_data[radial_cut].reset_index(drop=True)

    min_val = np.min(galcen_data.r)
    max_val = np.max(galcen_data.r)

    # 3
    # The radial bin limits are padded by 1 on each side of the data range.
    return data_analysis.get_collapsed_bins(data = galcen_data,
                                            theta = theta,
                                            BL_r_min = min_val - 1,
                                            BL_r_max = max_val + 1,
                                            BL_z_min = -200,
                                            BL_z_max = 200,
                                            N_bins = (num_r_bin, num_z_bin),
                                            r_drift = False,
                                            debug = True)


def get_likelihood_sum(data_icrs,
                       vc_list,
                       r = transformation_constants.R_0,
                       z = transformation_constants.Z_0,
                       Usun = transformation_constants.V_SUN[0][0],
                       Vsun = transformation_constants.V_SUN[1][0],
                       num_r_bin = 10,
                       num_z_bin = 4,
                       bin_collection = None):
    """Sum the per-bin likelihood values of a binned sample.

    Unless a ready-made ``bin_collection`` is supplied, the function

    1) transforms the ICRS data using the given frame parameters,
    2) generates the covariance matrices and appends them to the
       transformed data,
    3) bins the data,

    and then in either case

    4) iterates over the bins and computes a likelihood value for each,
    5) sums the likelihood values over the bins.

    Previously steps 1-3 were commented out while the code below still read
    ``bin_collection``, so the function only worked if a global of that name
    happened to exist, and the frame and bin-count arguments were ignored.

    Parameters
    ----------
    data_icrs : input sample handed to the project transformation helpers.
        Not used when ``bin_collection`` is given.
    vc_list : indexable sequence; ``vc_list[i]`` is passed to the i-th bin's
        ``get_likelihood_w_asymmetry``. It must have at least as many entries
        as there are bins.
    r, z, Usun, Vsun : frame parameters; they default to the values in
        ``transformation_constants``. The third solar velocity component is
        always taken from ``transformation_constants.V_SUN``.
    num_r_bin, num_z_bin : number of bins along r and z.
    bin_collection : optional pre-built bin collection to reuse instead of
        running steps 1-3. It must expose ``N_bins``, ``bins`` and ``data``,
        the attributes read here.

    Returns
    -------
    tuple ``(likelihood_sum, bin_collection, likelihood_array)`` where
    ``likelihood_array`` holds one value per bin slot (zeros where a bin
    returned 0 or was not visited).
    """
    if bin_collection is None:
        theta = (r, z, Usun, Vsun, transformation_constants.V_SUN[2][0])
        bin_collection = _build_bin_collection(data_icrs, theta,
                                               num_r_bin, num_z_bin)

    # Setup likelihood array: one slot per bin of the grid
    n = int(np.prod(bin_collection.N_bins))
    likelihood_array = np.zeros(n)

    # Keep track how many data points are used in likelihood computation
    point_count = []

    # 4
    start = time.time()
    # `current_bin` rather than `bin`, to avoid shadowing the builtin.
    for i, current_bin in enumerate(bin_collection.bins):

        likelihood_value = current_bin.get_likelihood_w_asymmetry(vc_list[i], debug=True)

        if likelihood_value == 0:
            # A bin that reports 0 contributes nothing and is not counted.
            print("0!!")
        else:
            point_count.append(current_bin.N_points)

        likelihood_array[i] = likelihood_value

    print("Number of points in analysis: {0}".format(np.sum(point_count)))
    print("Bin Collection data shape: {0}".format(bin_collection.data.shape))

    # 5
    likelihood_sum = np.sum(likelihood_array)

    end = time.time()
    print("Likelihood time = %s" % (end - start))

    return likelihood_sum, bin_collection, likelihood_array

In [None]:
# Function that generates the necessary variables for
# plotting the profiles

def generate_plot_vars(bin_r, bin_z, parameter, vc_list):
    """Evaluate the likelihood sum for every trial value in ``vc_list``.

    For each trial value the same number is assigned to every bin and
    ``get_likelihood_sum`` is called on the notebook-global ``data_icrs``.

    Parameters
    ----------
    bin_r, bin_z : number of bins along r and z, forwarded as
        ``num_r_bin`` / ``num_z_bin``.
    parameter : label of the scanned quantity; returned unchanged.
    vc_list : iterable of trial values (the x-axis of the profile).

    Returns
    -------
    tuple ``(x, y, parameter)`` where ``x`` is ``vc_list`` itself and ``y``
    is the list of likelihood sums, one per trial value.
    """
    # The varied range in x-axis
    x = vc_list

    # The likelihood values
    y = []

    # get_likelihood_sum reads one entry per bin by index. The list used to
    # be fixed at 11 entries, which overflows for larger grids; size it to
    # cover a bin_r x bin_z grid and never make it shorter than before.
    n_entries = max(11, bin_r * bin_z)

    for i, item in enumerate(x):
        print(i, item)

        # Separate name: the original rebound the `vc_list` parameter here.
        per_bin_values = [item] * n_entries

        val = get_likelihood_sum(data_icrs,
                                 per_bin_values,
                                 num_r_bin = bin_r,
                                 num_z_bin = bin_z)[0]

        print("Likelihood: {0}".format(val))
        y.append(val)

    return x, y, parameter

In [None]:
# The plotting function

def generate_likelihood_plot(x, y, bin_r, bin_z, parameter, save = False):
    """Draw the likelihood profile ``y`` against ``x`` on a new figure.

    The x position of the largest ``y`` value is marked with a dashed
    vertical line labelled "Max".

    Parameters
    ----------
    x, y : profile coordinates; ``x`` is indexed with ``np.argmax(y)``.
    bin_r, bin_z : bin counts, shown in the subtitle and used in the
        output file name.
    parameter : name of the scanned quantity, inserted into math-mode
        labels. The x-axis unit is "pc" for "R_0" and "km/s" otherwise.
    save : when true, write the figure to
        ``../out/Likelihood_<parameter>_<bin_r>x<bin_z>.png`` at 300 dpi.
    """
    axis_font = {'fontsize': 18}
    unit = "pc" if parameter == "R_0" else "km/s"

    plt.figure(figsize = (8,8))
    plt.plot(x, y, '-', color='blue')
    plt.title("Likelihood dependence on ${0}$".format(parameter),
              pad = 45,
              fontdict={'fontsize': 20})
    plt.suptitle(r"({0}x{1} bins)".format(bin_r, bin_z), y=0.93, fontsize=15)
    plt.grid()

    # Mark the location of the maximum of the profile
    best_x = x[np.argmax(y)]
    plt.axvline(x=best_x, ls="--", label="Max")

    plt.xlabel('${0}$ [{1}]'.format(parameter, unit), fontdict=axis_font, labelpad = 25)
    plt.ylabel('Log Likelihood', fontdict=axis_font, labelpad = 25)
    plt.subplots_adjust(left=0.2)

    plt.legend()

    if not save:
        return

    out_stem = "../out/Likelihood_{0}_{1}x{2}".format(parameter, bin_r, bin_z)
    plt.savefig(out_stem+'.png', dpi=300)

---
## $V_\odot$ Likelihood
$V_\odot$ is varied; the rest of the parameters are held fixed.

In [None]:
# Label of the scanned quantity
parameter = "V_odot"

# Bin grid: 5 bins along r, a single bin along z
bin_r, bin_z = 5, 1

# Trial values: -300, -290, ..., -160 (the stop value -150 is excluded)
vc_list = list(range(-300, -150, 10))
x, y, parameter = generate_plot_vars(bin_r, bin_z, parameter, vc_list)

In [None]:
# Raw string: "\o" is not a valid escape sequence, so the plain literal
# triggers a warning on recent Python versions. The runtime value is the same.
generate_likelihood_plot(x, y, bin_r, bin_z, r"V_\odot")
plt.legend()

In [None]:
# REFERENCE PLOT
# Raw string: "\o" is not a valid escape sequence, so the plain literal
# triggers a warning on recent Python versions. The runtime value is the same.
generate_likelihood_plot(x, y, bin_r, bin_z, r"V_\odot")

# Reid & Brunthaler + GRAVITY: 245.6 +- 1.4 [km/s]
# NOTE(review): the value below (233.4) does not match the 245.6 quoted in
# the comment above, and the disabled band uses +-1.5 rather than +-1.4 --
# confirm which numbers are intended before re-enabling the reference lines.
v_circ = 233.4
# plt.axvline(x=v_circ, label="Reid & Brunthaler + GRAVITY\n (2020)", color="orange")
# plt.axvline(x=v_circ+1.5, ls="--", color="orange")
# plt.axvline(x=v_circ-1.5, ls="--", color="orange")

#plt.xlim(175, 325)
plt.legend()
#plt.savefig('../out/Vodot_profile'+'.png', dpi=300)

---

---