In [1]:
import numpy as np
import matplotlib.pyplot as plt

# To allow the importation of plotting function module anywhere
import sys
import platform
if platform.system() == 'Linux':
    sys.path.insert(0,'/nethome/palmerio/Dropbox/Plotting_GUI/Src')
elif platform.system() == 'Darwin': 
    sys.path.insert(0,'/Users/palmerio/Dropbox/Plotting_GUI/Src')
import plotting_functions as pf

import corner
import pandas as pd
import seaborn as sns
from scipy.stats import norm
from pathlib import Path
plt.style.use('presentation')


In [2]:
def calculate_Cvar(fname, pflx_key='pflx_band_phtflux',
                          flnc_key='flnc_band_phtflnc',
                          t90_key='t90',
                          lac=False,
                          pflx_min=None,
                          verbose=False):
    """
        Read the catalog fname with pandas and compute Cvar for every usable row.

        The file is read as a '|'-separated table whose column names are on the
        third line. T90, fluence and peak flux are extracted, converted to numbers
        and rows with any non-numeric value are dropped.
        The mean flux is then calculated as fluence/T90.
        Cvar is then calculated as mean flux/peak flux.

        Parameters
        ----------
        fname : path of the catalog file to read.
        pflx_key, flnc_key, t90_key : names (whitespace-stripped) of the columns
            holding the peak flux, the fluence and T90.
        lac : lac means long and complete; if True only rows with T90 > 2 and
            peak flux > pflx_min are returned.
        pflx_min : minimum peak flux used for the cut; required when lac is True.
        verbose : if True, print the index and name of every column of the file.

        Returns
        -------
        pandas.DataFrame with columns 'pflx', 'flnc', 't90', 'mflx' and 'Cvar'.

        Raises
        ------
        ValueError if lac is True and pflx_min is None.
    """
    # Fail early with a clear message instead of crashing on `pflx > None`
    # after the whole catalog has been read.
    if lac and pflx_min is None:
        raise ValueError("pflx_min must be provided when lac=True")

    # Read the entire file
    df_obs = pd.read_csv(fname, sep='|', header=2, low_memory=False)
    # Strip the column names to remove whitespaces
    df_obs = df_obs.rename(columns=lambda x: x.strip())
    # Activate verbose to check the column names
    if verbose:
        for i, col in enumerate(df_obs.columns):
            print(i, col)

    # Only keep necessary data in new sub-dataframe
    necessary_data = {'pflx': df_obs[pflx_key],
                      'flnc': df_obs[flnc_key],
                      't90': df_obs[t90_key]}
    df_sub = pd.DataFrame(data=necessary_data)
    # Convert the values read from strings to numbers (unparsable entries become NaN)
    for key in ['pflx', 'flnc', 't90']:
        df_sub[key] = pd.to_numeric(df_sub[key], errors='coerce')
    # Remove NaN values
    df_sub = df_sub.dropna(axis=0)
    # Calculate mean flux and Cvar
    df_sub['mflx'] = df_sub['flnc'] / df_sub['t90']
    df_sub['Cvar'] = df_sub['mflx'] / df_sub['pflx']
    # Only keep the long and complete part of the sample
    if lac:
        df_sub = df_sub[(df_sub['t90'] > 2) & (df_sub['pflx'] > pflx_min)]
    return df_sub

In [3]:
def cut_Cvar_sample(df, mask, title=None):
    """
        Select the rows of df flagged by mask and draw them in the log(Cvar) vs log(T90) plane.

        The selection is drawn with pf.cool_hist2d, using log peak flux as the color
        variable, and contours from corner.hist2d are overlaid on the central panel.
        When title is given it is written on the central panel.
        Returns the selected rows as a copy.
    """
    selected = df[mask].copy()
    log_t90 = np.log10(selected['t90'])
    log_cvar = np.log10(selected['Cvar'])
    log_pflx = np.log10(selected['pflx'])

    # Build a fresh dict for every panel so that no two panels share one object
    def _kde_style():
        return {'color':'k', 'label':None, 'legend':False}

    def _hist_style():
        return {'color':'lightgray','alpha':0.9,'bins':20, 'edgecolor':'k', 'label':None}

    _, axes = pf.cool_hist2d(log_t90,
                             log_cvar,
                             c=log_pflx,
                             xlabel=r'$\rm \log(T_{90}~[s])$',
                             ylabel=r'$\rm \log(C_{var})$',
                             left_kdeplot_kwargs=_kde_style(),
                             top_kdeplot_kwargs=_kde_style(),
                             left_hist_kwargs=_hist_style(),
                             top_hist_kwargs=_hist_style(),
                             cmap='Greys', cblabel=r'$\log(N~[\rm ph\,s^{-1}\,cm^{-2}])$')

    # Overlay contours only (no data points, no density shading) on the central panel
    center_ax = axes['center']
    corner.hist2d(np.asarray(log_t90), np.asarray(log_cvar),
                  plot_datapoints=False,
                  plot_density=False,
                  ax=center_ax,
                  no_fill_contours=True,
                  color='k',
                  contour_kwargs={'linewidths':2})

    if title is not None:
        center_ax.text(0.3, 0.9, title, transform=center_ax.transAxes, size=18)
    return selected

In [None]:
def fit_Cvar_T90_slope(df, N_points=1000, show_plot=True):
    """
        Fit the slope of the Cvar vs T90 relation by minimizing the scatter of the decorrelated Cvar sample.

        For N_points trial slopes between -1 and 1, the standard deviation of
        log(Cvar) + slope*log(T90) is computed; the slope giving the smallest
        scatter is kept. A normal distribution is then fitted to the decorrelated
        quantity and to log(T90), and both are drawn on a pf.cool_hist2d figure.

        Parameters
        ----------
        df : data frame with (at least) the columns 'Cvar', 't90' and 'pflx'.
        N_points : number of trial slopes scanned between -1 and 1.
        show_plot : if True, also plot the scatter as a function of the trial slope.
            The 2D histogram figure is produced regardless of this flag.

        Returns
        -------
        dict with keys 'best_slope', 'mu_decorr', 'sigma_decorr', 'mu_t90', 'sigma_t90'.
    """
    # These do not depend on the trial slope: compute them once
    log_cvar = np.log10(df['Cvar'])
    log_t90 = np.log10(df['t90'])

    slope = np.linspace(-1, 1, N_points)
    sigma = np.array([np.std(log_cvar + trial * log_t90) for trial in slope])
    best_slope = slope[sigma.argmin()]
    if show_plot:
        fig, ax = plt.subplots(tight_layout=True)
        ax.plot(slope, sigma)
        ax.set_xlabel('Slope of the correlation')
        ax.set_ylabel('Scatter of Cvar')
        ax.axvline(best_slope, label=f'best slope: {best_slope:.3f}',ls='--', color='k')
        ax.legend()

    decorrelated = log_cvar + best_slope * log_t90
    mu, std = norm.fit(decorrelated)
    mu_t90, std_t90 = norm.fit(log_t90)

    fig, axes = pf.cool_hist2d(log_t90,
                decorrelated,
                c=np.log10(df['pflx']),
                xlabel=r'$\rm \log(T_{90}~[s])$',
                # Sign-aware format so a negative slope does not render as "+-0.xxx"
                ylabel=r'$\rm \log(C_{var})$'+f'{best_slope:+.3f}'+r'$\,\rm \log(T_{90}~[s])$',
                cblabel=r'$\log(N~[\rm ph\,s^{-1}\,cm^{-2}])$',
                top_hist_kwargs={'label': None,
                                 'bins': 20,
                                 'color': 'lightgrey',
                                 'edgecolor':'k'},
                left_hist_kwargs={'label': None,
                                 'bins': 20,
                                 'color': 'lightgrey',
                                 'edgecolor':'k'},)

    # Grids on which the fitted gaussians are drawn (decorrelated Cvar, then log T90)
    x = np.linspace(-1,1,1000)
    x2 = np.linspace(0.3,3,1000)
    axes['left'].plot(pf.gaussian(x,mu,std,normed=True),x, label=r'$\mu = ${:.2f}~$\sigma =${:.2f}'.format(mu, std))
    # Use the log(T90) fit here (the previous mu2/std2 names were never defined)
    axes['top'].plot(x2,pf.gaussian(x2,mu_t90,std_t90,normed=True), label=r'$\mu = ${:.2f}~$\sigma =${:.2f}'.format(mu_t90, std_t90))

    axes['left'].legend()
    axes['top'].legend()
    fit_results = {'best_slope':best_slope,
                   'mu_decorr':mu,
                   'sigma_decorr':std,
                   'mu_t90':mu_t90,
                   'sigma_t90':std_t90}
    return fit_results

In [None]:
def summary_of_Cvar(fname,
                    sample_name,
                    pflx_key='pflx_band_phtflux',
                    flnc_key='flnc_band_phtflnc',
                    t90_key='t90',
                    lac=False,
                    pflx_min=None,
                    fig_title='Long and complete GBM sample\nin the BATSE band (50-300 keV)',
                    remove_outliers=True):
    """
        Convenience function to rapidly execute each step in the Cvar chain and output the relevant information only.

        The chain is: compute Cvar from the catalog, show a corner plot of the
        log quantities, keep the rows with T90 >= 2 and peak flux >= pflx_min,
        fit the Cvar vs T90 slope on that selection and print the fit results.

        Parameters
        ----------
        fname : path of the catalog file, passed to calculate_Cvar.
        sample_name : name of the sample, only used in the printed summary.
        pflx_key, flnc_key, t90_key, lac : passed unchanged to calculate_Cvar.
        pflx_min : minimum peak flux of the selection; required.
        fig_title : title of the corner plot and text written on the Cvar-T90 plane.
        remove_outliers : if True (default), also discard the rows with
            T90 <= 10**0.6 and Cvar <= 10**-0.5.

        Returns
        -------
        None; the results are printed.

        Raises
        ------
        ValueError if pflx_min is None.
    """
    # The peak flux cut below is always applied, so a threshold is mandatory;
    # fail before reading the file rather than on `pflx >= None`.
    if pflx_min is None:
        raise ValueError("pflx_min must be provided to select the complete sample")

    # Compute Cvar
    df = calculate_Cvar(fname,pflx_key=pflx_key,
                          flnc_key=flnc_key,
                          t90_key=t90_key,
                          lac=lac,
                          pflx_min=pflx_min)
    fig_corner = corner.corner(np.log10(df), labels=['log Peak flux',
                                          'log Fluence',
                                          'log T90',
                                          'log Mean flux',
                                          'log Cvar'])
    fig_corner.suptitle(fig_title, **{'size':20})
    # Cut out bad part of sample
    T90_condition = (df['t90'] >= 2)
    Pflx_condition = (df['pflx'] >= pflx_min)
    final_mask = T90_condition & Pflx_condition
    if remove_outliers:
        # Build the outlier mask from this function's own frame so that it never
        # depends on a variable left over in the notebook namespace.
        outliers_condition = (df['t90'] <= 10**0.6) & (df['Cvar'] <= 10**-0.5)
        final_mask = final_mask & ~outliers_condition

    df_lac = cut_Cvar_sample(df, final_mask, title=fig_title)

    # Fit the Cvar vs T90 plane
    fit_results = fit_Cvar_T90_slope(df_lac)
    print(" Results of Cvar analysis for sample {}".format(sample_name))
    print(fit_results)
    return

# Title and corner-plot labels shared by the figures of this cell
GBM_B_TITLE = 'Long and complete GBM sample\nin the BATSE band (50-300 keV)'
CORNER_LABELS = ['log Peak flux', 'log Fluence', 'log T90', 'log Mean flux', 'log Cvar']

# Compute Cvar for the GBM catalog, already restricted to the long and complete part
filename = Path('../catalogs/GBM_cat/fermi_GBM_cat_total.dat')
df_GBM_b = calculate_Cvar(filename, pflx_min=0.9, lac=True)

# Quicklook at the sample in log space
fig = corner.corner(np.log10(df_GBM_b), labels=CORNER_LABELS)
fig.suptitle(GBM_B_TITLE, size=20)

# Selection masks: duration, peak flux, and the short / low-Cvar outliers to discard
T90_condition = df_GBM_b['t90'] >= 2
Pflx_condition = df_GBM_b['pflx'] >= 0.9
outliers_condition = (df_GBM_b['t90'] <= 10**0.6) & (df_GBM_b['Cvar'] <= 10**-0.5)
final_mask = T90_condition & Pflx_condition & ~outliers_condition

df_GBM_b_lac = cut_Cvar_sample(df_GBM_b, final_mask, title=GBM_B_TITLE)