In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append('../')
import utilities as ut

In [16]:
def plot_feature_distribution(feature, df, nbins, save=None):
    """Draw a density-normalised histogram for one or more columns of ``df``.

    The panels are laid out on a grid four columns wide, one panel per
    feature; grid cells that have no feature are hidden.

    Parameters
    ----------
    feature : hashable or list / numpy array / pandas Index of hashables
        Column label(s) of ``df`` to plot. Anything that is not a list,
        array or Index (including a tuple) is treated as a single label.
    df : pandas.DataFrame
        Frame holding the columns named in ``feature``.
    nbins : int
        Number of equal-width bins spanning each column's min..max range.
    save : str or path-like, optional
        If given, the figure is written to this path at 800 dpi.

    Returns
    -------
    fig : matplotlib.figure.Figure
    ax : numpy.ndarray
        Flattened array of every axes in the grid, hidden ones included.

    Raises
    ------
    ValueError
        If ``feature`` is an empty collection.
    """
    # Tuples stay a single label on purpose: they can be a valid column key.
    if isinstance(feature, (list, np.ndarray, pd.Index)):
        feature = list(feature)
    else:
        feature = [feature]

    if not feature:
        raise ValueError('plot_feature_distribution needs at least one feature to plot')

    cols = 4
    rows = int(np.ceil(len(feature) / cols))

    # squeeze=False always yields a 2-D array, so flatten() is safe for any grid shape.
    fig, ax = plt.subplots(rows, cols, figsize=(22, 5 * rows), squeeze=False)
    ax = ax.flatten()

    for j, ft in enumerate(feature):
        values = df[ft]
        start, stop = values.min(), values.max()

        # A constant column would give zero-width bins and a NaN density;
        # widen the range by half a unit each side so one bin holds everything.
        if start == stop:
            start, stop = start - 0.5, stop + 0.5

        bins = np.linspace(start, stop, nbins + 1)
        centres = (bins[:-1] + bins[1:]) / 2
        width = bins[1] - bins[0]

        density = np.histogram(values, bins, density=True)[0]

        ax[j].bar(centres, density, label='Sim. Signal', edgecolor='k', width=width, alpha=0.7)
        ax[j].set_title(f'Distribution of {ft}')
        ax[j].legend(frameon=False)

    # Hide the grid cells left over when len(feature) is not a multiple of cols.
    for unused in ax[len(feature):]:
        unused.set_visible(False)

    if save is not None:
        # Save through the figure object so the right figure is written even
        # if another one has become pyplot's "current" figure.
        fig.savefig(save, dpi=800)

    return fig, ax

In [7]:
# Build one Data object per sample from the argument tuples supplied by Consts.
real = ut.Data(*ut.Consts().get_real_tuple())
sim = ut.Data(*ut.Consts().get_simulated_tuple())

# Keys exposed by each sample; compared in the following cells.
rkeys = real.get_keys()
skeys = sim.get_keys()

In [10]:
len(rkeys), len(skeys)
# Displays (number of real keys, number of simulated keys): the real sample exposes many more features than the simulated one.

(4323, 2941)

In [14]:
# Keep only the keys present in both samples, preserving the order of rkeys.
# Membership is tested against a set so each lookup is O(1) rather than a scan of skeys.
skeys_lookup = set(skeys)
all_keys = [key for key in rkeys if key in skeys_lookup]

# Persist the shared names as a single 'Feature' column; a later cell reads this file back.
pd.DataFrame({'Feature': all_keys}).to_csv('common_features.txt', sep=' ', index=False)

# Last expression of the cell, so the number of shared keys is displayed.
len(all_keys)

In [79]:
# Reload the shared feature names and keep those whose name contains 'chi2' (any case).
chi2 = [
    name
    for name in pd.read_csv('common_features.txt', sep=' ')['Feature']
    if 'chi2' in name.lower()
]

# Drop the features that are written to error_features.txt in a later cell.
# list.remove raises ValueError if a name is missing, so a changed feature set is noticed.
for unwanted in (
    'PVCHI2',
    'Lb_DTF_PV_Psi_chi2',
    'Lb_DTF_PV_L1520-JPs_chi2',
    'Lb_DTF_PV_L1520_chi2',
    'Lb_DTF_PV_JPs_chi2',
    'Lb_DTF_PV_chi2',
):
    chi2.remove(unwanted)

len(chi2)

127

In [None]:
# Record the chi2 features excluded from the analysis in a one-column, space-separated file.
error_features = pd.DataFrame(
    {
        'Feature': [
            'PVCHI2',
            'Lb_DTF_PV_Psi_chi2',
            'Lb_DTF_PV_L1520-JPs_chi2',
            'Lb_DTF_PV_L1520_chi2',
            'Lb_DTF_PV_JPs_chi2',
            'Lb_DTF_PV_chi2',
        ]
    }
)
error_features.to_csv('error_features.txt', sep=' ', index=None)