In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the training arrays from disk.
# NOTE(security): pickle.load can execute arbitrary code while deserializing,
# so only open pickle files that come from a trusted source.
with open('Data/train_data_v2.pickle', 'rb') as file:
    # The pickle holds a 3-tuple; z is not used by the cells visible here.
    X, y, z = pkl.load(file)

# Labels for the columns of X; they are reused as plot tick labels and as
# DataFrame column names further down.
# NOTE(review): assumed to be listed in the same order as X's columns --
# confirm against the code that produced the pickle.
features = [
    'fj_jetNTracks',
    'fj_nSV',
    'fj_tau0_trackEtaRel_0',
    'fj_tau0_trackEtaRel_1',
    'fj_tau0_trackEtaRel_2',
    'fj_tau1_trackEtaRel_0',
    'fj_tau1_trackEtaRel_1',
    'fj_tau1_trackEtaRel_2',
    'fj_tau_flightDistance2dSig_0',
    'fj_tau_flightDistance2dSig_1',
    'fj_tau_vertexDeltaR_0',
    'fj_tau_vertexEnergyRatio_0',
    'fj_tau_vertexEnergyRatio_1',
    'fj_tau_vertexMass_0',
    'fj_tau_vertexMass_1',
    'fj_trackSip2dSigAboveBottom_0',
    'fj_trackSip2dSigAboveBottom_1',
    'fj_trackSip2dSigAboveCharm_0',
    'fj_trackSipdSig_0',
    'fj_trackSipdSig_0_0',
    'fj_trackSipdSig_0_1',
    'fj_trackSipdSig_1',
    'fj_trackSipdSig_1_0',
    'fj_trackSipdSig_1_1',
    'fj_trackSipdSig_2',
    'fj_trackSipdSig_3',
    'fj_z_ratio',
]

# Fail fast if the feature matrix and the label list disagree; otherwise the
# mismatch only shows up later, when the names are attached to plots/frames.
assert np.ndim(X) == 2 and np.shape(X)[1] == len(features), (
    f"X has shape {np.shape(X)} but {len(features)} feature names are listed"
)

In [None]:
# Standardize each feature column (StandardScaler: zero mean, unit variance)
# so the logistic-regression coefficient magnitudes are comparable.
# StandardScaler is imported directly at the top of the notebook; the
# `preprocessing` module itself is never imported, so it must not be used here.
# NOTE: despite its name, `min_max_scaler` is a StandardScaler, not a
# MinMaxScaler; the name is kept so any other cell referring to it still works.
min_max_scaler = StandardScaler()
X_scaled = min_max_scaler.fit_transform(X)

In [None]:
# Train a logistic-regression classifier (library default settings) on the
# scaled features. `fit` returns the estimator itself, so construction and
# fitting are chained; the bare `model` on the last line keeps the fitted
# estimator as the cell's displayed output.
model = LogisticRegression().fit(X_scaled, y)
model

In [None]:
# Use the absolute value of each fitted coefficient (first row of
# model.coef_) as a per-feature importance score.
feat_imp = np.abs(model.coef_[0])

f1, ax1 = plt.subplots(figsize=(10, 10))
# Draw on the explicit Axes instead of the implicit pyplot "current axes".
ax1.bar(features, feat_imp)
# Restyle the tick labels the bar plot already created. Calling
# set_xticklabels() without first fixing the tick positions is documented as
# unsafe (labels may end up at unexpected positions) and emits a warning.
plt.setp(ax1.get_xticklabels(), rotation=60, horizontalalignment='right')
ax1.set_title('Feature Importance')
ax1.set_ylabel('Normalized Importance (A.U.)')
plt.show()

In [None]:
# Pairwise correlation between the scaled feature columns
# (DataFrame.corr with its default settings).
df = pd.DataFrame(X_scaled, columns=features)
corr = df.corr()

# Boolean mask that hides the upper triangle and the diagonal, so each
# feature pair is drawn only once.
mask = np.triu(np.ones(corr.shape, dtype=bool))

f, ax = plt.subplots(figsize=(10, 10))
# NOTE(review): the colour scale is pinned to [0, 1], so any negative
# correlation is drawn in the same colour as 0 -- confirm this is intended.
ax = sns.heatmap(
    corr,
    mask=mask,
    ax=ax,
    cmap=sns.color_palette("YlOrBr", as_cmap=True),
    vmin=0, vmax=1, center=0.5,
    square=True,
    linewidths=1,
    xticklabels=True,
    yticklabels=True,
)
ax.set_title('Feature Correlation Matrix')
# Tilt the x tick labels so the long feature names do not overlap; the
# trailing semicolon suppresses the returned list in the cell output.
ax.set_xticklabels(ax.get_xticklabels(), rotation=60, ha='right');