# Softmax regression with tensorflow

This is a first attempt at using TensorFlow to classify events from two common processes: $\rm t\bar{t}$ and Drell-Yan production.  We begin by importing the required modules.

In [1]:
import sys
from itertools import product

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm

sys.path.append('/home/naodell/work/CMS/amumu')
import nllfitter.plot_tools as pt

ImportError: No module named tensorflow

Carry out some initial configuration.

In [None]:
# Which analysis channel/category and data-taking period this notebook covers.
selection = ('mumu', 'combined')
period = 2012

# Location of the input flat ntuples and the scale handed to the data manager.
ntuple_dir = 'data/flatuples/mumu_2012'
lumi = 19.8e3

# Plots are written to a directory named after the joined selection and period.
output_path = 'plots/fits/{0}_{1}'.format('_'.join(selection), period)

We will be discriminating between $\rm t\bar{t}$ and Drell-Yan events, which requires the following datasets,

In [None]:
# Names of the samples to load, grouped by the process prefix in each name.
datasets = [
    'ttbar_lep', 'ttbar_semilep',      # ttbar samples
    'zjets_m-50', 'zjets_m-10to50',    # zjets samples
]

Now we'll define a set of features that could be helpful in discriminating the two processes,

In [None]:
# Continuously valued features.  Entries that are commented out are candidate
# features that are currently disabled; they are kept here for reference.
cont_features = [
    'lepton1_pt', 'lepton1_eta', 'lepton1_phi',
    #'lepton1_iso', 'lepton1_q', 'lepton1_flavor', 'lepton1_trigger',
    'lepton2_pt', 'lepton2_eta', 'lepton2_phi',
    #'lepton2_iso', 'lepton2_q', 'lepton2_flavor', 'lepton2_trigger',
    #'lepton_delta_eta', 'lepton_delta_phi', 'lepton_delta_r',
    'dilepton_mass', 'dilepton_pt', 'dilepton_eta', 'dilepton_phi',
    #'dilepton_pt_over_m',

    'met_mag', 'met_phi',
    #'bjet_pt', 'bjet_eta', 'bjet_phi', 'bjet_d0',
    #'jet_pt', 'jet_eta', 'jet_phi', 'jet_d0',
    #'dijet_mass', 'dijet_pt', 'dijet_eta', 'dijet_phi',
    #'dijet_pt_over_m',

    #'lepton1_b_mass', 'lepton1_b_pt',
    #'lepton1_b_delta_eta', 'lepton1_b_delta_phi', 'lepton1_b_delta_r',
    #'lepton2_b_mass', 'lepton2_b_pt',
    #'lepton2_b_delta_eta', 'lepton2_b_delta_phi', 'lepton2_b_delta_r',

    #'dilepton_j_mass', 'dilepton_j_pt',
    #'dilepton_j_delta_eta', 'dilepton_j_delta_phi', 'dilepton_j_delta_r',
    #'dilepton_b_mass', 'dilepton_b_pt',
    #'dilepton_b_delta_eta', 'dilepton_b_delta_phi', 'dilepton_b_delta_r',
    #'four_body_mass',
    #'four_body_delta_phi', 'four_body_delta_eta', 'four_body_delta_r',

    #'t_xj', 't_xb', 't_bj'
]

# Discrete (nominal) features.  Every row ends with a trailing comma: without
# it Python silently concatenates adjacent string literals, which previously
# fused 'lepton2_flavor' and 'lepton1_q' into a single, non-existent name.
disc_features = [
    'n_jets', 'n_fwdjets', 'n_bjets',
    'lepton1_flavor', 'lepton2_flavor',
    'lepton1_q', 'lepton2_q',
]

It's important to distinguish between continuously valued and nominal variables since they will need to be preprocessed differently.  A few preselection cuts will be required,

In [None]:
# Preselection expression, passed as the `cuts` argument when the data manager
# is built.  It requires, for both leptons, pt > 25 and |eta| < 2.1, unequal
# lepton charges (lepton1_q != lepton2_q), and 12 < dilepton_mass < 70.
# NOTE(review): units are presumably GeV -- confirm against the ntuple schema.
# NOTE: the backslash continuations sit *inside* the string literal, so the
# leading indentation of the continuation lines is part of the string's value.
# NOTE(review): presumably the consumer tolerates the extra spaces -- verify
# before reformatting this literal.
cuts     = 'lepton1_pt > 25 and abs(lepton1_eta) < 2.1 \
            and lepton2_pt > 25 and abs(lepton2_eta) < 2.1 \
            and lepton1_q != lepton2_q and 12 < dilepton_mass < 70'

Now we'll load the data that is saved in flat ntuples using a DataManager class,

In [None]:
# Build the data manager from the configuration defined in the cells above.
# Only the first element of `selection` is forwarded, and `lumi` is supplied
# as the `scale` argument.
data_manager = pt.DataManager(
    input_dir=ntuple_dir,
    dataset_names=datasets,
    selection=selection[0],
    period=period,
    scale=lumi,
    cuts=cuts,
)