### General imports

In [31]:
import hax
from hax import cuts
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy
import pickle

import matplotlib as mpl
from copy import deepcopy

%matplotlib inline
from matplotlib.colors import LogNorm
# Global plot style for the notebook: large fonts and large figures.
plt.rc('font', size=18)
plt.rcParams['figure.figsize'] = (10.0, 7.0)

# Colormaps
try:
    # These three lines set the backdrop colour to white
    # (https://stackoverflow.com/questions/34023932/mollview-use-matplotlib-colormaps-and-change-background-color)
    from pylab import cm
    cmap = cm.viridis
    cmap.set_under('w')
    plt.set_cmap(cmap)
except Exception:
    # Best effort only: any failure falls back to the default colormap, but
    # KeyboardInterrupt / SystemExit are no longer swallowed as with a bare except.
    print("Colormap viridis not found. Using default map instead (sorry Chris...)")
# Close the current figure (presumably one opened as a side effect of plt.set_cmap -- confirm).
plt.close()
import time
import datetime

[1.0, 1.0, 1.0, 1.0]
(0.26700400000000002, 0.0048739999999999999, 0.32941500000000001, 1.0)


In [None]:
def get_run_list(path, start_at=None, stop_at=None):
    '''
    List the run names found in a folder, optionally cut to an inclusive interval.

    A run name is the name of a '.root' file in `path` with the extension removed.
    The names are sorted before any cut is applied.

    :param path: folder that is scanned with os.listdir.
    :param start_at: first run name to keep; a falsy value keeps the list from its beginning.
    :param stop_at: last run name to keep; a falsy value keeps the list up to its end.
        It is looked up after the start_at cut has been applied.
    :return: list of run names. The length of the list is also printed.

    A start_at / stop_at that is not among the run names makes the index lookup fail
    (IndexError on the empty match).
    '''
    extension = '.root'
    runs = [name[:-len(extension)]
            for name in np.sort(os.listdir(path))
            if name.endswith(extension)]

    if start_at:
        first = np.where(np.array(runs) == start_at)[0][0]
        runs = runs[first:]
    if stop_at:
        last = np.where(np.array(runs) == stop_at)[0][0]
        runs = runs[:last + 1]

    print('Run list contains %d files' % len(runs))
    return runs

In [1]:
def get_time(d, run_names):
    '''
    Get the time of event in old data (pre-fix).

    A new run is taken to start wherever 'event_number' decreases from one row to the
    next. Three columns are added to `d` IN PLACE:

    * 'run_number': 0-based counter of the run a row belongs to,
    * 'unix_time': start time of the run plus the offset of 'event_time' with respect to
      the first event of that run. The offset is multiplied by 4 and by 1e-9
      (presumably event_time ticks of 4 ns converted to seconds -- confirm),
    * 'time_since_start': 'unix_time' relative to the first row.

    :param d: DataFrame with 'event_number' and 'event_time' columns. The index labels are
        compared with row positions, so a default 0..N-1 index is assumed.
    :param run_names: iterable of run names formatted as "%y%m%d_%H%M%S", one per run, in
        the order in which the runs appear in `d`. time.mktime interprets them as local time.
    :return: the same DataFrame `d`, with the columns above added.
    :raises ValueError: if fewer run names are given than there are runs in `d`.
    '''
    # Row positions i where event_number drops between row i and row i + 1
    run_breaks = np.where(np.diff(d['event_number'].values) < 0)[0]
    # Run number of a row = number of breaks strictly below its index label.
    # run_breaks is sorted, so a binary search replaces the per-row Python loop.
    run_numbers = np.searchsorted(run_breaks, d.index.values, side='left')
    d['run_number'] = run_numbers

    run_ids = np.unique(run_numbers)
    all_timestamps = []
    for run_number, run_name in zip(run_ids, run_names):
        d_sub = d[d['run_number'] == run_number]
        run_start = time.mktime(
            datetime.datetime.strptime(run_name, "%y%m%d_%H%M%S").timetuple())
        all_timestamps.append(
            ((d_sub['event_time'].values - d_sub['event_time'].iloc[0]) * 4) * 1e-9 +
            run_start)

    if len(all_timestamps) < len(run_ids):
        raise ValueError('Found %d runs in the data but only %d run names were given'
                         % (len(run_ids), len(all_timestamps)))

    d['unix_time'] = np.concatenate(all_timestamps)
    d['time_since_start'] = d['unix_time'] - d['unix_time'].iloc[0]
    return d

In [None]:
def draw_box(x, y, **kwargs):
    """Add an unfilled rectangle to the current axes.

    x and y are (low, high) boundary pairs of the box. Any further keyword
    arguments are forwarded to matplotlib's Rectangle.
    """
    axes = plt.gca()
    x_low, y_low = x[0], y[0]
    width = x[1] - x[0]
    height = y[1] - y[0]
    # Rectangle wants the lower-left corner plus width and height, not the two corners
    box = mpl.patches.Rectangle((x_low, y_low), width, height, facecolor='none', **kwargs)
    axes.add_patch(box)
    

In [None]:
def get_trend(x, y, x_range, bins, mode='median', pct=None):
    '''
    Get a 1D line going through a 2D histogram.
    First bins in x, then gets mean, median or percentile in that bin for the y-values.

    :param x: array-like of x values (lists are accepted as well as arrays).
    :param y: array-like of y values, same length as x.
    :param x_range: (low, high) range that is divided into equal-width bins. Every bin is
        half-open, [left, right), so a point exactly at x_range[1] is not used.
    :param bins: number of bins.
    :param mode: 'median', 'mean' or 'percentile'.
    :param pct: percentile passed to np.percentile; required when mode is 'percentile'.
    :return: (bin_centers, y_values) as two arrays of length `bins`. Bins without data give
        NaN and print a warning.
    :raises SyntaxError: if mode is 'percentile' and pct is None (exception type kept for
        backward compatibility).
    :raises NotImplementedError: for any other mode.

    The mode and pct arguments are checked before any binning, so an invalid call fails
    even when all bins are empty.
    '''
    if mode == 'median':
        summarize = np.median
    elif mode == 'mean':
        summarize = np.mean
    elif mode == 'percentile':
        if pct is None:
            raise SyntaxError('Enter a percentile!')

        def summarize(values):
            return np.percentile(values, pct)
    else:
        raise NotImplementedError('Enter median, mean or percentile, you entered this: %s' % mode)

    # Plain lists do not support the elementwise comparisons used below
    x = np.asarray(x)
    y = np.asarray(y)

    bin_edges = np.linspace(x_range[0], x_range[1], bins+1)
    left_edges = bin_edges[:-1]
    right_edges = bin_edges[1:]
    bin_centers = 0.5*(left_edges + right_edges)

    y_list = []
    for l, r in zip(left_edges, right_edges):
        # Take x bin
        y_sel = y[(x >= l) & (x < r)]

        # Check if it contains data at all
        if len(y_sel) == 0:
            print('Warning: getting empty slice, I will return NaN...')
            y_list.append(float('nan'))
            continue

        y_list.append(summarize(y_sel))

    return bin_centers, np.array(y_list)

In [None]:
def my_interp(x, y, kind='quadratic'):
    '''
    Interpolate data and extend beyond bounds by assuming rightmost and leftmost datapoint.

    :param x: 1D array-like of x positions; they do not have to be sorted.
    :param y: 1D array-like of y values, same length as x.
    :param kind: interpolation kind, passed on to scipy's interp1d.
    :return: callable f(x_new). Below min(x) it returns the y value that belongs to the
        smallest x, above max(x) the y value that belongs to the largest x.
    '''
    # `import scipy` alone is not guaranteed to make the interpolate submodule available
    from scipy.interpolate import interp1d

    x = np.asarray(x)
    y = np.asarray(y)
    # interp1d sorts the points by x itself, so the fill values have to be picked by
    # x position and not by the order in which the points were passed in.
    y_leftmost = y[np.argmin(x)]
    y_rightmost = y[np.argmax(x)]
    f = interp1d(x, y, kind=kind, fill_value=(y_leftmost, y_rightmost), bounds_error=False)
    return f

### Fitting 2D

In [None]:
def build_fit_lists(x, y, x_range, y_range, x_bins, y_bins):
    '''
    Bin (x, y) points on a regular 2D grid and return the result as flat lists for fitting.

    :param x: array-like of x values (lists are accepted as well as arrays).
    :param y: array-like of y values, same length as x.
    :param x_range: (low, high) range in x, divided into x_bins equal-width bins.
    :param y_range: (low, high) range in y, divided into y_bins equal-width bins.
    :param x_bins: number of bins in x.
    :param y_bins: number of bins in y.
    :return: (counts, x_centers, y_centers), three arrays of length x_bins * y_bins with
        x running fastest: entry j * x_bins + i describes x bin i in y bin j.

    All bins are half-open, [left, right), so points that lie exactly on the upper end of
    a range are not counted.
    '''
    x = np.asarray(x)
    y = np.asarray(y)
    x_bin_edges = np.linspace(x_range[0], x_range[1], x_bins+1)
    y_bin_edges = np.linspace(y_range[0], y_range[1], y_bins+1)

    # Slice in y first, then count the entries of every x bin within that slice.
    # np.count_nonzero counts in compiled code; builtin sum walks the mask in Python.
    counts = []
    for left_y, right_y in zip(y_bin_edges[:-1], y_bin_edges[1:]):
        x_sub = x[(y >= left_y) & (y < right_y)]
        counts.extend(
            np.count_nonzero((x_sub >= left_x) & (x_sub < right_x))
            for left_x, right_x in zip(x_bin_edges[:-1], x_bin_edges[1:]))

    # Bin centres in the same order as counts: x cycles within every y bin
    x_centers = 0.5*(x_bin_edges[:-1] + x_bin_edges[1:])
    y_centers = 0.5*(y_bin_edges[:-1] + y_bin_edges[1:])
    x_list = np.tile(x_centers, y_bins)
    y_list = np.repeat(y_centers, x_bins)

    return np.array(counts), x_list, y_list

In [None]:
def plot2d(f, params, x_range, y_range, x_step, y_step, **kwargs):
    '''
    Draw contour lines of a 2D function with plt.contour.

    The function is evaluated as f((X, Y), *params) on a grid made with np.arange from
    x_range / y_range and the step sizes x_step / y_step. Additional keyword arguments
    are forwarded to plt.contour. Nothing is returned.
    '''
    grid_x = np.arange(x_range[0], x_range[1], x_step)
    grid_y = np.arange(*y_range, step=y_step)

    mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)
    values = f((mesh_x, mesh_y), *params)
    plt.contour(mesh_x, mesh_y, values, **kwargs)

In [None]:
def twoD_Gaussian(xdata_tuple, amplitude, xo, yo, sigma_x, sigma_y, theta, offset):
    '''
    Rotated two-dimensional Gaussian on top of a constant offset.

    :param xdata_tuple: (x, y) pair of coordinates (scalars or arrays of equal shape).
    :param amplitude: height of the peak above the offset.
    :param xo, yo: position of the peak centre.
    :param sigma_x, sigma_y: widths along the two principal axes.
    :param theta: rotation angle of the principal axes, in radians.
    :param offset: constant added everywhere.
    :return: function value(s), with the same shape as the coordinates (not flattened).
    '''
    x, y = xdata_tuple
    dx = x - float(xo)
    dy = y - float(yo)

    cos_sq = np.cos(theta)**2
    sin_sq = np.sin(theta)**2
    sin_double = np.sin(2*theta)

    # Coefficients of the quadratic form a*dx^2 + 2*b*dx*dy + c*dy^2 in the exponent
    a = cos_sq/(2*sigma_x**2) + sin_sq/(2*sigma_y**2)
    b = -sin_double/(4*sigma_x**2) + sin_double/(4*sigma_y**2)
    c = sin_sq/(2*sigma_x**2) + cos_sq/(2*sigma_y**2)

    quadratic_form = a*(dx**2) + 2*b*dx*dy + c*(dy**2)
    return offset + amplitude*np.exp(-quadratic_form)

### Fitting functions

In [1]:
from iminuit import Minuit, describe

Gaussian function, plus normalized one

In [1]:
def gaus(x, a0, mu, sigma):
    '''
    Gaussian-shaped peak a0 * exp(-(x - mu)**2 / sigma**2).

    Note: the exponent contains no factor 1/2, so `sigma` in this parametrization is
    sqrt(2) times the standard deviation of the corresponding normal distribution.
    '''
    deviation = x - mu
    exponent = -deviation**2 / (sigma**2)
    return a0 * np.exp(exponent)

In [None]:
def gaus_norm(x, a0, mu, sigma, fit_range, int_step=1.):
    '''
    Gaussian (see gaus) rescaled so that its average over the fit range equals 1.

    The average is taken over the grid np.arange(*fit_range, step=int_step).

    :param x: value(s) at which to evaluate the function.
    :param a0, mu, sigma: parameters passed on to gaus.
    :param fit_range: (low, high) range used for the normalization.
    :param int_step: spacing of the normalization grid.
    :return: normalized function value(s) at x.
    '''
    # Evaluate the whole grid in one vectorized call rather than point by point
    grid = np.arange(*fit_range, step=int_step)
    n = 1/np.average(gaus(grid, a0, mu, sigma))
    return n * gaus(x, a0, mu, sigma)

Gaussians with linear background

In [2]:
def gaus_with_bg(x, a0, mu, sigma, off, slope):
    '''
    Gaussian-shaped peak on a linear background:
    a0 * exp(-(x - mu)**2 / sigma**2) + off + slope * x.

    As the exponent has no factor 1/2, `sigma` is sqrt(2) times the standard deviation
    of the peak.
    '''
    peak = a0 * np.exp(-(x - mu)**2 / (sigma**2))
    return peak + off + slope * x

In [3]:
def gaus_with_bg_norm(x, a0, mu, sigma, off, slope, fit_range, int_step=1.):
    '''
    Gaussian plus linear background (see gaus_with_bg) rescaled so that its average over
    the fit range equals 1.

    The average is taken over the grid np.arange(*fit_range, step=int_step).

    :param x: value(s) at which to evaluate the function.
    :param a0, mu, sigma, off, slope: parameters passed on to gaus_with_bg.
    :param fit_range: (low, high) range used for the normalization.
    :param int_step: spacing of the normalization grid.
    :return: normalized function value(s) at x.
    '''
    # Evaluate the whole grid in one vectorized call rather than point by point
    grid = np.arange(*fit_range, step=int_step)
    n = 1/np.average(gaus_with_bg(grid, a0, mu, sigma, off, slope))
    return n * gaus_with_bg(x, a0, mu, sigma, off, slope)

Fit functions

In [1]:
def fit_gaus_with_bg(e_list, fit_range, a0, mu, sigma, off, slope):
    '''
    Note: outdated!

    Fit a Gaussian plus linear background to the entries of e_list inside fit_range by
    minimizing the negative log likelihood with Minuit, and draw spectrum, initial guess,
    fit range and fit result on the current figure.

    :param e_list: array of values to fit (needs to support boolean-mask indexing).
    :param fit_range: (low, high); only entries strictly inside are fitted.
    :param a0, mu, sigma, off, slope: start values for the parameters of gaus_with_bg.
    :return: ([a0, mu, sigma, off, slope] as fitted, the Minuit object, the factor that
        scales the fitted function to the plotted histogram).

    The start value of the function to minimize and the fitted sigma / mu are printed.
    '''
    param_names = ('a0', 'mu', 'sigma', 'off', 'slope')

    # Spectrum, initial guess and limits of the fit range
    plt.hist(e_list, bins=200, histtype='step', range=(0, 1e3), label='Spectrum')
    x_plot = np.linspace(0, 1000, 1000)
    plt.plot(x_plot, gaus_with_bg(x_plot, a0, mu, sigma, off, slope), label='Guess')
    for limit in fit_range:
        plt.axvline(limit, color='black')

    in_range = (e_list > fit_range[0]) & (e_list < fit_range[1])
    x_fit = e_list[in_range]

    # Minuit reads the parameter names from this signature, so they have to stay as they are
    def f_to_minimize(a0, mu, sigma, off, slope):
        densities = gaus_with_bg_norm(x_fit, a0, mu, sigma, off, slope, fit_range=fit_range)
        return -sum(np.log(densities))

    print(f_to_minimize(a0=a0, mu=mu, sigma=sigma, off=off, slope=slope))
    m = Minuit(f_to_minimize, a0=a0, mu=mu, sigma=sigma, off=off, slope=slope)
    m.migrad()

    best_fit = [m.values[name] for name in param_names]

    # Scale the fitted shape to the number of fitted entries; the grid step of 5 equals the
    # width of the histogram bins drawn above (range 0-1000 in 200 bins)
    grid_total = sum([gaus_with_bg(_x, *best_fit) for _x in np.arange(*fit_range, step=5)])
    normalization = len(x_fit) / grid_total

    plt.plot(x_plot, normalization * gaus_with_bg(x_plot, *best_fit), label='Fit')
    plt.legend()
    print(best_fit[2] / best_fit[1])

    return best_fit, m, normalization

In [1]:
def fit_gaus(e_list, fit_range, mu, sigma, plot_range=(0, 1e3), plot_bins=200, int_step=0.1, plot_guess = True,
             plot=True, print_level = 1):
    '''
    Fit a Gaussian (see gaus) to the entries of e_list inside fit_range by minimizing the
    negative log likelihood with Minuit, optionally drawing the result.

    :param e_list: array of values to fit (needs to support boolean-mask indexing).
    :param fit_range: (low, high); only entries strictly inside are fitted.
    :param mu, sigma: start values of the fit.
    :param plot_range: range of the plotted histogram.
    :param plot_bins: number of bins of the plotted histogram.
    :param int_step: grid spacing used to normalize the Gaussian within fit_range.
    :param plot_guess: also draw the Gaussian for the start values (only if plot is True).
    :param plot: draw spectrum, fit range and fitted function on the current figure.
    :param print_level: passed on to Minuit.
    :return: ([mu, sigma] as fitted, the Minuit object, the factor that scales a
        unit-amplitude Gaussian to the plotted histogram).

    NOTE(review): the errordef / error_* / print_level keywords of the Minuit constructor
    look like the iminuit 1.x interface -- confirm against the installed version.
    '''
    # Get the list of energies to fit
    in_range = (e_list > fit_range[0]) & (e_list < fit_range[1])
    x_fit = e_list[in_range]

    def scale_to_counts(peak_mu, peak_sigma):
        # Number of fitted entries divided by the unit-amplitude Gaussian summed over the
        # fit range in steps of one histogram bin width
        bin_width = (plot_range[1] - plot_range[0])/plot_bins
        grid_total = sum(
            [gaus(_x, 1, peak_mu, peak_sigma) for _x in np.arange(*fit_range, step=bin_width)])
        return len(x_fit) / grid_total

    # PLOTTING: spectrum, start values and fit range
    if plot:
        plt.hist(e_list, bins=plot_bins, histtype='step', range=plot_range, label='Spectrum')
        normalization_guess = scale_to_counts(mu, sigma)
        x_plot = np.linspace(*plot_range, num=1000)
        if plot_guess:
            plt.plot(x_plot, normalization_guess * gaus(x_plot, 1, mu, sigma), label='Guess')
        for limit in fit_range:
            plt.axvline(limit, color='black', ls='--')

    # The function to minimize is the sum of the negative log likelihood.
    # Minuit reads the parameter names from this signature, so they have to stay as they are.
    def f_to_minimize(mu, sigma):
        densities = gaus_norm(x_fit, 1, mu, sigma, fit_range=fit_range, int_step=int_step)
        return -sum(np.log(densities))

    m = Minuit(f_to_minimize, mu=mu, sigma=sigma, errordef=1, error_mu=1, error_sigma=1,
               print_level=print_level)
    m.migrad()

    fitted_mu = m.values['mu']
    fitted_sigma = m.values['sigma']
    normalization = scale_to_counts(fitted_mu, fitted_sigma)

    # Plot fitted function
    if plot:
        plt.plot(x_plot, normalization * gaus(x_plot, 1, fitted_mu, fitted_sigma), label='Fit')
        plt.legend()

    return [fitted_mu, fitted_sigma], m, normalization

In [None]:
def p2(x, a0, a1, a2):
    '''Second-order polynomial a0 + a1 * x + a2 * x**2.'''
    linear_term = a1 * x
    quadratic_term = a2 * x**2
    return a0 + linear_term + quadratic_term

In [None]:
def plt_divide_axis(axis='x', factor = 1e3):
    '''
    Relabel the major ticks of one axis of the current axes with value / factor.

    Only the tick labels change (formatted with '{0:g}'); the plotted data are untouched.

    :param axis: 'x' or 'y'; anything else raises NotImplementedError.
    :param factor: number by which the tick values are divided for the labels.
    '''
    import matplotlib.ticker as ticker
    ax = plt.gca()

    if axis == 'x':
        target_axis = ax.xaxis
    elif axis == 'y':
        target_axis = ax.yaxis
    else:
        raise NotImplementedError()

    def scaled_label(value, pos):
        return '{0:g}'.format(value/factor)

    target_axis.set_major_formatter(ticker.FuncFormatter(scaled_label))