# This code blurs the $^{39}$Ar Monte Carlo parameters
# Код для размытия Монте Карло данных $^{39}Ar$

### Import required libraries
### Подключаем необходимые библиотеки

In [None]:
import random
import uproot
import seaborn as sns
import numpy as np
import pandas as pd
from collections import Counter
from matplotlib import pylab as plt
from scipy.stats import chisquare

import math
from scipy.special import gamma

from scipy.optimize import curve_fit

import numpy.random as ra

import sys
sys.setrecursionlimit(10000)

from itertools import chain

import scipy.stats as st

import cellbell

### Import datasets 
### Импортируем наборы данных

In [2]:
# Read the six Ar39 Monte Carlo CSV exports; rows containing NaN are dropped
# right after each file is read.
ds_Ar39_1 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_100kevents.csv', header=0, sep=",").dropna()
ds_Ar39_2 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_100k_events_071019.csv', header=0, sep=",").dropna()
ds_Ar39_3 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_100k_events_211019.csv', header=0, sep=",").dropna()
ds_Ar39_4 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_20kevents_1.csv', header=0, sep=",").dropna()
ds_Ar39_5 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_20kevents_2.csv', header=0, sep=",").dropna()
ds_Ar39_6 = pd.read_csv('/home/kingaa/Desktop/DarkSide/Monte/data/outputAr39_20kevents_3.csv', header=0, sep=",").dropna()

# Stack all six samples into one frame.
ds_Ar39_mall = pd.concat(
    [ds_Ar39_1, ds_Ar39_2, ds_Ar39_3, ds_Ar39_4, ds_Ar39_5, ds_Ar39_6]
)

# Keep only the events inside the s1 / f90 selection window.
ds_Ar39 = ds_Ar39_mall.query('s1>30 and s1 < 500 and f90 > 0.15 and f90 < 0.5')

# Copy every column used further down into a plain Python list.
(
    Ar39_f90, Ar39_f30, Ar39_f60, Ar39_f200,
    Ar39_s1, Ar39_s2, Ar39_late, Ar39_prompt,
) = (
    list(ds_Ar39[_column])
    for _column in ('f90', 'f30', 'f60', 'f200', 's1', 's2', 's1late', 's1prompt')
)

# Open the AAr ROOT file and take its "TreeB" tree.
ds_AAr = "/home/kingaa/Desktop/DarkSide/Monte/AAr_cut_500.root"
tree_AAr = uproot.open(ds_AAr)["TreeB"]

# Start from the f90 branch, then attach the other branches as extra columns
# (data-frame column name, tree branch name).
df_f90_ds_AAr = tree_AAr.pandas.df('f90')
for _column, _branch in (
    ('f30', 'f30'),
    ('f60', 'f60'),
    ('f200', 'f200'),
    ('s1', 's1'),
    ('s2', 's2'),
    ('sprompt', 'long_prompt'),
    ('slate', 'long_late'),
):
    df_f90_ds_AAr[_column] = tree_AAr.pandas.df(_branch)

# Apply the same s1 / f90 selection window that is used for the Ar39 sample.
df_f90_ds_AAr = df_f90_ds_AAr.query('s1>30 and s1 < 500 and f90 > 0.15 and f90 < 0.5')

# Copy every selected column into a plain Python list.
(
    AAr_f90, AAr_f30, AAr_f60, AAr_f200,
    AAr_s1, AAr_s2, AAr_sprompt, AAr_slate,
) = (
    list(df_f90_ds_AAr[_name])
    for _name in ('f90', 'f30', 'f60', 'f200', 's1', 's2', 'sprompt', 'slate')
)

### Fit functions creation
### Создаем фитирующие функции

#### DEAP function
#### Функция распределения коллаборации DEAP
##### DEAP collaboration "Search for dark matter with a 231-day exposure of liquid argon using DEAP-3600 at SNOLAB", 2019.

In [3]:
def deap_distr_NR(f, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, A):
    """Evaluate the DEAP-style distribution model at ``f``.

    The three shape quantities are built from the module-level variable
    ``q``, which must be assigned before the function is called (it is only
    read here, so no ``global`` declaration is needed)::

        fbar  = a0 + a1/(q - a2) + a3/(q - a4)**2
        b     = a5 + a6/q + a7/q**2
        sigma = a8 + a9/q + a10/q**2

    Parameters
    ----------
    f : float or numpy.ndarray
        Point(s) at which the model is evaluated.
    a0 ... a10 : float
        Coefficients of the ``q`` dependence shown above.
    A : float
        Overall amplitude.

    Returns
    -------
    float or numpy.ndarray
        ``A`` times a Gaussian factor in ``f`` (width ``sigma``) times a
        gamma-like factor in ``1 - f`` (parameters ``fbar`` and ``b``).
    """
    fbar = a0 + a1/(q - a2) + a3/(q - a4)**2
    b = a5 + a6/q + a7/q**2
    sigma = a8 + a9/q + a10/q**2
    # np.sqrt (instead of math.sqrt) keeps the expression valid when ``q`` --
    # and therefore ``sigma`` -- is a NumPy array rather than a scalar.
    fnr = A * 1/np.sqrt(2*np.pi*sigma**2) * np.exp(-(f**2)/(2*sigma**2)) * (fbar**b * (1 - f)**(b-1) * np.exp(-fbar*(1 - f)))/(gamma(b))
    return fnr

#### David Hinkley function 
#### Функция распределения Дэвида Хинкли
##### W. H. Lippincott et al. "Scintillation time dependence and pulse shape discrimination in liquid argon", 2008.

In [4]:
def hinkley(x, mu_s1late_NR, mu_s1prompt_NR, sigma_s1late_NR,sigma_s1prompt_NR, A):
    """Evaluate the Hinkley ratio-distribution formula at ``x``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which the function is evaluated.
    mu_s1late_NR, mu_s1prompt_NR : float
        The two mean parameters of the formula.
    sigma_s1late_NR, sigma_s1prompt_NR : float
        The two width parameters of the formula.
    A : float
        Overall amplitude.

    Returns
    -------
    float or numpy.ndarray
        ``A * exp(-0.5 * d**2 / v) * w / (sqrt(2*pi) * v**1.5)`` with
        ``d = mu_late*x - mu_prompt*(1-x)``,
        ``v = sigma_late**2 * x**2 + sigma_prompt**2 * (1-x)**2`` and
        ``w = sigma_late**2 * mu_prompt * x + sigma_prompt**2 * mu_late * (1-x)``.
    """
    # Combined variance term shared by the exponent and the normalisation.
    variance = sigma_s1late_NR**2*x**2+sigma_s1prompt_NR**2*(1-x)**2
    exponent = (-0.5*(mu_s1late_NR*x-mu_s1prompt_NR*(1-x))**2)/variance
    weight = sigma_s1late_NR**2*mu_s1prompt_NR*x+sigma_s1prompt_NR**2*mu_s1late_NR*(1-x)
    normalisation = (2*np.pi)**(0.5)*variance**1.5
    return A * np.exp(exponent)*weight/normalisation

### Main code for $f_{30}$, $f_{60}$, $f_{90}$, $f_{200}$
### Основной код для $f_{30}$, $f_{60}$, $f_{90}$, $f_{200}$

In [None]:
def its_hinkley_ER(N):
    """Draw ``N`` values from the tabulated Hinkley fit by inverse transform sampling.

    Reads two module-level arrays that ``hinkley_ER_all`` fills in:
    ``x_hinkley_ER`` (the sampling grid) and ``f_hinkley_ER`` (the fitted
    function evaluated on that grid).  The tabulated values are normalised to
    unit sum and accumulated; for every uniform random number ``r`` in
    [0, 1) the grid point carrying the smallest cumulative value strictly
    above ``r`` is returned.

    Parameters
    ----------
    N : int
        Number of values to draw.

    Returns
    -------
    list of float
        ``N`` grid values taken from ``x_hinkley_ER``.
    """
    prob = f_hinkley_ER/float(sum(f_hinkley_ER))
    cum_prob = np.cumsum(prob)
    last_index = len(cum_prob) - 1
    gen_random = []
    for r in ra.uniform(0, 1, N):
        above = np.flatnonzero(cum_prob > r)
        if above.size:
            # argmin returns the FIRST position of the minimum, so repeated
            # cumulative values (grid points with zero density) no longer
            # produce a multi-element selection that cannot become a float.
            index = above[np.argmin(cum_prob[above])]
        else:
            # Rounding can leave the last cumulative value marginally below
            # r; fall back to the last grid point instead of raising.
            index = last_index
        gen_random.append(float(x_hinkley_ER[index]))
    return gen_random
def blur(value, depth):
    """Place ``value`` in an AAr histogram bin, resampling it if that bin is full.

    "Extra values" are values that fall into a bin of the running blurred
    histogram ``n_list_ER`` that already holds at least as many entries as
    the same bin of the AAr histogram ``n_AAr_data``.  Such a value is
    replaced by a fresh draw from ``its_hinkley_ER`` and the check is
    repeated, at most until ``depth`` reaches 10.

    Module-level names read: ``list_data`` (bin edges of the Ar39 histogram),
    ``list_AAr_data`` / ``n_AAr_data`` (bin edges / counts of the AAr
    histogram) and ``n_list_ER`` (running counts of accepted values).

    Parameters
    ----------
    value : float
        Candidate value.
    depth : int
        Number of resamplings already performed; callers start with 0.

    Returns
    -------
    tuple (int, float)
        Index of the AAr bin (0..99) and the accepted value.
    """
    # Only the first 100 edges are lower bin edges; the last one closes bin 99.
    lower_edges = np.asarray(list_AAr_data)[:100]
    lowest, highest = min(list_data), max(list_data)
    while True:
        # Nudge values that sit exactly on the outermost Ar39 edges inwards.
        if value == lowest:
            value = value + 0.001
        if value == highest:
            value = value - 0.001
        # Index of the last lower edge <= value.  Values below the first AAr
        # edge used to crash on max() of an empty selection; they are now
        # assigned to bin 0.
        j = int(np.clip(np.searchsorted(lower_edges, value, side='right') - 1,
                        0, len(lower_edges) - 1))
        # '>=' instead of '==' so a caller-supplied depth above 10 still stops.
        if (n_list_ER[j] < n_AAr_data[j]) or depth >= 10:
            return j, value
        depth = depth + 1
        value = its_hinkley_ER(1)[0]
def hinkley_ER_all(data_AAr):
    """Blur the current Ar39 parameter values against one AAr sub-sample.

    Used for the Ar39 parameters f30, f60, f90 and f200.  The Ar39 values
    are taken from the module-level list ``data``.  Steps:

    1. histogram ``data_AAr`` into 100 bins (via ``plt.hist``);
    2. fit ``hinkley`` to the bin counts as a function of the lower bin
       edges and print the fitted parameters;
    3. tabulate the fitted function on 200 points between ``min(data)`` and
       ``max(data)`` (the table read by ``its_hinkley_ER``);
    4. pass every value of ``data`` through ``blur`` and count the accepted
       values per bin.

    All intermediate results are published as module-level globals because
    ``blur`` and ``its_hinkley_ER`` read them from there.

    Parameters
    ----------
    data_AAr : sequence of float
        AAr values of the parameter being processed.

    Returns
    -------
    tuple (list, list)
        The blurred values, in the order of ``data``, and the per-bin counts
        ``n_list_ER`` (length 101).
    """
    global n_AAr_data, list_AAr_data, patches_AAr_data
    global popt_hinkley_Ar39, pcov_hinkley_Ar39
    global n_f90_AAr_trial_deap
    global x_hinkley_ER, f_hinkley_ER
    global n_list_ER

    # Number of events in every bin of the AAr data.
    n_AAr_data, list_AAr_data, patches_AAr_data = plt.hist(data_AAr, bins = 100, density = False)

    # Fit the counts against the 100 lower bin edges.
    popt_hinkley_Ar39, pcov_hinkley_Ar39 = curve_fit(hinkley, list_AAr_data[:100], n_AAr_data, maxfev=100000)
    print(popt_hinkley_Ar39)

    # Fitted function evaluated at every bin edge.
    n_f90_AAr_trial_deap = hinkley(list_AAr_data, *popt_hinkley_Ar39)

    # Tabulated fit used as the sampling distribution for resampled values.
    x_hinkley_ER = np.linspace(min(data), max(data), 200)
    f_hinkley_ER = hinkley(x_hinkley_ER, *popt_hinkley_Ar39)

    # The counters must be reset BEFORE the loop: blur() compares them with
    # the AAr counts while the list is being filled.
    n_list_ER = [0] * 101
    data_blur = []
    for original_value in data:
        j, value = blur(original_value, 0)
        n_list_ER[j] = n_list_ER[j] + 1
        data_blur.append(value)
    return data_blur, n_list_ER
def _draw_comparison_panel(position, AAr_data, Ar39_data, title, nbins):
    """Draw one subplot with the normalised AAr and Ar39 step histograms."""
    plt.subplot(position)
    plt.hist(AAr_data, bins = nbins, density = True, histtype = 'step', color = 'black', linewidth = 3, label = 'AAr')
    plt.hist(Ar39_data, bins = nbins, density = True, histtype = 'step', color = 'red', linewidth = 3, label = 'Ar39')
    plt.title(title, fontsize=22)
    plt.xticks(fontsize = 14)
    plt.yticks(fontsize = 14)
    plt.legend()


def plotting (AAr_data, Ar39_old, Ar39_new, parameter_label = '$f_{30}$'):
    """Plot the Ar39 data before and after the blur, each against the AAr data.

    Creates a 16x6 figure with two panels: the left one overlays ``AAr_data``
    and ``Ar39_old``, the right one overlays ``AAr_data`` and ``Ar39_new``.
    All histograms use 50 bins and are normalised (``density=True``).

    Parameters
    ----------
    AAr_data : sequence of float
        AAr values, drawn in black in both panels.
    Ar39_old : sequence of float
        Ar39 values before the blur, drawn in red in the left panel.
    Ar39_new : sequence of float
        Ar39 values after the blur, drawn in red in the right panel.
    parameter_label : str, optional
        Parameter name inserted into both panel titles.  The default keeps
        the previously hard-coded ``$f_{30}$`` so existing calls produce the
        same figure; pass e.g. ``'$f_{90}$'`` when plotting another parameter.
    """
    nbins = 50
    plt.figure(figsize=(16,6))
    _draw_comparison_panel(121, AAr_data, Ar39_old, '$^{39}$Ar ' + parameter_label + ' до преобразований', nbins)
    _draw_comparison_panel(122, AAr_data, Ar39_new, '$^{39}$Ar ' + parameter_label + ' после преобразований', nbins)

In [None]:
# Blur f30, f60, f90 and f200 of the Ar39 Monte Carlo, one parameter at a time.
parameter_AAr_list = ['AAr_f30', 'AAr_f60', 'AAr_f90', 'AAr_f200']

# Parameter name -> (AAr values, Ar39 values).  The names alone are only
# strings: sampling from `parameter` itself would shuffle the characters of
# the name instead of the measured AAr values.
parameter_samples = {
    'AAr_f30': (AAr_f30, Ar39_f30),
    'AAr_f60': (AAr_f60, Ar39_f60),
    'AAr_f90': (AAr_f90, Ar39_f90),
    'AAr_f200': (AAr_f200, Ar39_f200),
}

# The shuffled AAr sample is cut into 12 consecutive sub-samples of 50646
# events each (both numbers are kept from the original analysis).
N_AAR_CHUNKS = 12
AAR_CHUNK_SIZE = 50646

Ar39_new_parameters = []
for parameter in parameter_AAr_list:
    # `data` is read as a module-level global by hinkley_ER_all().
    AAr_values, data = parameter_samples[parameter]
    shuffled_AAr = random.sample(AAr_values, len(AAr_values))
    AAr_parameter_lists = [
        shuffled_AAr[i*AAR_CHUNK_SIZE:AAR_CHUNK_SIZE*(i+1)]
        for i in range(N_AAR_CHUNKS)
    ]
    # `list_data` (the Ar39 bin edges) is read as a global by blur().
    n_data, list_data, patches_data = plt.hist(data,100)
    a = []
    lst = []
    # Every sub-sample blurs the complete Ar39 list once.
    for AAr_chunk in AAr_parameter_lists:
        q1,q2 = hinkley_ER_all(AAr_chunk)
        a.append(q1)
        lst.append(q2)
    new_param = list(chain(*a))
    Ar39_new_parameters.append(new_param)
    # Compare against the AAr values themselves, not against the name string.
    plotting(AAr_values, data, new_param)

### Main code for $S_1$
### Основной код для $S_1$

In [None]:
# Cut the AAr S1 values into 12 consecutive sub-samples of 50646 events each.
AAr_s1_lists = [AAr_s1[50646*k:50646*(k + 1)] for k in range(12)]

In [None]:
# Trial samples: the first 50646 selected AAr events of f90 and of S1.
AAr_f90_trial = list(df_f90_ds_AAr['f90'].iloc[:50646])
AAr_s1_trial = list(df_f90_ds_AAr['s1'].iloc[:50646])

In [None]:
# Histogram the Ar39 S1 values into 100 bins (raw counts, not normalised).
# The bin edges `list_s1_Ar39` are read as a global by blur_s1_ER().
n_s1_Ar39, list_s1_Ar39, patches_s1_Ar39 = plt.hist(Ar39_s1, bins = 100, density = False)

In [None]:
def blur_s1_ER(value, depth):
    """Place an S1 ``value`` in a histogram bin, resampling it if that bin is full.

    S1 is blurred with a uniform distribution; the reference bin contents
    come from a Hinkley fit.  A bin counts as full when the running count
    ``n_list_ER_s1[j]`` has reached the fitted value ``n_s1_AAr_fit[j]``.  In
    that case the value is replaced by a uniform draw between
    ``min(Ar39_s1)`` and ``max(Ar39_s1)`` and the check is repeated, at most
    until ``depth`` reaches 5.

    Module-level names read: ``list_s1_Ar39`` (bin edges of the Ar39 S1
    histogram), ``n_list_ER_s1``, ``n_s1_AAr_fit`` and ``Ar39_s1``.

    Parameters
    ----------
    value : float
        Candidate S1 value.
    depth : int
        Number of resamplings already performed; callers start with 0.

    Returns
    -------
    tuple (int, float)
        Bin index (0..99) and the accepted value.
    """
    # Only the first 100 edges are lower bin edges; the last one closes bin 99.
    lower_edges = np.asarray(list_s1_Ar39)[:100]
    lowest, highest = min(list_s1_Ar39), max(list_s1_Ar39)
    while True:
        # Nudge values that sit exactly on the outermost edges inwards.
        if value == lowest:
            value = value + 1
        if value == highest:
            value = value - 1
        # Index of the last lower edge <= value, clipped so a value below the
        # first edge lands in bin 0 instead of raising on an empty selection.
        j = int(np.clip(np.searchsorted(lower_edges, value, side='right') - 1,
                        0, len(lower_edges) - 1))
        # '>=' instead of '==' so a caller-supplied depth above 5 still stops.
        if (n_list_ER_s1[j] < n_s1_AAr_fit[j]) or depth >= 5:
            return j, value
        depth = depth + 1
        # min/max of the full list are only evaluated when a resample is needed.
        value = np.random.uniform(min(Ar39_s1),max(Ar39_s1),1)[0]
def hinkley_ER_all_s1(AAr_s1):
    """Blur the Ar39 S1 values against one AAr S1 sub-sample.

    The sub-sample is histogrammed into 100 bins, ``hinkley`` is fitted to
    the bin counts as a function of the lower bin edges, and the fitted
    function evaluated at every bin edge becomes the per-bin reference
    ``n_s1_AAr_fit``.  Every entry of the module-level list ``Ar39_s1`` is
    then passed through ``blur_s1_ER``.

    The histogram, the fit reference and the running counters are published
    as module-level globals; ``blur_s1_ER`` reads the last two.

    Parameters
    ----------
    AAr_s1 : sequence of float
        AAr S1 values of one sub-sample (shadows the global of the same name).

    Returns
    -------
    tuple (list, list)
        The blurred S1 values, in the order of ``Ar39_s1``, and the per-bin
        counts ``n_list_ER_s1`` (length 201).
    """
    global n_s1_AAr_part, list_s1_AAr_part, patches_s1_AAr_part
    global n_s1_AAr_fit
    global n_list_ER_s1

    # Number of events in every bin of the AAr sub-sample.
    histogram = plt.hist(AAr_s1, bins = 100, density = False)
    n_s1_AAr_part, list_s1_AAr_part, patches_s1_AAr_part = histogram

    # Fit, then evaluate the fitted function at every bin edge.
    fit_parameters, fit_covariance = curve_fit(hinkley, list_s1_AAr_part[:100], n_s1_AAr_part, maxfev=100000)
    n_s1_AAr_fit = hinkley(list_s1_AAr_part, *fit_parameters)

    # Counters are reset before the loop; blur_s1_ER() reads them while they fill.
    n_list_ER_s1 = [0] * 201
    blurred_s1 = []
    for original_s1 in Ar39_s1:
        bin_index, accepted_s1 = blur_s1_ER(original_s1, 0)
        n_list_ER_s1[bin_index] = n_list_ER_s1[bin_index] + 1
        blurred_s1.append(accepted_s1)
    return blurred_s1, n_list_ER_s1

In [None]:
# Blur the Ar39 S1 values once per AAr S1 sub-sample and merge the results.
# The blur routine defined in this notebook is hinkley_ER_all_s1; the name
# uniform_ER_all_s1 used here before is not defined anywhere in it.
a = []
lst = []
for AAr_s1_chunk in AAr_s1_lists:
    q1,q2 = hinkley_ER_all_s1(AAr_s1_chunk)
    a.append(q1)
    lst.append(q2)
Ar39_s1_deap = list(chain(*a))

In [90]:
# Create the pandas DataFrame with the blurred parameters and save it to .csv.
# The blurred S1 list built above is called Ar39_s1_deap; Ar39_s1_new is not
# defined anywhere in this notebook.
data = {
    'f30': Ar39_new_parameters[0],
    'f60': Ar39_new_parameters[1],
    'f90': Ar39_new_parameters[2],
    'f200': Ar39_new_parameters[3],
    's1': Ar39_s1_deap,
}
df = pd.DataFrame(data)
df.to_csv('new_ds_Ar39_MC_30_500_f30_f60_f90_f200_s1.csv')