In [None]:
source('Requirements_and_Functions.R')

## Preparing datas

In [None]:
# Load the raw samples (labels and check_none switched on) and unpack the
# first three elements of the returned list by position.
datas_or <- load_all_datas(labels = TRUE, check_none = TRUE)
data_bkg <- datas_or[[1]]
data_sim_sign <- datas_or[[2]]
data_lhcb <- datas_or[[3]]
# Fourth sample: derived from the LHCb data by get_complementary_cut().
data_compl <- get_complementary_cut(data_lhcb)
# Rebuild the container as a named list so later cells can index by name.
datas_or <- list(
  data_bkg = data_bkg,
  data_sig = data_sim_sign,
  data_lhcb = data_lhcb,
  data_compl = data_compl
)

In [None]:
# Build two cleaned views of every raw sample, keeping the list names.
#   datas_show: clean_datas() with only_useful and only_unbiased both off.
#   datas:      datas_show passed through clean_datas() again with
#               only_unbiased switched on.
# seq_along() replaces the hard-coded 1:4 so the loops always follow the
# actual length of the input list.
datas_show <- list()
for (i in seq_along(datas_or)) {
  datas_show[[names(datas_or)[i]]] <- clean_datas(
    datas_or[[i]],
    only_useful = FALSE,
    only_unbiased = FALSE
  )
}
datas <- list()
for (i in seq_along(datas_show)) {
  datas[[names(datas_show)[i]]] <- clean_datas(
    datas_show[[i]],
    only_useful = FALSE,
    only_unbiased = TRUE
  )
}

In [None]:
# Split the cleaned samples into a training part and a held-out test set.
# write_on_file = TRUE presumably persists the split -- confirm in the helper.
datas_train_test <- take_test_set(datas, write_on_file = TRUE)
datas_train <- datas_train_test[["data_train"]]
x_test <- datas_train_test[["x_test"]]
y_test <- datas_train_test[["y_test"]]

## Show Datas

In [None]:
# Plot the first and third entries of datas_show (data_bkg and data_lhcb).
show_datas(datas_show[c(1, 3)], density = TRUE, cuts = TRUE)

## Random Forest - All Features

#### Fraction of the total sample to be used

In [None]:
# Candidate sample sizes: coarse steps at both ends, finer steps (20) in
# the 1070-1130 range.
samples <- c(
  seq(950, 1050, 50),
  seq(1070, 1130, 20),
  seq(1150, 1300, 50)
)
results <- get_plots_fraction_tot_samples(
  datas_train,
  samples = samples,
  times = 1,
  method = customRF
)

#### Best parameters to be used

In [None]:
# Single-point grid of random-forest hyper-parameters for customRF,
# evaluated on the all-features training data.
tuneGrid <- expand.grid(
  .mtry = 5,
  .ntree = 400,
  .maxnodes = 150,
  .importance = FALSE,
  .nodesize = 150
)
try_parameters(
  method = customRF,
  parameters = tuneGrid,
  datas_train = datas_train
)

In [None]:
# Hyper-parameters used for the all-features random forest below.
best_ntree <- 400
best_mtry <- 5
best_maxnodes <- 150
best_nodesize <- 150
best_importance <- FALSE

#### Train and test

In [None]:
# Assemble the training table from the all-features training samples; the
# randomForest call below expects it to contain a `Label` column.
data_to_train <- get_training_datas(datas_train)

In [None]:
# Fit the all-features random forest with the selected hyper-parameters.
# The minimum terminal-node size must be passed as `nodesize`; the previous
# `best_nodesize = ...` is not a randomForest argument, so it was absorbed
# by `...` and the tuned value was never applied.
random_forest_all <- randomForest(
  Label ~ .,
  data = data_to_train,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

In [None]:
# Report the all-features forest's performance on the held-out test set.
show_result(model = random_forest_all, x_test = x_test, y_test = y_test)

In [None]:
# Class probabilities on the test set; kept for the ROC comparison later.
predicted_proba_all <- predict(random_forest_all, x_test, type = "prob")

## Random Forest - Useful Features

#### Preparing Datas

In [None]:
# Restrict every training sample to the "useful" features, keeping the
# list names. seq_along() is used instead of 1:length() so an empty list
# produces zero iterations rather than iterating over c(1, 0).
datas_useful <- list()
for (i in seq_along(datas_train)) {
  datas_useful[[names(datas_train)[i]]] <- clean_datas(
    datas_train[[i]],
    only_useful = TRUE
  )
}

#### Fraction of the total sample to be used

In [None]:
# Same sample-size scan as for the all-features forest, now on the
# useful-features data.
samples <- c(
  seq(950, 1050, 50),
  seq(1070, 1130, 20),
  seq(1150, 1300, 50)
)
results <- get_plots_fraction_tot_samples(
  datas_useful,
  samples = samples,
  times = 1,
  method = customRF
)

#### Best parameters to be used

In [None]:
# Grid for the useful-features forest: only .nodesize is scanned here. The
# trailing comments keep the ranges previously scanned for the other
# parameters.
tuneGrid <- expand.grid(
  .mtry = 3,
  .ntree = 135, # c(seq(10,150,10),seq(200,2000,500)),
  .maxnodes = 30, # c(seq(10,60,4)),
  .importance = TRUE,
  .nodesize = seq(1, 50, 3)
)
try_parameters(
  method = customRF,
  parameters = tuneGrid,
  datas_train = datas_useful
)

In [None]:
# Hyper-parameters used for the useful-features random forest below.
best_ntree <- 135
best_mtry <- 3
best_maxnodes <- 30
best_nodesize <- 27
best_importance <- TRUE

#### Train and test

In [None]:
# Assemble the training table from the useful-features samples (overwrites
# the all-features table of the same name).
data_to_train <- get_training_datas(datas_useful)

In [None]:
# Fit the useful-features random forest with the selected hyper-parameters.
# The minimum terminal-node size must be passed as `nodesize`; the previous
# `best_nodesize = ...` is not a randomForest argument, so it was absorbed
# by `...` and the tuned value was never applied.
random_forest_usf <- randomForest(
  Label ~ .,
  data = data_to_train,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

In [None]:
# Give the test set the same feature selection as the training data, then
# report the useful-features forest's performance on it.
x_test_useful <- clean_datas(x_test, only_useful = TRUE)
show_result(model = random_forest_usf, x_test = x_test_useful, y_test = y_test)

In [None]:
# Class probabilities on the useful-features test set, kept for the ROC plot.
predicted_proba_usf <- predict(random_forest_usf, x_test_useful, type = "prob")

## Random Forest - Optimized Features

#### Preparing Datas

In [None]:
# Ask correlations() for the columns to drop (threshold 0.8, useful
# features only, computed on the cleaned LHCb sample), then remove them
# from every useful-features training sample.
drop_columns <- correlations(
  datas[["data_lhcb"]],
  show = TRUE,
  columns_to_drop = TRUE,
  only_useful = TRUE,
  threshold = 0.8
)
# seq_along() instead of 1:length() so an empty list yields no iterations.
datas_optimized <- list()
for (i in seq_along(datas_useful)) {
  datas_optimized[[names(datas_useful)[i]]] <- clean_datas(
    datas_useful[[i]],
    drop_columns = drop_columns
  )
}

#### Fraction of the total sample to be used

In [None]:
# Sample-size scan on the optimized-features data; this one starts at 900
# rather than 950.
samples <- c(
  seq(900, 1050, 50),
  seq(1070, 1130, 20),
  seq(1150, 1300, 50)
)
results <- get_plots_fraction_tot_samples(
  datas_optimized,
  samples = samples,
  times = 1,
  method = customRF
)

#### Best parameters to be used

In [None]:
# Grid for the optimized-features forest: only .nodesize is scanned here.
# The trailing comments keep the ranges previously scanned for the other
# parameters.
tuneGrid <- expand.grid(
  .mtry = 3,
  .ntree = 135, # c(seq(10,200,10),seq(250,2000,500)),
  .maxnodes = 40, # c(seq(10,100,6)),
  .importance = FALSE,
  .nodesize = seq(1, 50, 3)
)
try_parameters(
  method = customRF,
  parameters = tuneGrid,
  datas_train = datas_optimized
)

In [None]:
# Hyper-parameters used for the optimized-features random forest below.
best_ntree <- 135
best_mtry <- 3
best_maxnodes <- 40
best_nodesize <- 30
best_importance <- FALSE

#### Train and test

In [None]:
# Assemble the training table from the optimized-features samples
# (overwrites the previous table of the same name).
data_to_train <- get_training_datas(datas_optimized)

In [None]:
# Fit the optimized-features random forest with the selected
# hyper-parameters. The minimum terminal-node size must be passed as
# `nodesize`; the previous `best_nodesize = ...` is not a randomForest
# argument, so it was absorbed by `...` and the tuned value was never applied.
random_forest_opt <- randomForest(
  Label ~ .,
  data = data_to_train,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

In [None]:
# Drop the correlated columns from the useful-features test set so it has
# the same columns the optimized forest was trained on.
x_test_optimized <- clean_datas(x_test_useful, drop_columns = drop_columns)
# Evaluate on that matching feature set. The earlier version passed the full
# x_test here and left x_test_optimized unused, unlike the useful-features
# evaluation, which uses its own reduced test set.
show_result(
  model = random_forest_opt,
  x_test = x_test_optimized,
  y_test = y_test
)

In [None]:
# Class probabilities for the ROC plot, predicted on the optimized-features
# test set (the feature set this forest was trained on) rather than on
# x_test_useful.
predicted_proba_opt <- predict(random_forest_opt, x_test_optimized, type = "prob")

## Ensemble Random Forest

#### Train and test

In [None]:
# Parameter ranges handed to the ensemble builder: one set under
# `all_datas`, one under `few_datas`.
params_list <- list(
  all_datas = list(
    mtry = 4:6,
    ntree = seq(300, 500, 50),
    maxnodes = seq(130, 170, 10), # c(seq(10,100,6)),
    importance = c(TRUE, FALSE, FALSE),
    nodesize = seq(130, 170, 10)
  ),
  few_datas = list(
    mtry = 3:4,
    ntree = seq(110, 150, 10),
    maxnodes = seq(30, 60, 5), # c(seq(10,100,6)),
    importance = c(TRUE, FALSE),
    nodesize = seq(20, 40, 5)
  )
)

# Fit the ensemble (nforests = 20) with the all/useful/optimized feature
# sets all switched on.
EnsembleForest_classifiers <- create_fit_EnsembleForest(
  params_list,
  datas_train,
  nforests = 20,
  same_params = FALSE,
  all_datas = TRUE,
  useful = TRUE,
  optimized = TRUE,
  drop_columns = drop_columns
)

In [None]:
# Ensemble prediction on the test set, followed by its performance summary.
predicted_ens <- predict_ensemble_forest(
  EnsembleForest_classifiers,
  x_test,
  drop_columns = drop_columns
)

show_result_ensemble(predicted_prob = predicted_ens, y_test = y_test)

## AdaBoost - All Features

#### Best parameters to be used

In [None]:
# AdaBoost grid: tree depth 1, 3, 5, 7 crossed with 200-500 boosting rounds.
params <- expand.grid(
  tree_depth = seq(1, 7, 2),
  n_rounds = seq(200, 500, 75), # seq(25,300,25),
  verbose = FALSE
)
try_parameters_AB(params = params, datas = datas_train)

In [None]:
# AdaBoost settings used for the final fit below.
best_n_rounds <- 350
best_tree_depth <- 4

#### Train and test

In [None]:
# Training table for AdaBoost (AB = TRUE); the last column is dropped to
# form the predictor matrix and `Label` is taken as the response.
data_train <- get_training_datas(datas_train, AB = TRUE)
y_train <- data_train$Label
x_train <- as.matrix(data_train[, -length(data_train)])

In [None]:
# Fit AdaBoost with the selected depth and number of rounds.
AB <- adaboost(
  X = x_train,
  y = y_train,
  tree_depth = best_tree_depth,
  n_rounds = best_n_rounds,
  verbose = FALSE
)

In [None]:
# AdaBoost probabilities on the test set, kept for the ROC comparison.
predicted_prob_AB <- predict(AB, x_test, type = "prob")

In [None]:
# Performance summary of the AdaBoost probabilities against the test labels.
show_result_AB(predicted_prob = predicted_prob_AB, y_test = y_test)

## GradBoost - All Features

#### Best parameters to be used

In [None]:
# Gradient-boosting grid: depth 1-2, learning rate 0.45-0.70, and
# 250/375/500 rounds.
params <- expand.grid(
  max.depth = 1:2,
  eta = seq(0.45, 0.7, 0.05),
  nrounds = seq(250, 500, 125),
  gamma = 0,
  early_stopping_rounds = 50
)
try_parameters_GB(params = params, datas = datas_train)

In [None]:
# Gradient-boosting settings used for the final fit below.
best_nrounds <- 300
best_early_stopping_rounds <- 50
best_max.depth <- 2
best_eta <- 0.57
best_gamma <- 0

#### Train and test

In [None]:
# Re-run the train/test split and overwrite datas_train, x_test and y_test.
# NOTE(review): the random-forest, ensemble and AdaBoost predictions above
# were computed on the x_test from the first split, yet the ROC section
# compares them with the y_test assigned here. If take_test_set() is not
# deterministic, those predictions no longer line up with y_test -- confirm,
# or drop this re-split.
datas_train_test <- take_test_set(datas, write_on_file=TRUE)
datas_train <- datas_train_test[['data_train']]
x_test <- datas_train_test[['x_test']]
y_test <- datas_train_test[['y_test']]

In [None]:
# Training table for xgboost: all columns but the last as a numeric
# matrix, and the last column converted to numeric and shifted down by one.
data_train <- get_training_datas(datas_train)
y_train <- as.numeric(data_train[, length(data_train)]) - 1
x_train <- as.matrix(data_train[, -length(data_train)])

In [None]:
# Fit the gradient-boosted model with a binary logistic objective.
GB <- xgboost(
  data = x_train,
  label = y_train,
  max.depth = best_max.depth,
  eta = best_eta,
  nrounds = best_nrounds,
  gamma = best_gamma,
  early_stopping_rounds = best_early_stopping_rounds,
  objective = "binary:logistic",
  verbose = 0
)

In [None]:
# Gradient-boosting predictions on the test set (as a matrix), kept for the
# ROC plot.
# NOTE(review): `type` does not look like a documented argument of xgboost's
# predict method and is presumably ignored -- confirm against the installed
# version.
predicted_prob_GB <- predict(GB, as.matrix(x_test), type='prob')

In [None]:
# Performance summary of the gradient-boosting predictions (reuses the
# AdaBoost reporting helper).
show_result_AB(predicted_prob = predicted_prob_GB, y_test = y_test)

## Deep Neural Network - All Features

#### Best parameters to be used

In [None]:
# Neural-network grid: five layer layouts are scanned; every other setting
# is fixed. The commented lines keep alternatives tried earlier.
# c(50,30,10), c(30,50,100), c(90,180), c(150,300)),#, c(100), c(50), c(200)),
# c(90,90,90,10), c(100,70,50,30), c(140,110,80,50)
params <- expand.grid(
  layers_size = list(
    700,
    c(90, 180),
    c(250, 130, 80),
    c(100, 70, 50, 30),
    c(100, 70, 50, 30, 20)
  ),
  regularizer = "bn", # list(list('l1',0.01), list('l2', 0.1), list('l2', 0.01), 'none', 'bn'),
  dropout = 0.5,
  activation = "relu", # c('relu', 'elu'),
  optimizer = "rmsprop", # c('nesterov', 'rmsprop', 'adam'),
  momentum = 0.75,
  epochs = 400,
  batch_size = 50
)
try_parameters_NN(params = params, datas = datas_train)

In [None]:
# Settings for the final networks: one layer layout per list element
# (five layouts in total).
best_layers_size <- list(
  700,
  c(90, 180),
  c(250, 130, 80),
  c(100, 70, 50, 30),
  c(100, 70, 50, 30, 20)
)
best_regularizer <- "bn"
best_activation <- "relu"
best_optimizer <- "rmsprop"
best_dropout <- 0.5
best_momentum <- 0.75
best_epochs <- 400
# NOTE(review): the tuning grid above uses batch_size 50 -- confirm 25 is intended.
best_batch_size <- 25

In [None]:
# Bundle the network settings into one named list for make_NN_models().
best_parameters <- list(
  best_layers_size = best_layers_size,
  best_regularizer = best_regularizer,
  best_dropout = best_dropout,
  best_activation = best_activation,
  best_optimizer = best_optimizer,
  best_momentum = best_momentum,
  best_epochs = best_epochs,
  best_batch_size = best_batch_size
)

#### Train and test

In [None]:
# Re-source the helper file so that any edits made to it since the start of
# the session are picked up.
source('Requirements_and_Functions.R')

In [None]:
# Train the networks described by best_parameters on the training samples;
# the result's name suggests it carries both models and their scalers.
NN_models_and_scalers <- make_NN_models(best_parameters, datas_train)

In [None]:
# Network predictions on the test set, kept for the ROC comparison.
predicted_prob_NN <- predict_NN_prob(NN_models_and_scalers, x_test)

In [None]:
# Performance summary of the network predictions (reuses the AdaBoost
# reporting helper).
show_result_AB(predicted_prob = predicted_prob_NN, y_test = y_test)

# All Together

##### Split train val

In [None]:
# Split the training samples once more; the held-out part is used as the
# validation set (x_val_AT / y_val_AT) for the final model.
datas_train_test_AT <- take_test_set(datas_train, write_on_file = FALSE)
datas_train_AT <- datas_train_test_AT[["data_train"]]
x_val_AT <- datas_train_test_AT[["x_test"]]
y_val_AT <- datas_train_test_AT[["y_test"]]

##### Random forest all features

In [None]:
# All-features forest: same hyper-parameters as in the standalone section.
best_ntree <- 400
best_mtry <- 5
best_maxnodes <- 150
best_nodesize <- 150
best_importance <- FALSE

In [None]:
# Fit the all-features forest on the reduced training split.
data_to_train_AT <- get_training_datas(datas_train_AT)
# The minimum terminal-node size must be passed as `nodesize`; the previous
# `best_nodesize = ...` is not a randomForest argument, so it was absorbed
# by `...` and the tuned value was never applied.
random_forest_all_AT <- randomForest(
  Label ~ .,
  data = data_to_train_AT,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

##### Random forest useful features

In [None]:
# Training table restricted to the useful features.
data_to_train_AT <- get_training_datas(datas_train_AT)
data_to_train_useful_AT <- clean_datas(data_to_train_AT, only_useful = TRUE)

In [None]:
# Useful-features forest: same hyper-parameters as in the standalone section.
best_ntree <- 135
best_mtry <- 3
best_maxnodes <- 30
best_nodesize <- 27
best_importance <- TRUE

In [None]:
# Fit the useful-features forest on the reduced training split.
# The minimum terminal-node size must be passed as `nodesize`; the previous
# `best_nodesize = ...` is not a randomForest argument, so it was absorbed
# by `...` and the tuned value was never applied.
random_forest_usf_AT <- randomForest(
  Label ~ .,
  data = data_to_train_useful_AT,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

##### Random forest optimized features

In [None]:
# Training table for the optimized-features forest: start from the full
# table, keep the useful features, then drop the columns flagged by
# correlations() (threshold 0.8, computed on the cleaned LHCb sample).
# The former `data_to_train_useful_AT <- list()` was removed: it was
# overwritten on the very next line.
data_to_train_AT <- get_training_datas(datas_train_AT)
data_to_train_useful_AT <- clean_datas(data_to_train_AT, only_useful = TRUE)
drop_columns <- correlations(
  datas[["data_lhcb"]],
  show = FALSE,
  columns_to_drop = TRUE,
  only_useful = TRUE,
  threshold = 0.8
)
data_to_train_optimized_AT <- clean_datas(
  data_to_train_useful_AT,
  drop_columns = drop_columns
)

In [None]:
# Optimized-features forest: same hyper-parameters as in the standalone
# section.
best_ntree <- 135
best_mtry <- 3
best_maxnodes <- 40
best_nodesize <- 30
best_importance <- FALSE

In [None]:
# Fit the optimized-features forest on the reduced training split.
# The minimum terminal-node size must be passed as `nodesize`; the previous
# `best_nodesize = ...` is not a randomForest argument, so it was absorbed
# by `...` and the tuned value was never applied.
random_forest_opt_AT <- randomForest(
  Label ~ .,
  data = data_to_train_optimized_AT,
  mtry = best_mtry,
  ntree = best_ntree,
  maxnodes = best_maxnodes,
  importance = best_importance,
  nodesize = best_nodesize
)

##### Ensemble random forest

In [None]:
# Same parameter ranges as the standalone ensemble section.
params_list <- list(
  all_datas = list(
    mtry = 4:6,
    ntree = seq(300, 500, 50),
    maxnodes = seq(130, 170, 10), # c(seq(10,100,6)),
    importance = c(TRUE, FALSE, FALSE),
    nodesize = seq(130, 170, 10)
  ),
  few_datas = list(
    mtry = 3:4,
    ntree = seq(110, 150, 10),
    maxnodes = seq(30, 60, 5), # c(seq(10,100,6)),
    importance = c(TRUE, FALSE),
    nodesize = seq(20, 40, 5)
  )
)

# Fit the ensemble on the reduced training split.
EnsembleForest_classifiers_AT <- create_fit_EnsembleForest(
  params_list,
  datas_train_AT,
  nforests = 20,
  same_params = FALSE,
  all_datas = TRUE,
  useful = TRUE,
  optimized = TRUE,
  drop_columns = drop_columns
)

##### Adaboost

In [None]:
# AdaBoost settings (same as in the standalone section).
best_n_rounds <- 350
best_tree_depth <- 4

In [None]:
# AdaBoost training inputs from the reduced training split: predictors are
# all columns but the last, and the response is `Label`.
data_train_AT <- get_training_datas(datas_train_AT, AB = TRUE)
y_train_AT <- data_train_AT$Label
x_train_AT <- as.matrix(data_train_AT[, -length(data_train_AT)])

In [None]:
# Fit AdaBoost on the reduced training split.
AB_AT <- adaboost(
  X = x_train_AT,
  y = y_train_AT,
  tree_depth = best_tree_depth,
  n_rounds = best_n_rounds,
  verbose = FALSE
)

##### Gradboost

In [None]:
# Gradient-boosting settings (same as in the standalone section).
best_nrounds <- 300
best_early_stopping_rounds <- 50
best_max.depth <- 2
best_eta <- 0.57
best_gamma <- 0

In [None]:
# xgboost training inputs from the reduced training split; x_train_AT and
# y_train_AT replace the AdaBoost versions of the same names.
data_to_train_AT <- get_training_datas(datas_train_AT)
y_train_AT <- as.numeric(data_to_train_AT[, length(data_to_train_AT)]) - 1
x_train_AT <- as.matrix(data_to_train_AT[, -length(data_to_train_AT)])

In [None]:
# Fit the gradient-boosted model on the reduced training split.
GB_AT <- xgboost(
  data = x_train_AT,
  label = y_train_AT,
  max.depth = best_max.depth,
  eta = best_eta,
  nrounds = best_nrounds,
  gamma = best_gamma,
  early_stopping_rounds = best_early_stopping_rounds,
  objective = "binary:logistic",
  verbose = 0
)

##### Neural networks

In [None]:
# Network settings (same values as in the standalone section); one layer
# layout per list element.
best_layers_size <- list(
  700,
  c(90, 180),
  c(250, 130, 80),
  c(100, 70, 50, 30),
  c(100, 70, 50, 30, 20)
)
best_regularizer <- "bn"
best_activation <- "relu"
best_optimizer <- "rmsprop"
best_dropout <- 0.5
best_momentum <- 0.75
best_epochs <- 400
best_batch_size <- 25

In [None]:
# Bundle the network settings into one named list for make_NN_models().
best_parameters <- list(
  best_layers_size = best_layers_size,
  best_regularizer = best_regularizer,
  best_dropout = best_dropout,
  best_activation = best_activation,
  best_optimizer = best_optimizer,
  best_momentum = best_momentum,
  best_epochs = best_epochs,
  best_batch_size = best_batch_size
)

In [None]:
# Train the networks on the reduced training split, with final switched on.
NN_models_and_scalers_AT <- make_NN_models(
  best_parameters,
  datas_train_AT,
  final = TRUE
)

### Final Model

In [None]:
# Re-source the helper file before building the final model so that the
# latest helper definitions are in use.
source('Requirements_and_Functions.R')

In [None]:
# Combine all classifiers trained on the reduced split into the final
# model; the validation split is passed as x_val / y_val.
Final_Model <- make_final_model(
  nn_models = NN_models_and_scalers_AT,
  adaboost = AB_AT,
  gradboost = GB_AT,
  rf_all = random_forest_all_AT,
  rf_usf = random_forest_usf_AT,
  rf_opt = random_forest_opt_AT,
  rf_ens = EnsembleForest_classifiers_AT,
  x_val = x_val_AT,
  y_val = y_val_AT,
  drop_columns = drop_columns
)

In [None]:
# Total of the weights in the first element returned by get_weights().
sum(unlist(get_weights(Final_Model)[1]))

In [None]:
# Display everything get_weights() returns for the final model.
get_weights(Final_Model)

In [None]:
# Pie chart of the individual classifier weights (eleven labels, in the
# order the weights are stored).
labels <- c(
  "RF_all", "RF_usf", "RF_opt",
  "Ensemble_RF", "AdaBoost", "GradBoost",
  "NN_1", "NN_2", "NN_3", "NN_4", "NN_5"
)
pie(
  get_weights(Final_Model)[[1]],
  labels = labels,
  main = "Weights Classifiers"
)

In [None]:
# Same weights grouped by model family: positions 1-4 are the forests,
# 5 AdaBoost, 6 GradBoost and 7-11 the neural networks.
labels <- c("Random Forests", "AdaBoost", "GradBoost", "Neural Networks")
weights_rf <- sum(get_weights(Final_Model)[[1]][1:4])
weight_ab <- get_weights(Final_Model)[[1]][5]
weight_gb <- get_weights(Final_Model)[[1]][6]
weight_nn <- sum(get_weights(Final_Model)[[1]][7:11])
weights <- c(weights_rf, weight_ab, weight_gb, weight_nn)
pie(
  weights,
  labels = labels,
  main = "Weights Classifiers Grouped"
)

In [None]:
# alpha is the total of the classifier weights; it is passed to the final
# model's predictor together with every component classifier.
alpha <- sum(unlist(get_weights(Final_Model)[1]))
predicted_prob_final <- predict_final_model(
  nn_models = NN_models_and_scalers_AT,
  adaboost = AB_AT,
  gradboost = GB_AT,
  rf_all = random_forest_all_AT,
  rf_usf = random_forest_usf_AT,
  rf_opt = random_forest_opt_AT,
  rf_ens = EnsembleForest_classifiers_AT,
  x_test = x_test,
  final_model = Final_Model,
  drop_columns = drop_columns,
  alpha = alpha
)

In [None]:
# Performance summary of the final model on the test set.
show_result_AB(predicted_prob = predicted_prob_final, y_test = y_test)

# ROC Curve

In [None]:
# Overlay the ROC curves of all classifiers on one plot and collect their
# AUCs. The first roc() call opens the plot; every later call draws on top
# of it (add = TRUE).
smooth <- FALSE

auc_rf_all <- roc(
  y_test, predicted_proba_all[, 1],
  plot = TRUE, col = 2, quiet = TRUE, smooth = smooth
)$auc
auc_rf_usf <- roc(
  y_test, predicted_proba_usf[, 1],
  plot = TRUE, col = 3, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_rf_opt <- roc(
  y_test, predicted_proba_opt[, 1],
  plot = TRUE, col = 4, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_rf_ens <- roc(
  y_test, predicted_ens[, 1],
  plot = TRUE, col = 5, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_AB <- roc(
  y_test, predicted_prob_AB,
  plot = TRUE, col = 6, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_GB <- roc(
  y_test, predicted_prob_GB,
  plot = TRUE, col = 8, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_NN <- roc(
  y_test, as.numeric(predicted_prob_NN),
  plot = TRUE, col = 9, add = TRUE, quiet = TRUE, smooth = smooth
)$auc
auc_Final <- roc(
  y_test, as.numeric(predicted_prob_final),
  plot = TRUE, col = "orange", add = TRUE, quiet = TRUE, smooth = smooth
)$auc

# Legend colours follow the `col` values used above, in the same order.
legend(
  "bottomright",
  c("forest_all", "forest_usf", "forest_opt", "forest_ens", "AB", "GB", "NN", "Final"),
  fill = c(2:6, 8, 9, "orange")
)

aucs <- list(
  rf_all = auc_rf_all,
  rf_usf = auc_rf_usf,
  rf_opt = auc_rf_opt,
  rf_ens = auc_rf_ens,
  AB = auc_AB,
  GB = auc_GB,
  NN = auc_NN,
  Final = auc_Final
)

In [None]:
# Sort the classifiers from highest to lowest AUC. vapply() pins the result
# to one numeric value per classifier, whereas sapply()'s return type
# depends on its input.
ordered_aucs <- aucs[order(vapply(aucs, FUN = max, FUN.VALUE = numeric(1)), decreasing = TRUE)]

In [None]:
# Print the AUC ranking.
print(ordered_aucs)

In [None]:
# Bar chart of the ranked AUCs. The y axis is limited to the observed range
# and xpd = FALSE clips the bars to the plot region.
barplot(
  unlist(ordered_aucs),
  ylim = c(min(unlist(ordered_aucs)), max(unlist(ordered_aucs))),
  xpd = FALSE
)

## Final prediction

Show all datas

In [None]:
# Density histogram of Lambda_b0_MM_F in the raw LHCb sample. `counts`
# holds the whole object returned by hist(), not only the bin counts.
counts <- hist(datas_or$data_lhcb$Lambda_b0_MM_F, freq = FALSE, breaks = 200)

Filtering in order to fit the curve

In [None]:
# Keep only the bins outside the 5300-5900 window for the fit.
# Per hist()'s documented return value, counts[[1]] is `breaks` and
# counts[[3]] is `density`.
filter <- counts[[1]]<5300 | counts[[1]]>5900
# x_: bin edges that pass the mask.
x_ <- counts[[1]][filter]
# NOTE(review): `breaks` has one more element than `density`, so indexing
# density with this mask presumably gives a trailing NA whenever the last
# break passes the mask; the next cell drops the last element of both
# vectors, which compensates. Keep the two cells in sync.
y_ <- counts[[3]][filter]

Shifting x so that it starts at zero, because the Gamma distribution is defined only for x ≥ 0

In [None]:
# Drop the last element of both vectors and shift x so it starts at zero
# (min(x_) is subtracted). Plots in data units must add min(x_) back.
x <- x_[-length(x_)]-min(x_)
y <- y_[-length(y_)]

Fitting the gamma distribution

In [None]:
# Non-linear least-squares fit of a Gamma density to the selected bins,
# starting from the given shape and rate guesses.
model_gamma <- nls(
  y ~ dgamma(x, shape = shape, rate = rate),
  start = list(shape = 3.685, rate = 0.0031),
  control = list(maxiter = 1000)
)

Printing results of the fit

In [None]:
# Auto-print the fitted nls object.
model_gamma

Showing the fit result

In [None]:
# Evaluate the fitted curve on a 1000-point grid in shifted coordinates.
x_pred <- seq(0, 8000, length.out = 1000)
y_pred <- predict(model_gamma, data.frame(x = x_pred))

In [None]:
# Density histogram with the fitted curve overlaid; min(x_) is added back
# so the curve is drawn in data units.
hist(datas_or$data_lhcb$Lambda_b0_MM_F, freq = FALSE, breaks = 200)
polygon(
  x_pred + min(x_),
  y_pred,
  col = rgb(1, 0, 0, 0.3),
  border = NA,
  xlim = c(3000, 8000),
  ylim = c(0, 0.001)
)

In [None]:
# Count histogram zoomed on 5500-5800 with the fitted curve overlaid. The
# density is converted to expected counts per bin by multiplying by the
# number of rows and the bin width.
hist(datas_or$data_lhcb$Lambda_b0_MM_F, freq = TRUE, breaks = 200, xlim = c(5500, 5800))
# The curve is shifted back by min(x_), the offset that was subtracted
# before the fit, instead of the previous hard-coded 3100, so the overlay
# stays aligned if the histogram range changes.
# NOTE(review): xlim/ylim are presumably ignored by polygon() -- confirm
# and drop if so.
polygon(
  x_pred + min(x_),
  y_pred * nrow(datas_or$data_lhcb) * (counts$breaks[2] - counts$breaks[1]),
  col = rgb(1, 0, 0, 0.3),
  border = NA,
  xlim = c(3000, 8000),
  ylim = c(0, 0.001)
)

Subtracting the fitted model from the data

In [None]:
# `breaks` holds the bin centres here (left edge plus half a bin width).
breaks <- counts$breaks[-length(counts$breaks)] +
  (counts$breaks[2] - counts$breaks[1]) / 2
# Same positions, shifted to start at zero for the fitted model.
x_residual <- data.frame(x = breaks - min(breaks))
# Observed counts minus the counts expected from the fit
# (density * number of rows * bin width).
counts_residual <- counts$counts -
  predict(model_gamma, x_residual) *
    nrow(datas_or$data_lhcb) *
    (counts$breaks[2] - counts$breaks[1])

In [None]:
# Negative residuals are set to zero.
counts_residual[counts_residual < 0] <- 0

In [None]:
# Residual counts over the full range.
plot(breaks, counts_residual, type = "l")

In [None]:
# Residual counts zoomed on the 5500-5750 window.
plot(breaks, counts_residual, type = "l", xlim = c(5500, 5750))

Calculating the theoretical number of interesting events

In [None]:
# Restrict bin centres and residuals to the open window (5500, 5750).
breaks_fit <- breaks[breaks > 5500 & breaks < 5750]
counts_residual_fit <- counts_residual[breaks > 5500 & breaks < 5750]

In [None]:
# Residual counts inside the selected window.
plot(breaks_fit, counts_residual_fit, type = "l")

In [None]:
# Sum of the residual counts in the window, truncated to an integer.
expected_number_interesting_events <- as.integer(sum(counts_residual_fit))
print(expected_number_interesting_events)

Making the final prediction on the complementary data

In [None]:
# Apply the final model to the cleaned complementary sample (overwrites
# the test-set predictions stored under the same name).
# NOTE(review): the input is datas$data_compl (cleaned), while a later
# cell indexes datas_or$data_compl (raw) with these predictions -- confirm
# that clean_datas() keeps the row count and order.
predicted_prob_final <- predict_final_model(
  nn_models = NN_models_and_scalers_AT,
  adaboost = AB_AT,
  gradboost = GB_AT,
  rf_all = random_forest_all_AT,
  rf_usf = random_forest_usf_AT,
  rf_opt = random_forest_opt_AT,
  rf_ens = EnsembleForest_classifiers_AT,
  x_test = datas$data_compl,
  final_model = Final_Model,
  drop_columns = drop_columns,
  alpha = alpha
)

In [None]:
# Turn the predictions into class labels by rounding to the nearest integer.
pred_fin_classes <- round(predicted_prob_final)

In [None]:
# Rows of the raw complementary sample whose predicted class is 1.
predicted_true <- datas_or$data_compl[pred_fin_classes == 1, ]

In [None]:
# Lambda_b0_MM_F values of the selected rows, and the subset lying in the
# open window (5550, 5680).
fin_pred <- predicted_true$Lambda_b0_MM_F
filt_fin_pred <- fin_pred[fin_pred > 5550 & fin_pred < 5680]

In [None]:
# Show the structure (length and first values) of the selected values in
# the window.
str(filt_fin_pred)

In [None]:
# Histogram of the complementary sample (red) with the selected events
# (green) drawn on top using the same breaks.
tot_ <- hist(
  datas_or$data_compl$Lambda_b0_MM_F,
  freq = TRUE, breaks = 200, col = "red", main = ""
)
delta_m <- tot_$breaks[2] - tot_$breaks[1]
final_pred <- hist(
  fin_pred,
  freq = TRUE, breaks = tot_$breaks, col = "green", add = TRUE
)
legend(
  "topright",
  c("data_complementary", "prediction"),
  fill = c("red", "green")
)

Other useful quantities

In [None]:
# Structure of the complementary-sample values below 5550.
str(
  datas_or$data_compl$Lambda_b0_MM_F[
    datas_or$data_compl$Lambda_b0_MM_F < 5550
  ]
)

In [None]:
# Entries of the complementary histogram in bins whose left edge lies in
# (5500, 5680). `breaks` has one more element than `counts`, so the mask is
# built from the left bin edges to keep both vectors the same length.
sum(tot_$counts[
  tot_$breaks[-length(tot_$breaks)] < 5680 &
    tot_$breaks[-length(tot_$breaks)] > 5500
])

In [None]:
# Entries of the complementary histogram in bins whose left edge is below
# 5500. The mask uses the left bin edges so that it has the same length as
# `counts`.
sum(tot_$counts[tot_$breaks[-length(tot_$breaks)] < 5500])

In [None]:
# Selected events in bins whose left edge is below 5500. The mask uses the
# left bin edges so that it has the same length as `counts`.
sum(final_pred$counts[final_pred$breaks[-length(final_pred$breaks)] < 5500])