In [None]:
# %pip install graphviz

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import os
from xgboost import XGBClassifier

from sklearn.model_selection import RandomizedSearchCV, train_test_split

%matplotlib inline

In [None]:
# Load the full event table. Later cells read a `label` column from it
# (1 is counted as signal, 0 as background).
data = pd.read_csv('full_data.csv')

## Looking at the raw data:

In [None]:
# Report the dimensions of the raw table, then list every column name.
n_rows, n_cols = data.shape[0], data.shape[1]
print(f'Size of data: {data.shape}')
print(f'Number of events: {n_rows}')
print(f'Number of columns: {n_cols}')

print('\nList of features in dataset:')
for column_name in data.columns:
    print(column_name)

In [None]:
# Build a column order in which `label` comes last; the feature list is later
# taken as "every column except the final one".
# The label is located by name instead of a hard-coded position, so the cell
# keeps working (or fails loudly with ValueError) if the CSV layout changes.
headings = data.columns.to_list()
print(len(headings))
label_position = headings.index('label')
# NOTE: despite its name, this variable holds the removed column name itself
# (list.pop returns the popped element); kept for backward compatibility.
headings_without_label = headings.pop(label_position)
print(len(headings))
headings.append('label')
print(headings)

In [None]:
# Reindex the frame with the reordered column list so that `label` is the last column.
data = data[headings]

In [None]:
# Display the frame (rich notebook repr) to check the new column order.
data

In [None]:
# data.drop(columns=data.columns[0], axis=1,  inplace=True)

In [None]:
# data.head()

In [None]:
# data.to_csv('full_data.csv')

In [None]:
# Count the events of each class and report the signal fraction of the sample.
n_signal_events = len(data[data.label == 1])
n_background_events = len(data[data.label == 0])
print(f'Number of signal events: {n_signal_events}')
print(f'Number of background events: {n_background_events}')
sig_frac = n_signal_events / float(n_signal_events + n_background_events)
print(f'Fraction signal: {sig_frac}')

## Data Observation

In [None]:
# Overlay the unweighted `scoresum_g` distributions of signal and background.
scoresum_bins = np.linspace(0, 2, 100)
plt.figure()
for class_value, colour, class_name in ((1, 'midnightblue', 'signal'),
                                        (0, 'firebrick', 'background')):
    plt.hist(data.scoresum_g[data.label == class_value], bins=scoresum_bins,
             histtype='step', color=colour, label=class_name)

plt.xlabel('feature', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# Same `scoresum_g` comparison, but with each class scaled by its own factor
# and drawn on a logarithmic y-axis.
scoresum_bins = np.linspace(0, 2, 100)
signal_rows = data.label == 1
background_rows = data.label == 0
signal, sig_bins = np.histogram(data.scoresum_g[signal_rows], bins=scoresum_bins)
bkgrnd, back_bins = np.histogram(data.scoresum_g[background_rows], bins=scoresum_bins)

# Per-class scale factors applied to the raw counts.
# NOTE(review): the origin of these constants is not shown here - confirm.
sig_sf = 10 * 7.38400e-05
back_sf = (10 * 363) / 1.333318333

# Feeding the left bin edges as data and the scaled counts as weights redraws
# each pre-binned histogram with the scaling applied.
plt.hist(sig_bins[:-1], sig_bins, weights=sig_sf * signal,
         histtype='step', color='midnightblue', label='signal')
plt.hist(back_bins[:-1], back_bins, weights=back_sf * bkgrnd,
         histtype='step', color='firebrick', label='background')

plt.yscale('log')
plt.xlabel('Feature', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)
plt.show()

## Formatting the data for use with XGBoost

In [None]:
# Shuffle all rows once before splitting. The fixed seed makes the shuffle,
# and therefore the train/validation/test split and every number derived from
# it, reproducible after a kernel restart.
shuf_data = data.sample(frac=1, random_state=42)

In [None]:
# Store the label as a pandas categorical; later cells read its integer codes
# through `.cat.codes`.
shuf_data['label'] = shuf_data['label'].astype('category')

Taking 70% of the data for training, 15% for validation, and 15% for testing.

In [None]:
# Positional row boundaries of the split of the shuffled frame:
# [0, no_training) -> training, [no_training, no_val) -> validation, rest -> test.
no_events = len(shuf_data)
no_training = int(0.7 * no_events)
no_val = int(no_training + (0.15 * no_events))

training_set = shuf_data.iloc[:no_training]
validation_set = shuf_data.iloc[no_training:no_val]
test_set = shuf_data.iloc[no_val:]

In [None]:
# Report the size of each split and the class balance of the training split.
for split_name, split_frame in (('training', training_set),
                                ('validation', validation_set),
                                ('testing', test_set)):
    print(f'Number of {split_name} samples: {len(split_frame)}')

n_train_signal = len(training_set[training_set.label == 1])
n_train_background = len(training_set[training_set.label == 0])
print(f'\nNumber of signal events in training set: {n_train_signal}')
print(f'Number of background events in training set: {n_train_background}')
print(f'Fraction signal: {n_train_signal / float(n_train_signal + n_train_background)}')

In [None]:
# training_set.label.cat.codes
# print(training_set.label.cat.codes)

## Creating DMatrix

In [None]:
# Every column except the last one (the label) is a feature; keep the names as
# a NumPy array of str.
feature_names = data.columns[:-1].to_numpy().astype(str)

In [None]:
def _as_dmatrix(frame):
    """Wrap one split in an xgboost DMatrix.

    Features are the `feature_names` columns, the target is the integer code
    of the categorical `label` column, and -999.0 is treated as missing.
    """
    return xgb.DMatrix(data=frame[feature_names],
                       label=frame.label.cat.codes,
                       missing=-999.0,
                       feature_names=feature_names)


train = _as_dmatrix(training_set)
validation = _as_dmatrix(validation_set)
test = _as_dmatrix(test_set)

In [None]:
# Cross-check the DMatrix sizes against the DataFrame splits.
for split_name, dmatrix in (('training', train),
                            ('validation', validation),
                            ('testing', test)):
    print(f'Number of {split_name} samples: {dmatrix.num_row()}')

# Non-zero labels are the signal events.
n_train_signal_dm = np.count_nonzero(train.get_label())
print(f'\nNumber of signal events in training set: {n_train_signal_dm}')

## Hyperparameters

## Evaluating Hyperparameters

In [None]:
# Candidate values for each hyperparameter explored by the random search.
eta_range = np.arange(0.1, 1.0, 0.1)
max_depth_range = np.arange(2, 11, 1)
min_child_range = np.arange(0, 5.1, 0.1)
subsample_range = np.arange(0.1, 1.1, 0.1)
colsample_range = np.arange(0.1, 1.1, 0.1)   # shared by bytree / bylevel / bynode
lambda_range = np.arange(0, 10.25, 0.25)
gamma_range = np.arange(0, 5.1, 0.1)
delta_range = np.arange(0, 11, 1)

# Not used by any other visible cell; kept so existing references keep working.
rmse_outputs = np.zeros(len(gamma_range))

# Size of the full grid spanned by the search space. Every searched dimension
# is counted: `colsample_range` appears three times because it is used for
# colsample_bytree, colsample_bylevel and colsample_bynode, and
# `delta_range` (max_delta_step) is included as well.
searched_ranges = [
    eta_range,
    max_depth_range,
    min_child_range,
    subsample_range,
    colsample_range,   # colsample_bytree
    colsample_range,   # colsample_bylevel
    colsample_range,   # colsample_bynode
    lambda_range,
    gamma_range,
    delta_range,
]
no_params = 1
for candidate_values in searched_ranges:
    no_params *= len(candidate_values)   # Python ints: no overflow

print(no_params)

## Random Hyperparameter Search

In [None]:
# Search space handed to RandomizedSearchCV: each key maps to the list of
# candidate values to sample from.
params = {
    # learning task (single-valued, i.e. fixed)
    'objective': ['binary:logistic'],
    'eval_metric': ['rmse'],
    # tree shape and step size
    'eta': eta_range,
    'max_depth': max_depth_range,
    'min_child_weight': min_child_range,
    'max_delta_step': delta_range,
    # regularisation
    'gamma': gamma_range,
    'lambda': lambda_range,
    # row and column subsampling
    'subsample': subsample_range,
    'colsample_bytree': colsample_range,
    'colsample_bylevel': colsample_range,
    'colsample_bynode': colsample_range,
}

In [None]:
# Base estimator for the random search; its hyperparameters are set per candidate from `params`.
cla = XGBClassifier()

In [None]:
# The search uses the first `no_val` rows of the shuffled frame, i.e. the
# training and validation rows; the test rows stay held out.
# Features are all columns but the last, and the target is `label`.
search_rows = shuf_data[:no_val]
hyperparam_X = search_rows.iloc[:, :-1]
hyperparam_y = search_rows.label

In [None]:
# Randomised search: 500 sampled configurations, each scored with 3-fold cross-validation;
# verbose=3 requests detailed progress logging.
random_search = RandomizedSearchCV(cla, param_distributions=params, n_iter=500, cv=3, verbose=3)

In [None]:
# Run the search (the expensive cell of this notebook).
random_search.fit(hyperparam_X, hyperparam_y)
# Audible "finished" notification. NOTE(review): `say` is a macOS command;
# on other systems this call presumably just returns a non-zero status.
os.system("say bing bong")

In [None]:
# Collect the cross-validation summary of the random search.
hyper_res = random_search.cv_results_
mean_score = hyper_res['mean_test_score']   # mean CV score per sampled configuration
err = hyper_res['std_test_score']           # spread of that score across the folds
# One x position per sampled configuration. The length is derived from the
# results instead of being fixed at 100, so it always matches `n_iter` and
# plotting `mean_score` against it cannot fail with a shape mismatch.
x_arr = np.arange(1, len(mean_score) + 1, 1)
# hyper_res

In [None]:
# Mean cross-validated score, one point per sampled configuration.
plt.plot(x_arr, mean_score)
plt.xlabel('iteration')
plt.ylabel('score')

# Training

## Default Parameters

In [None]:
# Baseline configuration, written directly as the (name, value) list that
# xgb.train accepts. `eval_metric` is repeated so several metrics are
# evaluated.
param = [
    # Booster parameters
    ('eta', 0.3),                 # learning rate
    ('max_depth', 6),             # maximum depth of a tree
    ('subsample', 1),             # fraction of events each tree is trained on
    ('min_child_weight', 1),
    ('colsample_bytree', 1),      # fraction of features each tree is trained on
    # Learning task parameters
    ('objective', 'binary:logistic'),
    ('eval_metric', 'error'),
    ('eval_metric', 'logloss'),
    ('eval_metric', 'rmse'),
]

num_trees = 600  # upper bound on the number of boosting rounds

## Parameters from random search

In [None]:
# Best configuration found by the random search, displayed for inspection.
# The values in the next cell are typed in by hand rather than read from this dict.
hyper_params = random_search.best_params_
# hyper_params = list(hyper_params.items()) + [('eval_metric', 'logloss')] + [('eval_metric', 'error')] + [('eval_metric', 'auc')]
hyper_params

In [None]:
# Tuned configuration, written directly as the (name, value) list that
# xgb.train accepts. `eval_metric` is repeated so several metrics are
# evaluated; 'auc' is listed last.
param = [
    # Booster parameters
    ('eta', 0.2),                  # learning rate
    ('min_child_weight', 4.4),
    ('max_depth', 10),             # maximum depth of a tree
    ('max_delta_step', 8),
    ('subsample', 0.8),            # fraction of events sampled per tree
    ('colsample_bytree', 0.9),     # fraction of features sampled per tree
    ('colsample_bynode', 0.8),     # fraction of features sampled per split
    ('colsample_bylevel', 0.7),    # fraction of features sampled per depth level
    ('gamma', 1.4),
    ('lambda', 9.0),               # L2 regularisation
    # Learning task parameters
    ('objective', 'binary:logistic'),
    ('eval_metric', 'error'),
    ('eval_metric', 'logloss'),
    ('eval_metric', 'rmse'),
    ('eval_metric', 'auc'),
]

num_trees = 600  # upper bound on the number of boosting rounds

## Training Model

In [None]:
# Train on the training DMatrix for at most `num_trees` rounds while monitoring
# the validation DMatrix; early_stopping_rounds=10 stops training once the
# monitored validation metric has not improved for 10 rounds.
booster = xgb.train(param, train,  num_boost_round=num_trees, evals=[(validation, 'val')], early_stopping_rounds=10)
# Audible "finished" notification. NOTE(review): `say` is a macOS command.
os.system("say beep")

In [None]:
# Evaluate the trained booster on the held-out test DMatrix and print the result string.
print(booster.eval(test))

In [None]:
# BDT output for every test event; with the binary:logistic objective this is a score in [0, 1].
predictions = booster.predict(test)

In [None]:
# Attributes stored on the booster, displayed for inspection.
att = booster.attributes()
att

In [None]:
# booster_df = booster.trees_to_dataframe()

In [None]:
# Boosting round recorded by early stopping as the best one.
best_tree = booster.best_iteration

In [None]:
# Show the best boosting round.
print(best_tree)

In [None]:
# selected_feature_df = booster_df[booster_df['Feature'] == 'jj_m']

In [None]:
# selected_feature_df[selected_feature_df['Tree'] == 999]

In [None]:
# Distribution of the BDT score over all test events, whatever their true class.
bdt_bins = np.linspace(0, 1, 50)
plt.figure()
plt.hist(predictions, bins=bdt_bins, histtype='step',
         color='darkgreen', label='All events')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)



In [None]:
# BDT score distribution split by true class (taken from the test labels).
bdt_bins = np.linspace(0, 1, 50)
truth_is_signal = test.get_label().astype(bool)
plt.figure()
plt.hist(predictions[truth_is_signal], bins=bdt_bins,
         histtype='step', color='midnightblue', label='signal')
plt.hist(predictions[~truth_is_signal], bins=bdt_bins,
         histtype='step', color='firebrick', label='background')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# Ratio that the following cells divide the background weight by.
# NOTE(review): 799991 and 600000 look like event counts - confirm what they
# refer to and consider naming them.
frac = 799991 / 600000
# Sanity check: dividing back should give approximately 600000.
799991 / frac

In [None]:
# BDT score per true class, with each class scaled by its own factor and drawn
# on a logarithmic y-axis.
bdt_bins = np.linspace(0, 1, 50)
truth_is_signal = test.get_label().astype(bool)
signal, sig_bins = np.histogram(predictions[truth_is_signal], bins=bdt_bins)
bkgrnd, back_bins = np.histogram(predictions[~truth_is_signal], bins=bdt_bins)

# Per-class scale factors applied to the raw counts.
sig_sf = 10 * 0.00123066835
back_sf = (10 * 4538.75) / frac

# Left bin edges as data and scaled counts as weights redraw the pre-binned
# histograms with the scaling applied.
plt.hist(sig_bins[:-1], sig_bins, weights=sig_sf * signal,
         histtype='step', color='midnightblue', label='signal')
plt.hist(back_bins[:-1], back_bins, weights=back_sf * bkgrnd,
         histtype='step', color='firebrick', label='background')

plt.yscale('log')
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)
plt.show()

In [None]:
# Yields in the highest-score bin of the histograms: true signal that lands
# there, and background that leaks into it.
corr_signal = np.sum(signal[-1:])
wrong_background = np.sum(bkgrnd[-1:])

print(f'unscaled signal: {corr_signal}')
print(f'unscaled background {wrong_background}')
print()

lumi = 10

# Scale the raw counts, then compute S / sqrt(S + B).
no_s = corr_signal * 0.00123066835 * lumi
no_b = (wrong_background * 4538.75 * lumi) / frac
total_selected = no_s + no_b
significance = no_s / np.sqrt(total_selected)

print(f'S: {no_s}')
print(f'B: {no_b}')
print(f'Significance={round(significance, 4)}')

In [None]:
# Hard classification of the test events: 1 if the BDT score exceeds 0.98, else 0.
full_class_pred = (booster.predict(test) > 0.98).astype(int)

In [None]:
# Hard 0/1 classification of the test events, split by true class.
bdt_bins = np.linspace(0, 1, 50)
truth_is_signal = test.get_label().astype(bool)
plt.figure()
plt.hist(full_class_pred[truth_is_signal], bins=bdt_bins,
         histtype='step', color='midnightblue', label='signal')
plt.hist(full_class_pred[~truth_is_signal], bins=bdt_bins,
         histtype='step', color='firebrick', label='background')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# Hard 0/1 classification per true class, scaled per class and drawn on a
# logarithmic y-axis.
bdt_bins = np.linspace(0, 1, 50)
truth_is_signal = test.get_label().astype(bool)
signal, sig_bins = np.histogram(full_class_pred[truth_is_signal], bins=bdt_bins)
bkgrnd, back_bins = np.histogram(full_class_pred[~truth_is_signal], bins=bdt_bins)

# Per-class scale factors applied to the raw counts.
sig_sf = 10 * 0.00123066835
back_sf = (10 * 4538.75) / frac

plt.hist(sig_bins[:-1], sig_bins, weights=sig_sf * signal,
         histtype='step', color='midnightblue', label='signal')
plt.hist(back_bins[:-1], back_bins, weights=back_sf * bkgrnd,
         histtype='step', color='firebrick', label='background')

plt.yscale('log')
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)
plt.show()

In [None]:
# Count the test events that pass the hard classification cut directly from
# `full_class_pred`, rather than pasting in numbers from an earlier run:
# pasted counts go stale whenever the shuffle, the search or the training
# changes.
truth_is_signal = test.get_label().astype(bool)
corr_signal = int(np.sum(full_class_pred[truth_is_signal]))        # signal classified as signal
wrong_background = int(np.sum(full_class_pred[~truth_is_signal]))  # background classified as signal

print(f'unscaled signal: {corr_signal}')
print(f'unscaled background {wrong_background}')
print()

lumi = 10

# Scale the raw counts, then compute S / sqrt(S + B).
no_s = corr_signal * 0.00123066835 * lumi
no_b = (wrong_background * 4538.75 * lumi) / frac
total_selected = no_s + no_b
# With no selected events the ratio is 0/0; report 0 instead of NaN.
significance = no_s / np.sqrt(total_selected) if total_selected > 0 else 0.0

print(f'S: {no_s}')
print(f'B: {no_b}')
print(f'Significance={round(significance, 4)}')

In [None]:
# Scan 500 score cuts and count, for each cut, how many true-signal and
# true-background test events score above it.
cuts = np.linspace(0,1,500)
nsignal = np.zeros(len(cuts))
nbackground = np.zeros(len(cuts))

# The class masks and per-class scores do not depend on the cut, so they are
# computed once instead of on every loop iteration.
truth_is_signal = test.get_label().astype(bool)
signal_scores = predictions[truth_is_signal]
background_scores = predictions[~truth_is_signal]
for i,cut in enumerate(cuts):
    nsignal[i] = np.count_nonzero(signal_scores > cut)
    nbackground[i] = np.count_nonzero(background_scores > cut)

# Efficiency: fraction of all true-signal test events that pass the cut.
efficiency = nsignal / len(test_set[test_set.label == 1])
# Purity: fraction of the passing events that are true signal. It is undefined
# (0/0) when nothing passes; those points are set to NaN explicitly, which
# matplotlib skips, instead of triggering a divide-by-zero RuntimeWarning.
n_selected = nsignal + nbackground
purity = np.divide(nsignal, n_selected,
                   out=np.full_like(nsignal, np.nan), where=n_selected > 0)

# plot efficiency vs. purity (not a ROC curve, which would use the background
# pass rate on one axis)
plt.figure()
plt.plot(efficiency, purity, 'o-', color='blueviolet')
# make the plot readable
plt.xlabel('Efficiency',fontsize=12)
plt.ylabel('Purity',fontsize=12)

In [None]:
# Zoomed view of the high-efficiency / high-purity corner (both axes 0.85 to 1.0),
# i.e. the upper right of the full curve.
plt.figure()
plt.xlim(0.85, 1.0)
plt.ylim(0.85, 1.0)
# plt.plot([0, 1], [0, 1], 'k--')
# x: signal efficiency, y: purity of the selected sample, one point per score cut.
plt.plot(nsignal/len(test_set[test_set.label == 1]),nsignal/(nsignal + nbackground),'o-',color='blueviolet')
plt.xlabel('Efficiency',fontsize=12)
plt.ylabel('Purity',fontsize=12)
# plt.title('ROC curve (zoomed in at top right)')
plt.show()

In [None]:
# Feature-importance chart of the trained booster, drawn without grid lines.
xgb.plot_importance(booster,grid=False)

In [None]:
# dump model
# booster.dump_model('dump.raw.txt')

## Apply to filtered test set

In [None]:
# Restrict the test set to events with scoresum_g above 1.7.
test_glu = test_set[test_set.scoresum_g > 1.7]

In [None]:
# Number of test events that survive the scoresum_g filter.
print(len(test_glu))

In [None]:
# Wrap the filtered test events in a DMatrix: features are the `feature_names`
# columns, the target is the integer code of the categorical label, and
# -999.0 is treated as missing.
glu_matrix = xgb.DMatrix(data=test_glu[feature_names],label=test_glu.label.cat.codes,
                    missing=-999.0,feature_names=feature_names)

In [None]:
# Count the filtered events whose label is non-zero (signal).
print('\nNumber of signal events in gluon set: {}'.format(len(np.where(glu_matrix.get_label())[0])))

In [None]:
# BDT score of every event in the filtered test set.
glu_predictions = booster.predict(glu_matrix)

In [None]:
# BDT score distribution over all filtered test events.
bdt_bins = np.linspace(0, 1, 50)
plt.figure()
plt.hist(glu_predictions, bins=bdt_bins, histtype='step',
         color='darkgreen', label='All events')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# BDT score distribution of the filtered test events, split by true class.
bdt_bins = np.linspace(0, 1, 50)
glu_is_signal = glu_matrix.get_label().astype(bool)
plt.figure()
plt.hist(glu_predictions[glu_is_signal], bins=bdt_bins,
         histtype='step', color='midnightblue', label='signal')
plt.hist(glu_predictions[~glu_is_signal], bins=bdt_bins,
         histtype='step', color='firebrick', label='background')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# BDT score of the filtered test events per true class, scaled per class and
# drawn on a logarithmic y-axis.
bdt_bins = np.linspace(0, 1, 50)
glu_is_signal = glu_matrix.get_label().astype(bool)
glu_signal, glu_sig_bins = np.histogram(glu_predictions[glu_is_signal], bins=bdt_bins)
glu_bkgrnd, glu_back_bins = np.histogram(glu_predictions[~glu_is_signal], bins=bdt_bins)

# Per-class scale factors applied to the raw counts.
sig_sf = 10 * 0.00123066835
back_sf = (10 * 4538.75) / frac

plt.hist(glu_sig_bins[:-1], glu_sig_bins, weights=sig_sf * glu_signal,
         histtype='step', color='midnightblue', label='signal')
plt.hist(glu_back_bins[:-1], glu_back_bins, weights=back_sf * glu_bkgrnd,
         histtype='step', color='firebrick', label='background')

plt.yscale('log')
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)
plt.show()

In [None]:
# Yields in the highest-score bin of the filtered-set histograms, computed
# from `glu_signal` / `glu_bkgrnd` rather than pasted in from an earlier run
# (pasted counts go stale whenever the shuffle or the training changes).
# Each array is sliced on its own length, not on that of another histogram.
corr_signal_glu = np.sum(glu_signal[-1:])
wrong_background_glu = np.sum(glu_bkgrnd[-1:])

print(f'unscaled signal: {corr_signal_glu}')
print(f'unscaled background {wrong_background_glu}')
print()

lumi = 100

# Scale the raw counts, then compute S / sqrt(S + B).
no_s_glu = corr_signal_glu * 0.00123066835 * lumi
no_b_glu = (wrong_background_glu * 4538.75 * lumi) / frac
total_selected_glu = no_s_glu + no_b_glu
# With no selected events the ratio is 0/0; report 0 instead of NaN.
significance = no_s_glu / np.sqrt(total_selected_glu) if total_selected_glu > 0 else 0.0

print(f'S: {no_s_glu}')
print(f'B: {no_b_glu}')
print(f'Significance={round(significance, 4)}')

In [None]:
# Hard classification of the filtered test events: 1 if the BDT score exceeds 0.98, else 0.
class_predictions = (booster.predict(glu_matrix) > 0.98).astype(int)

In [None]:
# Hard 0/1 classification of the filtered test events, split by true class.
bdt_bins = np.linspace(0, 1, 50)
glu_is_signal = glu_matrix.get_label().astype(bool)
plt.figure()
plt.hist(class_predictions[glu_is_signal], bins=bdt_bins,
         histtype='step', color='midnightblue', label='signal')
plt.hist(class_predictions[~glu_is_signal], bins=bdt_bins,
         histtype='step', color='firebrick', label='background')
# axis labels and legend
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# Hard 0/1 classification of the filtered test events per true class, scaled
# per class and drawn on a logarithmic y-axis.
bdt_bins = np.linspace(0, 1, 50)
glu_is_signal = glu_matrix.get_label().astype(bool)
glu_signal, glu_sig_bins = np.histogram(class_predictions[glu_is_signal], bins=bdt_bins)
glu_bkgrnd, glu_back_bins = np.histogram(class_predictions[~glu_is_signal], bins=bdt_bins)

# Per-class scale factors applied to the raw counts.
sig_sf = 10 * 0.00123066835
back_sf = (10 * 4538.75) / frac

plt.hist(glu_sig_bins[:-1], glu_sig_bins, weights=sig_sf * glu_signal,
         histtype='step', color='midnightblue', label='signal')
plt.hist(glu_back_bins[:-1], glu_back_bins, weights=back_sf * glu_bkgrnd,
         histtype='step', color='firebrick', label='background')

plt.yscale('log')
plt.xlabel('Prediction from BDT', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)
plt.show()

In [None]:
# Yields in the last histogram bin (events classified as signal). Each array
# is sliced relative to its own end; the previous form indexed with the length
# of the unrelated `signal` array, which only worked while that variable
# existed in the kernel and happened to have the same number of bins.
corr_signal_glu = np.sum(glu_signal[-1:])
wrong_background_glu = np.sum(glu_bkgrnd[-1:])

print(f'unscaled signal: {corr_signal_glu}')
print(f'unscaled background {wrong_background_glu}')
print()

lumi = 350

# Scale the raw counts, then compute S / sqrt(S + B).
no_s_glu = corr_signal_glu * 0.00123066835 * lumi
no_b_glu = (wrong_background_glu * 4538.75 * lumi) / frac
significance = no_s_glu / np.sqrt(no_s_glu + no_b_glu)

print(f'S: {no_s_glu}')
print(f'B: {no_b_glu}')
print(f'Significance={round(significance, 4)}')

## Analyse features

In [None]:
# Distribution of `jj_eta_1` for signal and background training events.
eta_bins = np.linspace(-4, 4, 50)
plt.figure()
plt.hist(training_set.jj_eta_1[training_set.label == 1], bins=eta_bins,
         histtype='step', color='midnightblue', label='signal')
plt.hist(training_set.jj_eta_1[training_set.label == 0], bins=eta_bins,
         histtype='step', color='firebrick', label='background')

# The axis label names the column that is actually plotted (it previously
# read 'jj_p_1').
plt.xlabel('jj_eta_1', fontsize=12)
plt.ylabel('Events', fontsize=12)
plt.legend(frameon=False)

In [None]:
# scoresum_g against scoresum_b for the training events; signal is drawn
# first, so background points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.scoresum_g[train_is_signal], training_set.scoresum_b[train_is_signal],
         'o', markersize=1.5, color='mediumblue', markeredgewidth=0, alpha=0.7, label='signal')
plt.plot(training_set.scoresum_g[train_is_background], training_set.scoresum_b[train_is_background],
         'o', markersize=1.5, color='firebrick', markeredgewidth=0, alpha=0.8, label='background')

plt.xlim(0, 2)
plt.ylim(0, 2)
plt.xlabel('scoresum_g', fontsize=12)
plt.ylabel('scoresum_b', fontsize=12)
# Opaque white legend box with a black border so it stays readable over the points.
legend = plt.legend(frameon=1, numpoints=1, markerscale=2, facecolor='white', framealpha=1)
frame = legend.get_frame()
frame.set_facecolor('white')
frame.set_edgecolor('black')

In [None]:
# reco_g_1 against reco_g_2 for the training events; signal is drawn first,
# so background points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.reco_g_1[train_is_signal], training_set.reco_g_2[train_is_signal],
         'o', markersize=2, color='mediumblue', markeredgewidth=0, alpha=0.8, label='signal')
plt.plot(training_set.reco_g_1[train_is_background], training_set.reco_g_2[train_is_background],
         'o', markersize=2, color='firebrick', markeredgewidth=0, alpha=1, label='background')

plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel('reco_g_1', fontsize=12)
plt.ylabel('reco_g_2', fontsize=12)
# Opaque white legend box with a black border so it stays readable over the points.
legend = plt.legend(frameon=1, numpoints=1, markerscale=2, facecolor='white', framealpha=1)
frame = legend.get_frame()
frame.set_facecolor('white')
frame.set_edgecolor('black')

In [None]:
# jj_m against jj_p_2 for the training events; background is drawn first
# here, so signal points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.jj_m[train_is_background], training_set.jj_p_2[train_is_background],
         'o', markersize=2, color='firebrick', markeredgewidth=0, alpha=0.8, label='background')
plt.plot(training_set.jj_m[train_is_signal], training_set.jj_p_2[train_is_signal],
         'o', markersize=2, color='mediumblue', markeredgewidth=0, alpha=0.8, label='signal')

plt.xlim(0, 140)
plt.ylim(0, 75)
plt.xlabel('jj_m', fontsize=12)
plt.ylabel('jj_p_2', fontsize=12)
# Opaque white legend box with a black border so it stays readable over the points.
legend = plt.legend(frameon=1, numpoints=1, markerscale=2, facecolor='white', framealpha=1)
frame = legend.get_frame()
frame.set_facecolor('white')
frame.set_edgecolor('black')

In [None]:
# scoresum_g against jet_m_1 for the training events; signal is drawn first,
# so background points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.scoresum_g[train_is_signal], training_set.jet_m_1[train_is_signal],
         'o', markersize=2, color='mediumblue', markeredgewidth=0, alpha=0.8, label='signal')
plt.plot(training_set.scoresum_g[train_is_background], training_set.jet_m_1[train_is_background],
         'o', markersize=2, color='firebrick', markeredgewidth=0, alpha=0.8, label='background')

plt.xlim(0, 2)
plt.ylim(0, 90)
plt.xlabel('scoresum_g', fontsize=12)
plt.ylabel('jet_m_1', fontsize=12)
plt.legend(frameon=False, numpoints=1, markerscale=1)

In [None]:
# jet_m_1 against jet_m_2 for the training events; background is drawn first
# here, so signal points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.jet_m_1[train_is_background], training_set.jet_m_2[train_is_background],
         'o', markersize=2, color='firebrick', markeredgewidth=0, alpha=0.8, label='background')
plt.plot(training_set.jet_m_1[train_is_signal], training_set.jet_m_2[train_is_signal],
         'o', markersize=2, color='mediumblue', markeredgewidth=0, alpha=0.8, label='signal')

plt.xlim(0, 90)
plt.ylim(0, 60)
plt.xlabel('jet_m_1', fontsize=12)
plt.ylabel('jet_m_2', fontsize=12)
plt.legend(frameon=False, numpoints=1, markerscale=1)

In [None]:
# jet_m_1 against jj_pt_2 for the training events; signal is drawn first, so
# background points are drawn on top of it.
train_is_signal = training_set.label == 1
train_is_background = training_set.label == 0
plt.figure()
plt.plot(training_set.jet_m_1[train_is_signal], training_set.jj_pt_2[train_is_signal],
         'o', markersize=2, color='mediumblue', markeredgewidth=0, alpha=0.8, label='signal')
plt.plot(training_set.jet_m_1[train_is_background], training_set.jj_pt_2[train_is_background],
         'o', markersize=2, color='firebrick', markeredgewidth=0, alpha=0.8, label='background')

plt.xlim(0, 90)
plt.ylim(0, 70)
plt.xlabel('jet_m_1', fontsize=12)
# The axis label names the column that is actually plotted (it previously
# read 'jet_pt_2').
plt.ylabel('jj_pt_2', fontsize=12)
plt.legend(frameon=False, numpoints=1, markerscale=1)