# ETL de root a MySQL

Este notebook lee los datos de un archivo .root, los transforma en múltiples dataframes y después los sube a una base de datos de MySQL.

El archivo .root proviene de experimentos de colisiones de partículas realizados en el CERN y la base de datos está en un contenedor de Docker en un server.

Para más información del dataset consultar la siguiente referencia:



> ATLAS collaboration (2025). ATLAS ROOT ntuple format Run 2 2015+2016 proton-proton collision data beta release, 2J2LMET30 skim. CERN Open Data Portal. DOI:10.7483/OPENDATA.ATLAS.0CJR.N7ZT

## Cargando Librerías

In [1]:
import pandas as pd
import numpy as np
import uproot


## Obteniendo data del archivo .root y guardando en una variable local

In [2]:
# Remote (root://) URL of the file on eospublic.cern.ch, kept commented out for reference:
#file_link = 'root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/93934/ODEO_FEB2025_v0_2J2LMET30_data15_periodD.2J2LMET30.root'
# Path of the .root file that the following cells open; relative to the working directory.
file_path = 'ODEO_FEB2025_v0_2J2LMET30_data15_periodD.2J2LMET30.root'

In [3]:
# Open the ROOT file and print its first top-level key (the TTree name).
try:
    # The context manager closes the file handle when the block exits.
    # NOTE(review): `particles_file` is used by later cells after the handle
    # has been closed; this relies on uproot tolerating reads on a closed
    # file -- confirm, or keep the file open for the lifetime of the notebook.
    with uproot.open(file_path) as file:
        particles_file = file
        keys = particles_file.keys()  # Names of the objects stored in the file
        print(keys[0])
except Exception as e:
    print(f"Ocurrió un error: {e}")
    # Re-raise: every later cell needs `particles_file`, so swallowing the
    # error would only defer the failure to a NameError (or silently reuse a
    # stale handle left over from a previous run of the kernel).
    raise


analysis;1


In [4]:
# Look up the object stored under the key 'analysis;1' and display it.
data = particles_file['analysis;1']
data

<TTree 'analysis' (119 branches) at 0x01b6e9183090>

In [5]:
data.keys()

['num_events',
 'sum_of_weights',
 'sum_of_weights_squared',
 'category',
 'sig_lep',
 'n_sig_lep',
 'TriggerMatch_DILEPTON',
 'ScaleFactor_MLTRIGGER',
 'ScaleFactor_PILEUP',
 'ScaleFactor_FTAG',
 'mcWeight',
 'xsec',
 'filteff',
 'kfac',
 'channelNumber',
 'eventNumber',
 'runNumber',
 'trigML',
 'trigP',
 'trigDT',
 'trigT',
 'trigE',
 'trigDM',
 'trigDE',
 'trigM',
 'trigMET',
 'ScaleFactor_BTAG',
 'ScaleFactor_JVT',
 'jet_n',
 'jet_pt',
 'jet_eta',
 'jet_phi',
 'jet_e',
 'jet_btag_quantile',
 'jet_jvt',
 'largeRJet_n',
 'largeRJet_pt',
 'largeRJet_eta',
 'largeRJet_phi',
 'largeRJet_e',
 'largeRJet_m',
 'largeRJet_D2',
 'jet_pt_jer1',
 'jet_pt_jer2',
 'ScaleFactor_ELE',
 'ScaleFactor_MUON',
 'ScaleFactor_LepTRIGGER',
 'ScaleFactor_MuTRIGGER',
 'ScaleFactor_ElTRIGGER',
 'lep_n',
 'lep_type',
 'lep_pt',
 'lep_eta',
 'lep_phi',
 'lep_e',
 'lep_charge',
 'lep_ptvarcone30',
 'lep_topoetcone20',
 'lep_z0',
 'lep_d0',
 'lep_d0sig',
 'lep_isTightID',
 'lep_isMediumID',
 'lep_isLooseID',
 '

## Convertimos a pandas.DataFrame

In [6]:
# Convert every branch of the tree to a NumPy array and gather the arrays,
# keyed by branch name, into a single DataFrame (one column per branch).
branch_arrays = {}
for branch_name in data.keys():
    branch_arrays[branch_name] = np.array(data[branch_name])
df = pd.DataFrame(branch_arrays)

In [7]:
df.head()

Unnamed: 0,num_events,sum_of_weights,sum_of_weights_squared,category,sig_lep,n_sig_lep,TriggerMatch_DILEPTON,ScaleFactor_MLTRIGGER,ScaleFactor_PILEUP,ScaleFactor_FTAG,...,truth_photon_n,truth_photon_pt,truth_photon_eta,truth_photon_phi,truth_met,truth_met_phi,met,met_phi,met_mpx,met_mpy
0,1.0,1.0,1.0,data15,"[True, True, False]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,57.207947,0.716323,43.147678,37.563633
1,1.0,1.0,1.0,data15,"[True, True]",2,1.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,32.000393,1.041358,16.161745,27.619253
2,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,41.463879,1.703697,-5.494378,41.09824
3,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,49.653851,-1.2319,16.507263,-46.829639
4,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,39.204754,-2.952981,-38.509476,-7.350717


In [8]:
# Name the row index so it can serve as the collision identifier downstream.
df.index.name = 'collision_ID'

In [9]:
df.head()

Unnamed: 0_level_0,num_events,sum_of_weights,sum_of_weights_squared,category,sig_lep,n_sig_lep,TriggerMatch_DILEPTON,ScaleFactor_MLTRIGGER,ScaleFactor_PILEUP,ScaleFactor_FTAG,...,truth_photon_n,truth_photon_pt,truth_photon_eta,truth_photon_phi,truth_met,truth_met_phi,met,met_phi,met_mpx,met_mpy
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,1.0,1.0,data15,"[True, True, False]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,57.207947,0.716323,43.147678,37.563633
1,1.0,1.0,1.0,data15,"[True, True]",2,1.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,32.000393,1.041358,16.161745,27.619253
2,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,41.463879,1.703697,-5.494378,41.09824
3,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,49.653851,-1.2319,16.507263,-46.829639
4,1.0,1.0,1.0,data15,"[True, True]",2,0.0,1.0,1.0,1.0,...,0,[],[],[],0.0,0.0,39.204754,-2.952981,-38.509476,-7.350717


In [10]:
len(df)

10085

## Dividimos en distintos dataframes

### Primero los datos de la tabla principal

Vamos a usar las siguientes columnas:

'num_events',
'sum_of_weights',
'sum_of_weights_squared',
'met',
'met_phi',
'met_mpx',
'met_mpy'


In [11]:
# Event-level columns that form the main table.
main_columns = [
    'num_events', 'sum_of_weights', 'sum_of_weights_squared',
    'met', 'met_phi', 'met_mpx', 'met_mpy',
]
main_df = df[main_columns]

In [12]:
main_df

Unnamed: 0_level_0,num_events,sum_of_weights,sum_of_weights_squared,met,met_phi,met_mpx,met_mpy
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1.0,1.0,1.0,57.207947,0.716323,43.147678,37.563633
1,1.0,1.0,1.0,32.000393,1.041358,16.161745,27.619253
2,1.0,1.0,1.0,41.463879,1.703697,-5.494378,41.098240
3,1.0,1.0,1.0,49.653851,-1.231900,16.507263,-46.829639
4,1.0,1.0,1.0,39.204754,-2.952981,-38.509476,-7.350717
...,...,...,...,...,...,...,...
10080,1.0,1.0,1.0,36.490295,1.543729,0.987562,36.476933
10081,1.0,1.0,1.0,44.619659,2.989064,-44.101624,6.779440
10082,1.0,1.0,1.0,134.684464,2.831325,-128.253525,41.121010
10083,1.0,1.0,1.0,66.280960,-2.777267,-61.930546,-23.617212


In [13]:
# Target dtype per column: the three bookkeeping columns become integers and
# the MET columns become strings.
# NOTE(review): the string cast presumably exists so the values can be handed
# to the MySQL driver later on -- confirm.
main_dtypes = dict.fromkeys(
    ['num_events', 'sum_of_weights', 'sum_of_weights_squared'], int
)
main_dtypes.update(dict.fromkeys(['met', 'met_phi', 'met_mpx', 'met_mpy'], str))
main_df = main_df.astype(main_dtypes)

### Ahora el dataframe de jet

Se usarán las columnas:

 'jet_n',
 'jet_pt',
 'jet_eta',
 'jet_phi',
 'jet_e',
 'jet_btag_quantile',
 'jet_jvt',



In [14]:
# Jet columns: the per-event jet count plus the per-jet array columns.
jet_columns = [
    'jet_n', 'jet_pt', 'jet_eta', 'jet_phi', 'jet_e',
    'jet_btag_quantile', 'jet_jvt',
]
jet_df = df[jet_columns]

In [15]:
jet_df

Unnamed: 0_level_0,jet_n,jet_pt,jet_eta,jet_phi,jet_e,jet_btag_quantile,jet_jvt
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2,"[27.277308, 23.915903]","[2.451941, 2.0638018]","[-2.5345325, -1.0778844]","[159.55577, 95.77804]","[1, 1]","[True, True]"
1,3,"[45.529007, 44.244377, 39.351395]","[-1.015103, -0.9125277, 1.202227]","[-0.58708495, -3.094194, 2.4808156]","[71.276375, 64.48113, 71.50386]","[1, 1, 1]","[True, True, True]"
2,2,"[20.359121, 22.363218]","[0.45512232, 0.36398754]","[1.4618268, 2.2884467]","[23.118109, 24.141964]","[1, 1]","[True, False]"
3,3,"[140.99684, 87.72979, 39.57966]","[0.7368227, 1.3449336, -0.11469239]","[1.4386162, -1.8008908, 1.3376634]","[181.1719, 180.16273, 40.326794]","[1, 1, 1]","[True, True, True]"
4,2,"[26.345266, 22.03974]","[-0.83155537, -1.3980061]","[1.1844668, -0.40749294]","[36.532345, 47.498894]","[1, 1]","[True, True]"
...,...,...,...,...,...,...,...
10080,2,"[88.412735, 26.964527]","[0.15310784, -1.0901309]","[0.76332563, -1.1900661]","[89.67663, 45.162025]","[5, 1]","[True, True]"
10081,5,"[78.88422, 64.4261, 32.706535, 22.50707, 25.19...","[1.2160527, -1.2554768, 0.18871357, 0.05326682...","[-0.5554678, 2.5292876, 0.32284036, -0.3901165...","[145.10197, 122.49265, 33.600014, 22.851765, 6...","[1, 1, 3, 1, 1]","[True, True, True, True, True]"
10082,3,"[169.2884, 55.495377, 21.571474]","[0.5902377, 1.2841488, 1.0625256]","[-0.4387807, 1.8077481, -0.94883084]","[200.06583, 108.271416, 35.277508]","[1, 1, 1]","[True, True, True]"
10083,3,"[100.65527, 36.94817, 20.89472]","[2.1096845, -2.1278172, -0.65240175]","[-0.040889245, 1.8016511, 2.0508022]","[421.1489, 157.3964, 25.87537]","[1, 5, 1]","[True, True, False]"


Ahora expandimos las columnas para evitar tener arreglos en las filas

In [16]:
# Array-valued jet columns; exploding them together yields one row per jet
# while the index (collision_ID) and jet_n are repeated on each row.
jet_array_columns = [
    'jet_pt', 'jet_eta', 'jet_phi', 'jet_e', 'jet_btag_quantile', 'jet_jvt',
]
jet_df = jet_df.explode(column=jet_array_columns)

In [17]:
jet_df

Unnamed: 0_level_0,jet_n,jet_pt,jet_eta,jet_phi,jet_e,jet_btag_quantile,jet_jvt
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2,27.277308,2.451941,-2.534533,159.555771,1,True
0,2,23.915903,2.063802,-1.077884,95.778038,1,True
1,3,45.529007,-1.015103,-0.587085,71.276375,1,True
1,3,44.244377,-0.912528,-3.094194,64.481133,1,True
1,3,39.351395,1.202227,2.480816,71.50386,1,True
...,...,...,...,...,...,...,...
10083,3,100.655273,2.109684,-0.040889,421.148895,1,True
10083,3,36.94817,-2.127817,1.801651,157.396393,5,True
10083,3,20.89472,-0.652402,2.050802,25.87537,1,False
10084,2,21.110826,0.613736,2.449781,25.485065,1,True


In [44]:
# Target dtype per column: jet count as integer, JVT flag as boolean, and the
# remaining per-jet values as strings.
jet_dtypes = {'jet_n': int, 'jet_jvt': bool}
jet_dtypes.update(
    dict.fromkeys(
        ['jet_pt', 'jet_eta', 'jet_phi', 'jet_e', 'jet_btag_quantile'], str
    )
)
jet_df = jet_df.astype(jet_dtypes)

### Tabla de leptones

Usamos las columnas:  


 'lep_n',
 'lep_type',
 'lep_pt',
 'lep_eta',
 'lep_phi',
 'lep_e',
 'lep_charge',
 'lep_ptvarcone30',
 'lep_topoetcone20',
 'lep_z0',
 'lep_d0',
 'lep_d0sig',
 'lep_isTightID',
 'lep_isMediumID',
 'lep_isLooseID',
 'lep_isTightIso',
 'lep_isLooseIso',
 'lep_isTrigMatched',


In [18]:
# Lepton columns: the per-event lepton count plus the per-lepton array columns.
lep_columns = [
    'lep_n', 'lep_type',
    'lep_pt', 'lep_eta', 'lep_phi', 'lep_e',
    'lep_charge',
    'lep_ptvarcone30', 'lep_topoetcone20',
    'lep_z0', 'lep_d0', 'lep_d0sig',
    'lep_isTightID', 'lep_isMediumID', 'lep_isLooseID',
    'lep_isTightIso', 'lep_isLooseIso', 'lep_isTrigMatched',
]
lep_df = df[lep_columns]

In [19]:
lep_df

Unnamed: 0_level_0,lep_n,lep_type,lep_pt,lep_eta,lep_phi,lep_e,lep_charge,lep_ptvarcone30,lep_topoetcone20,lep_z0,lep_d0,lep_d0sig,lep_isTightID,lep_isMediumID,lep_isLooseID,lep_isTightIso,lep_isLooseIso,lep_isTrigMatched
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,3,"[11, 11, 13]","[20.874048, 7.4988637, 11.558971]","[1.6207038, 1.486013, 1.5528705]","[-0.11316347, 3.0881264, -0.054899357]","[54.840378, 17.418785, 28.531515]","[-1, -1, 1]","[63.344303, 87.11956, 32.073074]","[11.606884, 12.323459, 18.889587]","[1.3566418, -0.040052414, 0.036180496]","[-0.09558339, 0.0063920966, 0.09789402]","[-3.092892, 0.17257826, 4.6164446]","[True, True, False]","[True, True, False]","[True, True, False]","[False, False, False]","[False, False, False]","[False, False, False]"
1,2,"[11, 11]","[55.04022, 49.22271]","[-1.5614545, -1.8566843]","[-2.3305018, 0.09920949]","[136.92847, 161.41757]","[1, -1]","[0.0, 0.0]","[1.0865507, -0.18651]","[-0.07581329, 0.042907715]","[0.020473164, 0.02518605]","[0.92528534, 0.55464524]","[True, True]","[True, True]","[True, True]","[True, True]","[True, True]","[True, True]"
2,2,"[13, 13]","[7.962454, 28.892351]","[1.7648256, 1.7118078]","[-1.4523491, -1.4816622]","[23.934454, 82.625046]","[-1, 1]","[29.16796, 7.900184]","[3.865145, 4.286861]","[-0.19255257, 0.31853104]","[-0.014675194, 0.04953215]","[-0.50940454, 2.5927756]","[True, True]","[True, True]","[True, True]","[False, False]","[False, False]","[False, False]"
3,2,"[13, 13]","[10.407372, 9.1107]","[-2.3787608, -2.1718822]","[-1.8683571, -2.0053227]","[56.637993, 40.49149]","[-1, 1]","[12.416915, 12.960243]","[5.30828, 1.282021]","[-0.54913807, -0.19639683]","[-0.03912512, -0.00853643]","[-1.4291148, -0.32937345]","[True, True]","[True, True]","[True, True]","[False, False]","[False, False]","[False, False]"
4,2,"[13, 13]","[7.692173, 9.421369]","[-2.0520146, -1.0476807]","[-1.4894565, 1.5423807]","[30.43058, 15.082951]","[1, -1]","[0.0, 0.0]","[0.0, 2.2664888]","[0.2676983, 0.022184372]","[-0.063558, -0.056431998]","[-2.7181218, -2.7039282]","[True, True]","[True, True]","[True, True]","[True, False]","[True, True]","[False, False]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10080,2,"[11, 13]","[10.263605, 8.140703]","[1.2525169, 0.98286164]","[0.7862172, 0.71205634]","[19.42348, 12.400086]","[1, -1]","[9.955613, 8.998822]","[0.6192059, 0.45012036]","[-0.03651333, 0.25893307]","[-0.067461014, 0.024779482]","[-1.6956947, 1.1813747]","[True, True]","[True, True]","[True, True]","[False, False]","[False, False]","[False, False]"
10081,2,"[13, 13]","[21.708593, 9.861358]","[-0.896505, -0.8743773]","[2.3057775, 1.6518235]","[31.032776, 13.877815]","[1, -1]","[0.0, 1.2309349]","[-0.7887755, -0.48894998]","[0.005422592, 0.08281326]","[-0.045021463, -0.025833769]","[-2.6996508, -1.2398058]","[True, True]","[True, True]","[True, True]","[True, False]","[True, True]","[True, False]"
10082,2,"[11, 13]","[18.855267, 12.914005]","[0.59412426, 1.2399378]","[2.7989712, -2.104247]","[22.282112, 24.180418]","[1, -1]","[0.0, 0.0]","[0.08558075, -0.44781002]","[0.19130516, -0.0012073517]","[-0.074771374, 0.05298881]","[-4.1345353, 2.9677095]","[True, True]","[True, True]","[True, True]","[True, True]","[True, True]","[False, False]"
10083,2,"[11, 13]","[14.485749, 20.681236]","[-0.14508505, -0.2084966]","[-2.1901808, -2.2751505]","[14.638476, 21.132647]","[1, 1]","[29.697603, 16.321537]","[3.502641, 16.356028]","[-0.32203293, -0.038455963]","[0.0847699, -0.011818251]","[2.6920564, -0.7098859]","[True, True]","[True, True]","[True, True]","[False, False]","[False, False]","[False, False]"


In [20]:
# Array-valued lepton columns; exploding them together yields one row per
# lepton while the index (collision_ID) and lep_n are repeated on each row.
lep_array_columns = [
    'lep_type',
    'lep_pt', 'lep_eta', 'lep_phi', 'lep_e',
    'lep_charge',
    'lep_ptvarcone30', 'lep_topoetcone20',
    'lep_z0', 'lep_d0', 'lep_d0sig',
    'lep_isTightID', 'lep_isMediumID', 'lep_isLooseID',
    'lep_isTightIso', 'lep_isLooseIso', 'lep_isTrigMatched',
]
lep_df = lep_df.explode(column=lep_array_columns)

In [21]:
lep_df

Unnamed: 0_level_0,lep_n,lep_type,lep_pt,lep_eta,lep_phi,lep_e,lep_charge,lep_ptvarcone30,lep_topoetcone20,lep_z0,lep_d0,lep_d0sig,lep_isTightID,lep_isMediumID,lep_isLooseID,lep_isTightIso,lep_isLooseIso,lep_isTrigMatched
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,3,11,20.874048,1.620704,-0.113163,54.840378,-1,63.344303,11.606884,1.356642,-0.095583,-3.092892,True,True,True,False,False,False
0,3,11,7.498864,1.486013,3.088126,17.418785,-1,87.11956,12.323459,-0.040052,0.006392,0.172578,True,True,True,False,False,False
0,3,13,11.558971,1.552871,-0.054899,28.531515,1,32.073074,18.889587,0.03618,0.097894,4.616445,False,False,False,False,False,False
1,2,11,55.040218,-1.561455,-2.330502,136.928467,1,0.0,1.086551,-0.075813,0.020473,0.925285,True,True,True,True,True,True
1,2,11,49.22271,-1.856684,0.099209,161.417572,-1,0.0,-0.18651,0.042908,0.025186,0.554645,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10082,2,13,12.914005,1.239938,-2.104247,24.180418,-1,0.0,-0.44781,-0.001207,0.052989,2.96771,True,True,True,True,True,False
10083,2,11,14.485749,-0.145085,-2.190181,14.638476,1,29.697603,3.502641,-0.322033,0.08477,2.692056,True,True,True,False,False,False
10083,2,13,20.681236,-0.208497,-2.275151,21.132647,1,16.321537,16.356028,-0.038456,-0.011818,-0.709886,True,True,True,False,False,False
10084,2,13,14.033728,-1.932562,1.158909,49.482765,-1,7.497568,-0.167967,-0.148535,-0.008243,-0.421062,True,True,True,False,False,False


In [55]:
# Target dtype per column: type, charge and the ID/isolation/trigger flags
# become integers; the kinematic and impact-parameter values become strings.
lep_dtypes = dict.fromkeys(
    [
        'lep_type', 'lep_charge',
        'lep_isTightID', 'lep_isMediumID', 'lep_isLooseID',
        'lep_isTightIso', 'lep_isLooseIso', 'lep_isTrigMatched',
    ],
    int,
)
lep_dtypes.update(
    dict.fromkeys(
        [
            'lep_pt', 'lep_eta', 'lep_phi', 'lep_e',
            'lep_ptvarcone30', 'lep_topoetcone20',
            'lep_z0', 'lep_d0', 'lep_d0sig',
        ],
        str,
    )
)
lep_df = lep_df.astype(lep_dtypes)

In [61]:

# Columns that should hold numbers but currently hold non-numeric (object) values.
numeric_cols = [
    'lep_pt', 'lep_eta', 'lep_phi', 'lep_e',
    'lep_ptvarcone30', 'lep_topoetcone20',
    'lep_z0', 'lep_d0', 'lep_d0sig',
]

# Parse each of those columns with pd.to_numeric; errors='coerce' turns any
# value that cannot be parsed into NaN instead of raising.
lep_df[numeric_cols] = lep_df[numeric_cols].apply(pd.to_numeric, errors='coerce')

### Finalizamos con la tabla de fotones

Se usan las columnas:



 'photon_n',
 'photon_pt',
 'photon_eta',
 'photon_phi',
 'photon_e',
 'photon_ptcone20',
 'photon_topoetcone40',
 'photon_isLooseID',
 'photon_isTightID',
 'photon_isLooseIso',
 'photon_isTightIso'


In [22]:
# Photon columns: the per-event photon count plus the per-photon array columns.
photon_columns = [
    'photon_n',
    'photon_pt', 'photon_eta', 'photon_phi', 'photon_e',
    'photon_ptcone20', 'photon_topoetcone40',
    'photon_isLooseID', 'photon_isTightID',
    'photon_isLooseIso', 'photon_isTightIso',
]
photon_df = df[photon_columns]

In [23]:
photon_df

Unnamed: 0_level_0,photon_n,photon_pt,photon_eta,photon_phi,photon_e,photon_ptcone20,photon_topoetcone40,photon_isLooseID,photon_isTightID,photon_isLooseIso,photon_isTightIso
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,[],[],[],[],[],[],[],[],[],[]
1,0,[],[],[],[],[],[],[],[],[],[]
2,0,[],[],[],[],[],[],[],[],[],[]
3,0,[],[],[],[],[],[],[],[],[],[]
4,0,[],[],[],[],[],[],[],[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...
10080,1,[77.935234],[0.64197505],[-2.1794174],[94.55423],[14.392157],[13.061836],[True],[False],[False],[False]
10081,0,[],[],[],[],[],[],[],[],[],[]
10082,0,[],[],[],[],[],[],[],[],[],[]
10083,0,[],[],[],[],[],[],[],[],[],[]


In [24]:
photon_df.describe()

Unnamed: 0,photon_n
count,10085.0
mean,0.011502
std,0.107561
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,2.0


In [25]:
# Array-valued photon columns; exploding them together yields one row per
# photon, and a single all-NaN row for events whose arrays are empty.
photon_array_columns = [
    'photon_pt', 'photon_eta', 'photon_phi', 'photon_e',
    'photon_ptcone20', 'photon_topoetcone40',
    'photon_isLooseID', 'photon_isTightID',
    'photon_isLooseIso', 'photon_isTightIso',
]
photon_df = photon_df.explode(column=photon_array_columns)

In [26]:
photon_df

Unnamed: 0_level_0,photon_n,photon_pt,photon_eta,photon_phi,photon_e,photon_ptcone20,photon_topoetcone40,photon_isLooseID,photon_isTightID,photon_isLooseIso,photon_isTightIso
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,,,,,,,,,,
1,0,,,,,,,,,,
2,0,,,,,,,,,,
3,0,,,,,,,,,,
4,0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
10080,1,77.935234,0.641975,-2.179417,94.55423,14.392157,13.061836,True,False,False,False
10081,0,,,,,,,,,,
10082,0,,,,,,,,,,
10083,0,,,,,,,,,,


In [27]:
# Keep only the rows in which every column has a value, i.e. discard the
# placeholder rows produced for events without photons.
photon_df = photon_df[photon_df.notna().all(axis=1)]

In [28]:
photon_df

Unnamed: 0_level_0,photon_n,photon_pt,photon_eta,photon_phi,photon_e,photon_ptcone20,photon_topoetcone40,photon_isLooseID,photon_isTightID,photon_isLooseIso,photon_isTightIso
collision_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
75,1,29.428501,-0.42258,3.107259,32.095421,3.668287,9.95827,True,True,False,False
119,1,42.596954,-1.267952,-0.851704,81.679108,4.937068,7.57778,True,False,False,False
243,1,28.073273,2.329104,-0.178241,145.505508,4.934692,9.892411,True,False,False,False
451,1,31.139017,1.676202,-2.457949,86.135208,0.0,0.726301,True,True,True,True
635,1,38.747982,-0.244593,0.613119,39.91283,4.537786,9.767576,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...
9658,1,30.476669,-0.775461,2.399182,40.108582,5.155587,7.458091,True,False,False,False
9753,1,32.621662,2.18878,-2.750421,147.390686,0.0,-0.969303,True,True,True,True
9943,1,195.654312,-1.014938,0.232678,305.378876,140.083954,59.401478,True,False,False,False
10049,1,82.64035,1.294857,1.884439,162.157333,0.0,1.288387,True,True,True,True


In [73]:
# Target dtype per column: the photon count and the ID/isolation flags become
# integers; the kinematic and isolation-cone values become strings.
photon_dtypes = dict.fromkeys(
    [
        'photon_pt', 'photon_eta', 'photon_phi', 'photon_e',
        'photon_ptcone20', 'photon_topoetcone40',
    ],
    str,
)
photon_dtypes.update(
    dict.fromkeys(
        [
            'photon_n',
            'photon_isLooseID', 'photon_isTightID',
            'photon_isLooseIso', 'photon_isTightIso',
        ],
        int,
    )
)
photon_df = photon_df.astype(photon_dtypes)

## Crear base de datos con Docker

Para tener una base de datos de MYSQL local primero se instala Docker CLI, 

después se jala la imagen de mysql con la siguiente linea de comandos en la terminal:

```
 > docker pull mysql:8.0
```

finalmente se crea el contenedor usando la imagen de mysql:


```
 > docker run --name sqlab -e MYSQL_ROOT_PASSWORD=contra_123 -e MYSQL_DATABASE=particles -p 3306:3306 -d mysql:8.0
```

Donde: 

*sqlab* es el nombre del contenedor

*contra_123* es la contraseña para el usuario root

*particles* es el nombre de la base de datos

*3306:3306* publica el puerto 3306 del contenedor en el puerto 3306 del host, lo que nos permite acceder a MySQL de manera local

*mysql:8.0* es la imagen:versión sobre la que se construye el contenedor

## Conectar a la base de datos



In [29]:
#importamos la libreria para hacer conexiones a mysql
import mysql.connector

In [None]:
# Connection settings passed as keyword arguments to mysql.connector.connect().
# Each value is read from an environment variable so credentials never have to
# be written into the notebook; when a variable is unset the original
# placeholder/default is used, so editing the placeholders by hand still works.
import os

config = {
    'user': os.environ.get('MYSQL_USER', "<user>"),
    'password': os.environ.get('MYSQL_PASSWORD', "<pwd>"),
    'host': os.environ.get('MYSQL_HOST', "<host address>"),
    # Environment variables are strings; the connector setting is kept as an int.
    'port': int(os.environ.get('MYSQL_PORT', 3306)),
    'database': os.environ.get('MYSQL_DATABASE', "particles"),
}

In [32]:
# Smoke test: connect with `config`, list the tables of the database, and
# close the connection again.

try:
    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    # Run a trivial query to confirm the connection works
    cursor.execute("SHOW TABLES")
    tables = cursor.fetchall()
    print("Tablas:", tables)

except mysql.connector.Error as err:
    # Connector errors are reported instead of raised.
    print(f"Error: {err}")

finally:
    # Close cursor and connection, but only if `connection` exists and is open.
    # At notebook top level locals() is the kernel namespace, so a `connection`
    # left over from a previous run of this cell also satisfies the first test.
    if 'connection' in locals() and connection.is_connected():
        cursor.close()
        connection.close()
        print("Conexión a MySQL cerrada")

Tablas: []
Conexión a MySQL cerrada


## Crear Tablas

### Empezamos con la tabla 1

In [33]:

# Open a connection dedicated to this DDL statement.
mydb = mysql.connector.connect(**config)

cursor = mydb.cursor()

# Event-level table keyed by collision_ID.
# The MET columns declare an explicit precision and scale: a bare DECIMAL in
# MySQL means DECIMAL(10,0), which rounds every inserted value to an integer.
# CREATE TABLE IF NOT EXISTS leaves an already existing table untouched, so a
# table created with the old definition has to be dropped or altered for the
# new column types to take effect.
query = """CREATE TABLE IF NOT EXISTS collisions(
                collision_ID INT PRIMARY KEY,
                num_events INT,
                sum_of_weights INT,
                sum_of_weights_squared INT,
                met DECIMAL(12, 6),
                met_phi DECIMAL(12, 6),
                met_mpx DECIMAL(12, 6),
                met_mpy DECIMAL(12, 6)
)"""

try:
    # Create the table.
    cursor.execute(query)

except mysql.connector.Error as err:
    print(f"Error: {err}")

finally:
    # Close the handles opened in THIS cell. The previous check looked at the
    # global `connection` from the earlier smoke-test cell, which is already
    # closed, so `mydb` and `cursor` were never released.
    cursor.close()
    mydb.close()
    print("Conexión a MySQL cerrada")

In [34]:

# One tuple per event: the index value (collision_ID) followed by the column
# values, in the same order as the placeholders of the INSERT below.
main_tuplas = list(main_df.itertuples(index=True, name=None))

# Parameterized INSERT; the driver fills in the %s placeholders.
sql = "INSERT INTO collisions (collision_ID,num_events,sum_of_weights,sum_of_weights_squared,met,met_phi,met_mpx,met_mpy) VALUES (%s,%s, %s, %s,%s, %s, %s,%s)"

# Dedicated connection for the bulk insert.
mydb = mysql.connector.connect(**config)
cursor = mydb.cursor()

try:
    cursor.executemany(sql, main_tuplas)
    mydb.commit()
    print(f"{cursor.rowcount} filas añadidas.")
except mysql.connector.Error as err:
    print(f"Error: {err}")
    # Undo whatever part of the batch was applied before the error.
    mydb.rollback()
finally:
    cursor.close()
    mydb.close()

10085 filas añadidas.


### Tablas jets

In [None]:
# Open a connection dedicated to this DDL statement.
mydb = mysql.connector.connect(**config)

cursor = mydb.cursor()

# Per-jet table: one row per jet, linked to its event through collision_ID.
# The kinematic columns declare an explicit precision and scale: a bare
# DECIMAL in MySQL means DECIMAL(10,0), which rounds every inserted value to
# an integer. jet_btag_quantile keeps the bare DECIMAL of the original schema.
# NOTE(review): jet_btag_quantile looks integer-valued in the displayed sample
# -- confirm before relying on the zero scale.
query = """CREATE TABLE IF NOT EXISTS jets(
                jet_ID INT AUTO_INCREMENT PRIMARY KEY,
                collision_ID INT, 
                jet_n INT, 
                jet_pt DECIMAL(12, 6), 
                jet_eta DECIMAL(12, 6),
                jet_phi DECIMAL(12, 6), 
                jet_e DECIMAL(12, 6),
                jet_btag_quantile DECIMAL, 
                jet_jvt INT
)"""

try:
    # Drop any previous table and recreate it from the definition above.
    cursor.execute("DROP TABLE IF EXISTS jets")
    cursor.execute(query)

except mysql.connector.Error as err:
    print(f"Error: {err}")

finally:
    # Close the handles opened in THIS cell. The previous check looked at the
    # global `connection` from the earlier smoke-test cell, which is already
    # closed, so `mydb` and `cursor` were never released.
    cursor.close()
    mydb.close()
    print("Conexión a MySQL cerrada")

In [45]:
# One tuple per jet: the index value (collision_ID) followed by the column
# values, in the same order as the placeholders of the INSERT below.
jet_tuplas = list(jet_df.itertuples(index=True, name=None))

# Parameterized INSERT; jet_ID is omitted because the table auto-increments it.
sql_jet = """
INSERT INTO jets (
    collision_ID, jet_n, jet_pt, jet_eta, jet_phi, jet_e,
    jet_btag_quantile, jet_jvt
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
"""

# Dedicated connection for the bulk insert.
mydb_jet = mysql.connector.connect(**config)
cursor_jet = mydb_jet.cursor()

try:
    cursor_jet.executemany(sql_jet, jet_tuplas)
    mydb_jet.commit()
    print(f"{cursor_jet.rowcount} filas añadidas a la tabla 'jets'.")
except mysql.connector.Error as err:
    print(f"Error al insertar en la tabla 'jets': {err}")
    # Undo whatever part of the batch was applied before the error.
    mydb_jet.rollback()
finally:
    cursor_jet.close()
    mydb_jet.close()

25882 filas añadidas a la tabla 'jets'.


### Tablas leptons

In [69]:
# Connection shared by the DDL statements and the bulk insert of this cell.
mydb = mysql.connector.connect(**config)
cursor = mydb.cursor()

# Per-lepton table: one row per lepton, linked to its event through collision_ID.
query_lep = """
CREATE TABLE IF NOT EXISTS leptons (
    lep_ID INT AUTO_INCREMENT PRIMARY KEY,
    collision_ID INT,
    lep_type INT,
    lep_pt DECIMAL(10, 2),
    lep_eta DECIMAL(10, 4),
    lep_phi DECIMAL(10, 4),
    lep_e DECIMAL(10, 2),
    lep_charge INT,
    lep_ptvarcone30 DECIMAL(10, 2),
    lep_topoetcone20 DECIMAL(10, 2),
    lep_z0 DECIMAL(10, 4),
    lep_d0 DECIMAL(10, 4),
    lep_d0sig DECIMAL(10, 4),
    lep_isTightID INT,
    lep_isMediumID INT,
    lep_isLooseID INT,
    lep_isTightIso INT,
    lep_isLooseIso INT,
    lep_isTrigMatched INT
)
"""

# Drop any previous table and recreate it; a connector error is reported and
# the cell carries on.
try:
    cursor.execute("DROP TABLE IF EXISTS leptons")
    cursor.execute(query_lep)
    print("Tabla 'leptons' creada o ya existente.")
except mysql.connector.Error as err:
    print(f"Error al crear la tabla 'leptons': {err}")

# (column, Python type) pairs, listed in the order of the INSERT columns that
# follow collision_ID. Each row value is cast to the paired type.
lep_casts = (
    ('lep_type', int),
    ('lep_pt', float),
    ('lep_eta', float),
    ('lep_phi', float),
    ('lep_e', float),
    ('lep_charge', int),
    ('lep_ptvarcone30', float),
    ('lep_topoetcone20', float),
    ('lep_z0', float),
    ('lep_d0', float),
    ('lep_d0sig', float),
    ('lep_isTightID', int),
    ('lep_isMediumID', int),
    ('lep_isLooseID', int),
    ('lep_isTightIso', int),
    ('lep_isLooseIso', int),
    ('lep_isTrigMatched', int),
)

# Build one tuple per lepton, led by the collision_ID taken from the index.
lep_tuplas = []
for index, row in lep_df.iterrows():
    try:
        collision_id = int(index)
        casted = [cast(row[name]) for name, cast in lep_casts]
    except (ValueError, TypeError) as e:
        # A value that cannot be cast (e.g. a non-numeric string or a list)
        # causes the whole row to be reported and left out.
        print(f"Skipping row {index} due to conversion error: {e}")
        continue
    lep_tuplas.append((collision_id, *casted))

# Parameterized INSERT; lep_ID is omitted because the table auto-increments it.
sql_lep = """
INSERT INTO leptons (
    collision_ID, lep_type, lep_pt, lep_eta, lep_phi, lep_e, lep_charge,
    lep_ptvarcone30, lep_topoetcone20, lep_z0, lep_d0, lep_d0sig,
    lep_isTightID, lep_isMediumID, lep_isLooseID, lep_isTightIso,
    lep_isLooseIso, lep_isTrigMatched
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

try:
    cursor.executemany(sql_lep, lep_tuplas)
    mydb.commit()
    print(f"{cursor.rowcount} filas añadidas a la tabla 'leptons'.")
except mysql.connector.Error as err:
    print(f"Error al insertar datos en la tabla 'leptons': {err}")
    # Undo whatever part of the batch was applied before the error.
    mydb.rollback()
finally:
    cursor.close()
    mydb.close()

Tabla 'leptons' creada o ya existente.
21461 filas añadidas a la tabla 'leptons'.


### Tablas photons

In [74]:
# Connection shared by the DDL statement and the bulk insert of this cell.
mydb = mysql.connector.connect(**config)
cursor = mydb.cursor()

# Per-photon table: one row per photon, linked to its event through collision_ID.
query_photon = """
CREATE TABLE IF NOT EXISTS photons (
    photon_ID INT AUTO_INCREMENT PRIMARY KEY,
    collision_ID INT,
    photon_n INT,
    photon_pt DECIMAL(10, 2),
    photon_eta DECIMAL(10, 4),
    photon_phi DECIMAL(10, 4),
    photon_e DECIMAL(10, 2),
    photon_ptcone20 DECIMAL(10, 2),
    photon_topoetcone40 DECIMAL(10, 2),
    photon_isLooseID BOOLEAN,
    photon_isTightID BOOLEAN,
    photon_isLooseIso BOOLEAN,
    photon_isTightIso BOOLEAN
)
"""

# Create the table if it is missing; a connector error is reported and the
# cell carries on. Unlike the jets/leptons cells, no DROP is issued here.
try:
    cursor.execute(query_photon)
    print("Tabla 'photons' creada o ya existente.")
except mysql.connector.Error as err:
    print(f"Error al crear la tabla 'photons': {err}")

# One tuple per photon: the index value (collision_ID) followed by the column
# values, in the same order as the placeholders of the INSERT below.
photon_tuplas = list(photon_df.itertuples(index=True, name=None))

# Parameterized INSERT; photon_ID is omitted because the table auto-increments it.
sql_photon = """
INSERT INTO photons (
    collision_ID, photon_n, photon_pt, photon_eta, photon_phi, photon_e,
    photon_ptcone20, photon_topoetcone40, photon_isLooseID, photon_isTightID,
    photon_isLooseIso, photon_isTightIso
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

try:
    cursor.executemany(sql_photon, photon_tuplas)
    mydb.commit()
    print(f"{cursor.rowcount} filas añadidas a la tabla 'photons'.")
except mysql.connector.Error as err:
    print(f"Error al insertar datos en la tabla 'photons': {err}")
    # Undo whatever part of the batch was applied before the error.
    mydb.rollback()
finally:
    cursor.close()
    mydb.close()

Tabla 'photons' creada o ya existente.
116 filas añadidas a la tabla 'photons'.
