In [32]:
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob

In [40]:
#-- Classification classes:
# C0 for events in standard model
# C1 for events of new physics

#-- Filenames for 0 class (background noise as physics from standard model):
#-- Process: tth, tttt, ttbb/h
#-- 'bkg1.csv', 'bkg2.csv', 'bkg3.csv'

#-- Filenames for 1 class (positive signal as physics beyond standard model):
#-- Process: M(z')=250 GeV, M(z')=350 GeV, M(z')=1000 GeV
#-- 'signal250.csv', 'signal350.csv', 'signal1000.csv'

#-- Folder for images:
#-- 'C0_bkg1/', 'C0_bkg2/', 'C0_bkg3/' -> Class 0
#-- 'C1_signal250/', 'C1_signal350/', 'C1_signal1000/', 'C1_signal3000/' -> Class 1

#-- Data sources:
fname_C0 = 'bkg3'
fname_C1 = 'signal1000'
folder_C0 = 'C0_'+fname_C0+'/'
folder_C1 = 'C1_'+fname_C1+'/'

#-- Name for images:
figname_C0 = 'C0_'+fname_C0
figname_C1 = 'C1_'+fname_C1

#-- Counts the PNG images in the class-0 folder; this number is used as the total
#-- number of events to analyse for each class (0 and 1).
#-- glob.glob is the public API (glob.glob1 is an undocumented helper that is
#-- deprecated in recent Python versions); glob.escape keeps any wildcard
#-- characters in the folder name literal.
Tot_Events = len(glob.glob(glob.escape(folder_C0) + '*.png'))

#-- Pixel format every image is converted to when it is opened. 'RGBA' keeps the
#-- separate colour channels; the single gray-level value per pixel is computed
#-- afterwards from the R, G and B channels:
format_fig = 'RGBA'

In [41]:
#-- Convert image to 1D array
def convert_to_array_image(filename, format_fig):
    """Load an image file and return its pixels as a flat sequence.

    Parameters
    ----------
    filename : str
        Path of the image file to open.
    format_fig : str
        PIL mode the image is converted to before reading (e.g. 'RGBA').

    Returns
    -------
    tuple
        ``(pixels, size, bands)`` where ``pixels`` is ``np.array`` of the
        converted image's ``getdata()`` sequence (one entry per pixel),
        ``size`` is the image's ``size`` attribute and ``bands`` is the
        mode string split into a list of single characters.
    """
    # The context manager closes the source file even if the conversion fails.
    with Image.open(filename) as source:
        Pic = source.convert(format_fig)
    try:
        # Read everything that is needed *before* releasing the converted image,
        # instead of touching its attributes after close().
        pixels = np.array(Pic.getdata())
        size = Pic.size
        bands = list(Pic.mode)
    finally:
        Pic.close()
    return pixels, size, bands

#-- Creates 1D array value-image for analysis:
def Image_array(fig_name_C0, format_fig):
    """Sample a 3x5 grid of pixels from an image and return their gray levels.

    The image is treated as 3 rows by 5 columns of square cells whose edge is
    ``width / 5`` pixels, and the pixel at the top-left corner of every cell is
    sampled. NOTE(review): this presumes the image height is 3/5 of its width
    (the original comment states dpi = Pic_size[1]/3) -- confirm for new inputs.

    Parameters
    ----------
    fig_name_C0 : str
        Path of the image file (used for images of either class).
    format_fig : str
        PIL mode passed to ``convert_to_array_image``; the first three bands
        are read as R, G and B.

    Returns
    -------
    numpy.ndarray
        15 values ordered row by row, each computed as
        I = 0.299R + 0.587G + 0.114B.
    """
    pixels, pic_size, _bands = convert_to_array_image(fig_name_C0, format_fig)
    width = pic_size[0]
    cell = width/5 # edge of one grid cell in pixels (= pic_size[1]/3)

    # The 15 samples are read straight from the pixel array; wrapping every
    # pixel of the image in a DataFrame just to look up 15 rows is unnecessary.
    intensities = []
    for row in range(3): # row runs for pT, phi and eta.
        for col in range(5): # col runs for bjets0, bjets1, jets0, jets1 and leptons.
            # Flat index of the pixel at (row*cell, col*cell) in row-major order.
            flat = int(cell*(width*row+col))
            red = pixels[flat, 0]
            green = pixels[flat, 1]
            blue = pixels[flat, 2]
            intensities.append(0.299*red + 0.587*green + 0.114*blue)
    return np.array(intensities)

In [42]:
#-- Stock data for each folder of images:

#-- (folder, image-name prefix, class label) of every class. For each event the
#-- class-0 row is stored first and the class-1 row second, so classes alternate.
sources = [(folder_C0, figname_C0, 0), (folder_C1, figname_C1, 1)]

rows = []    # one vector of sampled intensities per image
target = []  # class label of each row, in the same order as `rows`
for Event in range(Tot_Events):
    for folder, figname, label in sources:
        # Image files are numbered from 1: <prefix>_Event_<n>.png
        fig_name = folder+figname+'_Event_'+str(Event+1)+'.png'
        rows.append(Image_array(fig_name, format_fig))
        target.append(label)

#-- Build the frame once from the collected rows; growing a DataFrame with
#-- pd.concat inside the loop re-copies every existing row on each iteration.
dat = pd.DataFrame(rows)

tags = pd.DataFrame(target, columns=['Targets'])
dat = pd.concat([dat, tags], axis=1)

dat.to_csv(fname_C0+fname_C1+'.csv')

In [43]:
#-- Display the assembled `dat` table as the cell output:
dat

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Targets
0,156.220,139.398,89.448,45.466,27.160,100.312,152.340,168.951,55.485,70.033,108.121,116.160,110.931,106.659,106.371,0
1,189.048,167.558,118.907,41.982,99.601,157.437,86.224,93.056,180.144,50.598,109.469,116.160,103.561,128.576,139.090,1
2,96.117,94.231,121.391,27.160,44.281,154.131,170.319,81.603,168.951,45.216,102.686,86.398,144.146,115.573,90.599,0
3,166.672,165.074,85.078,65.772,74.626,124.379,159.384,107.728,51.963,94.371,99.474,104.909,97.137,115.573,109.181,1
4,104.384,89.448,117.907,35.014,53.434,215.465,89.013,36.545,210.083,160.748,106.371,111.991,115.573,73.855,144.630,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,177.597,140.697,187.646,52.434,111.939,47.010,124.379,200.689,91.262,173.472,119.166,120.226,103.273,105.496,114.513,1
396,113.537,98.601,131.544,101.199,19.192,107.728,160.748,90.381,166.899,155.862,104.909,109.469,110.056,96.550,98.599,0
397,131.544,128.946,194.314,43.868,187.059,68.239,93.003,214.777,41.927,121.229,119.639,123.243,107.246,102.099,114.513,1
398,136.914,98.601,110.939,23.975,106.569,66.445,190.123,44.919,54.186,180.183,113.926,117.807,110.056,99.762,116.160,0
