In [1]:
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob

In [11]:
#-- Classification classes:
# C0 for events in standard model
# C1 for events of new physics

#-- Filenames for 0 class (background noise as physics from standard model):
#-- Process: tth, tttt, ttbb/h
#-- 'bkg1.csv', 'bkg2.csv', 'bkg3.csv'

#-- Filenames for 1 class (positive signal as physics beyond standard model):
#-- Process: M(z')=250 GeV, M(z')=350 GeV, M(z')=1000 GeV
#-- 'signal250.csv', 'signal350.csv', 'signal1000.csv'

#-- Folder for images:
#-- 'C0_bkg1/', 'C0_bkg2/', 'C0_bkg3/' -> Class 0
#-- 'C1_signal250/', 'C1_signal350/', 'C1_signal1000/', 'C1_signal3000/' -> Class 1

#-- Data sources:
fname_C0 = 'bkg3'
fname_C1 = 'signal1000'
folder_C0 = 'C0_'+fname_C0+'/'
folder_C1 = 'C1_'+fname_C1+'/'

#-- Name for images:
figname_C0 = 'C0_'+fname_C0
figname_C1 = 'C1_'+fname_C1

#-- Total number of events for the analysis, taken as the number of PNG images
#-- found in the class-0 folder (the same count is then used for class 1).
#-- glob.glob is the documented API; glob.glob1 is an undocumented helper that
#-- is deprecated since Python 3.13.
Tot_Events = len(glob.glob(folder_C0 + '*.png'))

#-- Pillow mode every image is converted to when it is loaded. The single
#-- gray-scale value per pixel is computed afterwards from the R, G, B channels:
format_fig = 'RGBA'

In [12]:
#-- Convert image to 1D array
def convert_to_array_image(filename, format_fig):
    """Load an image file and return its pixel values as a flat array.

    Parameters
    ----------
    filename : str or path-like
        Image file opened with Pillow.
    format_fig : str
        Pillow mode the image is converted to (e.g. ``'RGBA'``).

    Returns
    -------
    pixels : numpy.ndarray
        Pixel values in the order produced by ``Image.getdata()`` (scan line
        after scan line); multi-band modes give one row per pixel and one
        column per band.
    size : tuple
        ``(width, height)`` of the image in pixels.
    bands : list of str
        Band names of the converted image, e.g. ``['R', 'G', 'B', 'A']``.
    """
    # The context manager closes the underlying file even when the conversion
    # raises; the converted image is an independent in-memory copy.
    with Image.open(filename) as source:
        converted = source.convert(format_fig)

    # Read everything that is needed before the converted image is released.
    pixels = np.array(converted.getdata())
    size = converted.size
    # getbands() names each band; splitting the mode string into characters
    # would be wrong for multi-character band names such as 'YCbCr'.
    bands = list(converted.getbands())
    converted.close()
    return pixels, size, bands

#-- Creates 1D array value-image for analysis:
def Image_array(fig_name_C0, format_fig):
    """Sample one gray-scale value per cell of a 3 x 5 event image.

    The image is treated as a grid of 3 rows (pT, phi, eta) by 5 columns
    (bjets0, bjets1, jets0, jets1, leptons). The pixel at the top-left corner
    of every cell is read and reduced to a single intensity with
    ``I = 0.299*R + 0.587*G + 0.114*B``.

    Parameters
    ----------
    fig_name_C0 : str
        Path of the image file to read.
    format_fig : str
        Pillow mode passed on to ``convert_to_array_image``; it must provide
        at least three bands (R, G, B in that order).

    Returns
    -------
    numpy.ndarray
        15 intensities ordered row by row (all 5 columns of row 0 first).

    Raises
    ------
    ValueError
        If the loaded pixel data does not have at least three bands.
    """
    pixels, size, _bands = convert_to_array_image(fig_name_C0, format_fig)
    pixels = np.asarray(pixels)
    if pixels.ndim != 2 or pixels.shape[1] < 3:
        raise ValueError(
            "Image_array needs at least 3 bands per pixel (R, G, B); got shape "
            + str(pixels.shape)
        )

    width = size[0]
    # The cell edge is width/5 for both axes (cells are assumed square, i.e.
    # width/5 == height/3). Integer arithmetic keeps the row and column offsets
    # on whole pixels; multiplying a fractional cell size into the flat index
    # would shift the sample to a wrong pixel when width is not a multiple of 5.
    rows = (np.arange(3) * width) // 5
    cols = (np.arange(5) * width) // 5
    # Flat index of pixel (row, col) in scan-line order is row*width + col.
    flat_index = (rows[:, None] * width + cols[None, :]).ravel()

    rgb = pixels[flat_index, :3]
    return 0.299 * rgb[:, 0] + 0.587 * rgb[:, 1] + 0.114 * rgb[:, 2]

In [13]:
#-- Stock data for each folder of images:

def _event_features(folder, figname, n_events):
    """Return one feature row (as produced by Image_array) per event image.

    Files are read as ``<folder><figname>_Event_<k>.png`` for k = 1..n_events.
    """
    rows = []
    for event in range(n_events):
        fig_name = folder + figname + '_Event_' + str(event + 1) + '.png'
        rows.append(Image_array(fig_name, format_fig))
    return rows

#-- Rows are gathered in plain lists and each table is built once: calling
#-- pd.concat inside the loop copies every earlier row on each iteration.
#-- NOTE: Tot_Events is counted from the class-0 folder only, so the class-1
#-- folder has to contain at least that many images.
datbkg = pd.DataFrame(_event_features(folder_C0, figname_C0, Tot_Events))  # Class 0 files
datsgn = pd.DataFrame(_event_features(folder_C1, figname_C1, Tot_Events))  # Class 1 files

#-- Class labels: 0 for background, 1 for signal.
targetbkg = [0] * Tot_Events
targetsgn = [1] * Tot_Events

tags_bkg = pd.DataFrame(targetbkg, columns=['Targets'])
datbkg = pd.concat([datbkg, tags_bkg], axis=1)

tags_sgn = pd.DataFrame(targetsgn, columns=['Targets'])
datsgn = pd.concat([datsgn, tags_sgn], axis=1)

datbkg.to_csv(fname_C0+'_img_dat.csv')
datsgn.to_csv(fname_C1+'_img_dat.csv')