In [1]:
import os
os.chdir(f"../../..")
from SOURCE.CLASS_CODE_GPU_Classes import *
from SOURCE.CLASS_CODE_Image_Manager import *
import numpy as np
import json
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from IPython import display
from time import time
import h5py

In [2]:
# Set the PARAMETERS ############################################
##################################################################
experiment_name="Basler_like_R0_300x_w0_300x_Z_50x_64bit"
N_R0 = 70 #130
N_w0 = 70 #130
N_Z = 4 #8

output_directory=f"/home/oiangu/Hippocampus/Conical_Refraction_Polarimeter/OUTPUT/LIBRARIES_OF_THEORETICAL_D/{experiment_name}/"
#output_directory=f"./OUTPUT/LIBRARIES_OF_THEORETICAL_D/{experiment_name}/"
#os.chdir(f"/home/oiangu/Desktop/Conical_Refraction_Polarimeter/")

randomization_seed=666
image_shortest_side=540
saturation=1

# Ring parameters to test (each will be a different simulation)
#phiCR_s=np.linspace(-180,180,360*10**significant_decimal+1)*np.pi/180
R0_s=np.linspace(70, 180, N_R0) #np.linspace(70,180,40) # in pxels 153
w0_s=np.linspace( 8, 40, N_w0) #np.linspace(8,50,40) 11
Z_s=np.linspace(0, 0.9, N_Z)
rho_0s=R0_s/w0_s


resolution_side_nx=image_shortest_side # generated images will be resolution_side x resolution_side
# Other parameters
max_k=50
num_k=1200
sim_chunk_ax=image_shortest_side



##################################################
##################################################

# General considerations
image_directory=f"{output_directory}/SIMULATIONS/" #nx_{image_shortest_side}_depth_{image_depth}_sat_{saturation}
os.makedirs(image_directory, exist_ok=True)
np.random.seed(randomization_seed)

In order to do this data generation in parallel, we will generate the data in parallel processes.
Eventually, when all is finished, we will manually generate a single h5f from the individual parts.

In [47]:
K=5 # workers
N=56 # tareas

for j in range(K):
    print(f"Worker {j} -> {(N//K)*j + j*((N%K-j)>0) + (N%K)*(j>=N%K) } to {(N//K)*(j+1) + (j+1)*((N%K-j-1)>0)+ (N%K)*(j+1>=N%K)}")

Worker 0 -> 0 to 12
Worker 1 -> 12 to 23
Worker 2 -> 23 to 34
Worker 3 -> 34 to 45
Worker 4 -> 45 to 56


In [3]:
TOTAL_WORKERS = 2
WORKER_ID = 0 # beginning from 0 till TOTAL_WORKERS-1

def beg_index(N,j,K):
    # N -> number of total elements in vector, j is worker id, K is total number of workers
    return (N//K)*j + j*((N%K-j)>0) + (N%K)*(j>=N%K)
def end_index(N,j,K):
    return (N//K)*(j+1) + (j+1)*((N%K-j-1)>0)+ (N%K)*(j+1>=N%K)

# we just need to partitionate one of the vectors! If you do it with all of them you do not get the whole!!!
R0_s = R0_s[beg_index(N_R0, WORKER_ID, TOTAL_WORKERS):end_index(N_R0, WORKER_ID, TOTAL_WORKERS)] 
# w0_s = w0_s[beg_index(N_w0, WORKER_ID, TOTAL_WORKERS):end_index(N_w0, WORKER_ID, TOTAL_WORKERS)] 
# Z_s = Z_s[beg_index(N_Z, WORKER_ID, TOTAL_WORKERS):end_index(N_Z, WORKER_ID, TOTAL_WORKERS)]
# rho_0s=R0_s/w0_s

### Using h5f

In [4]:
# Initialize the vigilant
try:
    phase_vigilant = json.load(open(f"{output_directory}/STRUCTURE_Grid_PART_{WORKER_ID}.json"))
except:
    phase_vigilant = {'R0s':[], 'w0s':[], 'Zs':[], 'IDs':[]}

# Set the objects ready ##################
# The simulator object
simulator =RingSimulator_Optimizer_GPU( n=1.5, a0=1.0, max_k=max_k, num_k=num_k, nx=resolution_side_nx, 
                                      sim_chunk_x=sim_chunk_ax, sim_chunk_y=sim_chunk_ax)

# Initialize the hdf5 dataset saver
h5f = h5py.File(f"{image_directory}/Dataset_PART_{WORKER_ID}.h5", 'a') # append if exists, create if not

# save phis
try:
    h5f.create_dataset('phis', data=simulator.phis[:,:,0], compression="lzf", shuffle=True) #, compression_opts=9)
except:
    print(f"phis was already in h5f but not in phase vigilant")
    h5f['phis'][:] = simulator.phis[:,:,0]

# Execute the stuff #####################
i=1
total=Z_s.shape[0]*R0_s.shape[0]*w0_s.shape[0]
elapsed=0
beg=time()
output_info_every=25
dump_every=25

for Z in Z_s:
    for R0 in R0_s:
        for w0 in w0_s:
            ID=f"R0_{str(R0)}_w0_{str(w0)}_Z_{str(Z)}"
            if ID not in phase_vigilant['IDs']:
                # simulate matrix
                D_matrix = simulator.compute_pieces_for_I_LP(R0_pixels=R0, Z=Z, w0_pixels=w0)
                
                if D_matrix is None:
                    raise ValueError
                    
                # save the matrix
                try:
                    h5f.create_dataset(ID, data=D_matrix, compression="lzf", shuffle=True) #, compression_opts=9)
                except: # in case the phase_vigilant did not record it, but it was already in h5f
                    print(f"{ID} was already in h5f but not in phase vigilant")
                    h5f[ID][:] = D_matrix

                #append the data
                phase_vigilant['IDs'].append(ID)
                phase_vigilant['R0s'].append(str(R0))
                phase_vigilant['Zs'].append(str(Z))
                phase_vigilant['w0s'].append(str(w0))
                
                
                if i%output_info_every==0:
                    display.clear_output(wait=True)
                    elapsed=time()-beg
                    print(f"["+'#'*(int(100*i/total))+' '*(100-int(100*i/total))+f"] {100*i/total:3.4}% \n\nSimulated: {i}/{total}\nElapsed time: {elapsed//3600} h {elapsed//60-(elapsed//3600)*60} min {elapsed-(elapsed//60)*60-(elapsed//3600)*60:2.4} s")
                    if i%dump_every==0:
                        h5f.flush()
                        # we save the progess (in order to be able to quit and resume)
                        json.dump(phase_vigilant, open( f"{output_directory}/STRUCTURE_Grid_PART_{WORKER_ID}.json", "w"))
            i+=1
            
display.clear_output(wait=True)
elapsed=time()-beg
print(f"["+'#'*(int(100*i/total))+' '*(100-int(100*i/total))+f"] {100*i/total:3.4}% \n\nSimulated: {i}/{total}\nElapsed time: {elapsed//3600} h {elapsed//60-(elapsed//3600)*60} min {elapsed-(elapsed//60)*60-(elapsed//3600)*60:2.4} s")               
h5f.flush()
# we save the progess (in order to be able to quit and resume)
json.dump(phase_vigilant, open( f"{output_directory}/STRUCTURE_Grid_PART_{WORKER_ID}.json", "w"))

h5f.close()
print(f"\n\nWORKER {WORKER_ID} FINISHED!!!")

[#####                                                                                               ] 5.357% 

Simulated: 525/9800
Elapsed time: 1.0 h 49.0 min -58.67 s


KeyboardInterrupt: 

In [5]:
h5f.flush()
h5f.close()

Now the code to join all the parts individually generated!

In [5]:
raise ValueError
# Open the big h5f that will contain all of the parts
h5f = h5py.File(f"{image_directory}/Dataset_R0_{N_R0}_w0_{N_w0}_Z_{N_Z}.h5", 'w') # append if exists, create if not
total_phase = {'R0s':[], 'w0s':[], 'Zs':[], 'IDs':[]}
for j in range(TOTAL_WORKERS):
    h5f_worker = h5py.File(f"{image_directory}/Dataset_PART_{j}.h5", 'r')
    phase_worker = json.load(open(f"{output_directory}/STRUCTURE_Grid_PART_{j}.json"))
    if j==0:
        h5f.create_dataset('phis', data=h5f_worker['phis'][:], compression="lzf", shuffle=True)
    for ID in phase_worker['IDs']:
        h5f.create_dataset(ID, data=h5f_worker[ID][:], compression="lzf", shuffle=True)
    total_phase['R0s'] = total_phase['R0s'] + phase_worker['R0s']
    total_phase['w0s'] = total_phase['w0s'] + phase_worker['w0s']
    total_phase['Zs'] = total_phase['Zs'] + phase_worker['Zs']
    total_phase['IDs'] = total_phase['IDs'] + phase_worker['IDs']
    h5f_worker.close()
    print(f"Worker {j} done!")
json.dump(total_phase, open( f"{output_directory}/STRUCTURE_Grid_R0_{N_R0}_w0_{N_w0}_Z_{N_Z}.json", "w"))
h5f.close()

Worker 0 done!
Worker 1 done!


In [None]:
raise ValueError

In [4]:
h5f.close()

In [None]:
t=time()
h5f = h5py.File(f"{image_directory}/Dataset.h5",'r')
D_mat = h5f[ID][:]
phis = h5f['phis'][:]
#print(list(h5f.keys()))
h5f.close()
print(f"hf5:{time()-t}s size {os.path.getsize(image_directory+'/Dataset.h5')}")   
print(D_mat.dtype, D_mat.shape)

In [None]:
os.path.getsize(image_directory+'/Dataset.h5')/total

In [None]:
raise ValueError

In [3]:
import pickle, gzip, lzma, bz2
import h5py


# Initialize the vigilant
try:
    phase_vigilant = json.load(open(f"{output_directory}/STRUCTURE_Grid.json"))
except:
    phase_vigilant = {'R0s':[], 'w0s':[], 'Zs':[], 'IDs':[], 'rel_path':[]}

# Set the objects ready ##################
# The simulator object
simulator=RingSimulator_Optimizer_GPU( n=1.5, a0=1.0, max_k=max_k, num_k=num_k, nx=resolution_side_nx, sim_chunk_x=sim_chunk_ax, sim_chunk_y=sim_chunk_ax)

# Execute the stuff #####################
i=1
total=Z_s.shape[0]*R0_s.shape[0]*w0_s.shape[0]
elapsed=0
beg=time()
output_info_every=100
dump_every=10000

for Z in Z_s:
    for R0 in R0_s:
        for w0 in w0_s:
            ID=f"R0_{R0}_w0_{w0}_Z_{Z}"
            if ID not in phase_vigilant['IDs']:
                # simulate matrix
                #D_matrix = simulator.compute_D_matrix( R0_pixels=R0, Z=Z, w0_pixels=w0)
                D_matrix = simulator.compute_pieces_for_I_LP(R0_pixels=R0, Z=Z, w0_pixels=w0)
                
                # save the matrix
                rel_path=f"{image_directory}/{ID}.pkl.lzma"
                #np.save( rel_path, D_matrix, allow_pickle=False)
                print(f"\nDUMP {i}")
                t=time()
                pickle.dump(D_matrix, lzma.open(rel_path, 'wb'))
                print(f"lzma:{time()-t}s size {os.path.getsize(rel_path)}")
                t=time()
                pickle.dump(D_matrix, gzip.open(rel_path+".gzip", 'wb'))
                print(f"gzip:{time()-t}s size {os.path.getsize(rel_path+'.gzip')}")
                t=time()
                pickle.dump(D_matrix, bz2.open(rel_path+".bz2", 'wb'))
                print(f"bz2:{time()-t}s size {os.path.getsize(rel_path+'.bz2')}")
                t=time()
                fp = np.memmap(rel_path+".p", dtype='complex64', mode='w+', shape=(3,image_shortest_side,image_shortest_side))
                fp=D_matrix
                print(f"memmap:{time()-t}s size {os.path.getsize(rel_path+'.p')}")
                t=time()
                h5f = h5py.File(rel_path+'.h5', 'w')
                h5f.create_dataset('dataset_1', data=D_matrix, compression="gzip", shuffle=True) #, compression_opts=9)
                h5f.close()
                print(f"hf5 lzf:{time()-t}s size {os.path.getsize(rel_path+'.h5')}")     


                
                print(f"\nLOAD {i}")
                t=time()
                D_mat=pickle.load(lzma.open(rel_path, 'rb'))
                print(f"lzma:{time()-t}s size {os.path.getsize(rel_path)}")
                t=time()
                D_mat=pickle.load(gzip.open(rel_path+".gzip", 'rb'))
                print(f"gzip:{time()-t}s size {os.path.getsize(rel_path+'.gzip')}")
                t=time()
                D_mat=pickle.load( bz2.open(rel_path+".bz2",'rb'))
                #print(D_mat.dtype, D_mat.shape)
                print(f"bz2:{time()-t}s size {os.path.getsize(rel_path+'.bz2')}")     
                t=time()
                D_mat= np.memmap(rel_path+".p", dtype='complex64', mode='r+', shape=(3,image_shortest_side,image_shortest_side))
                print(f"memmap:{time()-t}s size {os.path.getsize(rel_path+'.p')}")     
                t=time()
                h5f = h5py.File(rel_path+'.h5','r')
                D_mat = h5f['dataset_1'][:]
                h5f.close()
                print(f"hf5:{time()-t}s size {os.path.getsize(rel_path+'.h5')}")     
                print(type(D_mat), D_mat.dtype, D_mat.shape)
                print(np.allclose(D_mat, D_matrix))

                

                if D_matrix is None:
                    raise ValueError

                #append the data
                phase_vigilant['IDs'].append(ID)
                phase_vigilant['R0s'].append(float(R0))
                phase_vigilant['Zs'].append(float(Z))
                phase_vigilant['w0s'].append(float(w0))
                phase_vigilant['rel_path'].append(rel_path)
                
                
                if i%output_info_every==0:
                    display.clear_output(wait=True)
                    elapsed=time()-beg
                    print(f"["+'#'*(int(100*i/total))+' '*(100-int(100*i/total))+f"] {100*i/total:3.4}% \n\nSimulated: {i}/{total}\nElapsed time: {elapsed//3600} h {elapsed//60-(elapsed//3600)*60} min {elapsed-(elapsed//60)*60-(elapsed//3600)*60:2.4} s")
                    if i%dump_every==0:
                        # we save the progess (in order to be able to quit and resume)
                        json.dump(phase_vigilant, open( f"{output_directory}/STRUCTURE_Grid.json", "w"))
            i+=1
print("\n\nFINISHED!!!")

NameError: name 'image_directory' is not defined

In [None]:
%%time
for i in range(1000):
    a=np.abs(D_mat)**2

In [None]:
%%time
for i in range(1000):
    a=(D_mat*D_mat.conjugate()).real

In [None]:
%%time
for i in range(1000):
    a=D_mat.real**2+D_mat.imag**2

In [None]:
%%time
for i in range(1000):
    a=D_mat.real**2
    a+=D_mat.imag**2

In [None]:
np.allclose( (D_mat*D_mat.conjugate()).real, D_mat.real**2+D_mat.imag**2, rtol=0)