In [1]:
# 2D CNN

# importing all needed functions
import os
from astropy.io import fits
import numpy as np
import glob
import time
from Tools import tools
import traceback
from pathlib import Path
import pandas as pd
import line_profiler 
from tqdm import tqdm
from line_profiler import profile
%load_ext line_profiler

In [2]:
# reading the ttsplit file
df = pd.read_csv('ttsplit')

# getting the test and train events
test_events = df[df['category'] == 'test']['filename'].tolist()
print(test_events[:10])  

train_events = df[df['category'] == 'train']['filename'].tolist()
print(train_events[:10])  

['TGF_bn190821888', 'SFLARE_bn220520007', 'GRB_bn100224112', 'TGF_bn151209879', 'SGR_bn220114673', 'SFLARE_bn110309971', 'TGF_bn100331421', 'TGF_bn150209374', 'SFLARE_bn130421669', 'SGR_bn160623809']
['SFLARE_bn140204646', 'SFLARE_bn240208118', 'SFLARE_bn240204601', 'TGF_bn221114697', 'SGR_bn211226538', 'TGF_bn100901124', 'GRB_bn150403913', 'TGF_bn170911550', 'TGF_bn101012231', 'SGR_bn160623838']


In [3]:
@profile
def process_folder(folder, bin_list,ti,data_set_path,source_data_set_path,error_folders, plot = False):
    # try:
    event_type,event = folder.split("_")

    mod_folder = folder[:-1] if folder [-1] == 'd' else folder
    # print(mod_folder)

    # check if the file is processed
    data_file_path = Path(data_set_path) / f"{folder}"
    if data_file_path.exists():
        print(f"File already processed: {data_file_path}")
        return

    # Construct file pattern and find matching files
    file_pattern = str(Path(source_data_set_path) / mod_folder / 'current' / '*_tte_*')
    NaI_detector = glob.glob(file_pattern)

    # print(NaI_detector)

    # Read data from FITS file
    with fits.open(NaI_detector[0], memmap=True) as hdul:
        all_count_data = hdul[2].data
        trigtime = float(hdul[2].header['TRIGTIME'])

    # Extract times and channels from all_count_data
    counts = all_count_data['TIME'].astype(np.float64) - trigtime

    # print(counts[:10])
    # print(counts[-10:])

    data_array = []

    for i in bin_list:
        # Define the range and number of bins
        range_min = ti[0]
        range_max = ti[-1]
            
        bin_size = i

        # Create bin edges
        bin_edges = np.arange(range_min, range_max, bin_size)

        # Create the histogram using numpy.histogram
        hist, _ = np.histogram(counts, bins=bin_edges)

        # print(hist[10:])
        # print(hist[-10:])

        data_array = data_array + list(hist)    

    data_array = np.array(data_array)

    # Save the 2D array to a text file
    data = os.path.join(data_set_path,event_type+'_'+event)
    np.savetxt(data, data_array, fmt='%d', delimiter='\t')
    # print(f'saved to {data}')

    # except Exception as e:
    #     print(f'error {e} in ',folder)
    #     traceback.print_exc()
    #     error_folders.append(folder)

In [4]:
if True:
   # name of the data set
   source_data_set_path = r"D:\GRB_data\500_data_set"

   # Get a list of all folders in the specified directory
   folders = [str(folder) for folder in os.listdir(source_data_set_path) if os.path.isdir(os.path.join(source_data_set_path, folder))]

   # list of bin sizes
   bin_list = [0.016,0.064,0.256,1.024,4.096]

   # time interval around trigger
   ti = [-20,100]
   t = ti[1] - ti[0]

   # # number of datapoints in a light curve
   # data_no = int(t / min(bin_list)) 
   # print('number of data point' , data_no)
   
   dir_path = tools.json_path(r'data_path.json')
   data_set_name = "2DCNN_ds2"

   # creating the data set folder
   data_set_path = os.path.join(dir_path,data_set_name)
   tools.create_folder(data_set_path,carefull=True)

   # creating the test and train directories
   test_path = os.path.join(data_set_path, 'test')
   train_path = os.path.join(data_set_path, 'train')
   other_path = os.path.join(data_set_path, 'other')
   tools.create_folder(test_path,carefull=True)
   tools.create_folder(train_path,carefull=True)
   tools.create_folder(other_path,carefull=True)
   print('start')
   print('total : ',len(folders))
   c = 0

   # Measure execution time
   start_time = time.time()

   # Writing the parameters to a json file
   params_dict = {"bin list" : bin_list, "time interval" : ti, "number of data points" : 'n/a', "data set name" : data_set_name, "data set path" : data_set_path}
   write_json_file = tools.write_json_file(params_dict,os.path.join(data_set_path,'params.json'))

   error_folders = []

   profiler = line_profiler.LineProfiler()
   profiler.add_function(process_folder)
   profiler.enable_by_count()

# processing test data
print('saving processed events to ',test_path)
for folder in tqdm(test_events[:], desc="Processing folders", unit="folder"):
   process_folder(folder,bin_list,ti,test_path,source_data_set_path,error_folders)

# processing train data
print('saving processed events to ',train_path)
for folder in tqdm(train_events[:], desc="Processing folders", unit="folder"):
   process_folder(folder,bin_list,ti,train_path,source_data_set_path,error_folders)   
         
profiler.print_stats()
print('\n----------------------------------------------------------------------------\n\nevents', folders, ' in folder', data_set_path)
end_time = time.time()

# Calculate and print the elapsed time
elapsed_time = end_time - start_time
print(f"Elapsed Time: {elapsed_time:.2f} seconds")

print("errors occured in:")
for folder in error_folders:
    print(folder)


Folder 'C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2' created successfully.
Folder 'C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\test' created successfully.
Folder 'C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\train' created successfully.
Folder 'C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\other' created successfully.
start
total :  4374
Data has been written to C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\params.json
saving processed events to  C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\test


Processing folders: 100%|██████████| 784/784 [03:34<00:00,  3.66folder/s]


saving processed events to  C:\Users\arpan\OneDrive\Documents\GRB\data\2DCNN_ds2\train


Processing folders: 100%|██████████| 3200/3200 [26:10<00:00,  2.04folder/s]    

Timer unit: 1e-07 s

Total time: 1757.16 s
File: C:\Users\arpan\AppData\Local\Temp\ipykernel_40584\3338671239.py
Function: process_folder at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           @profile
     2                                           def process_folder(folder, bin_list,ti,data_set_path,source_data_set_path,error_folders, plot = False):
     3                                               # try:
     4      3984     148049.0     37.2      0.0      event_type,event = folder.split("_")
     5                                           
     6      3984      58876.0     14.8      0.0      mod_folder = folder[:-1] if folder [-1] == 'd' else folder
     7                                               # print(mod_folder)
     8                                           
     9                                               # check if the file is processed
    10      3984    4919646.0   1234.9      0.0      dat


