# Import log files into panda dataframes

In [2]:
import pandas as pd
from glob import glob
import os


# List all .dat files in the current directory
# CV lab
# dat_file_root = "/home/joy0921/Desktop/2023S/Dataset/SNfeedback"

# Compute2.idsl
# dat_file_root = "/home/joy0921/Desktop/Dataset/SNfeedback"

# Mac
dat_file_root = "SNfeedback"

dat_files = glob(os.path.join(dat_file_root, "*.dat"))

# Initialize an empty DataFrame
all_data = pd.DataFrame()

# Read and concatenate data from all .dat files
for dat_file in dat_files:
    # Assuming space-separated values in the .dat files
    df = pd.read_csv(dat_file, delim_whitespace=True, header=None,
                     names=['n_SN', 'type', 'n_timestep', 'n_tracer', 'time',
                            'posx', 'posy', 'posz', 'radius', 'mass'])
    
    # Convert the columns to numerical
    df = df.iloc[1:]
    df['n_SN'] = df['n_SN'].map(int)
    df['type'] = df['type'].map(int)
    df['n_timestep'] = df['n_timestep'].map(int)
    df['n_tracer'] = df['n_tracer'].map(int)
    df['time'] = pd.to_numeric(df['time'],errors='coerce')
    df['posx'] = pd.to_numeric(df['posx'],errors='coerce')
    df['posy'] = pd.to_numeric(df['posy'],errors='coerce')
    df['posz'] = pd.to_numeric(df['posz'],errors='coerce')
    df['radius'] = pd.to_numeric(df['radius'],errors='coerce')
    df['mass'] = pd.to_numeric(df['mass'],errors='coerce')
    all_data = pd.concat([all_data, df], ignore_index=True)

all_data.head()


Unnamed: 0,n_SN,type,n_timestep,n_tracer,time,posx,posy,posz,radius,mass
0,1,2,12,0,2869811000000.0,9.763277e+20,1.530785e+21,-1.072755e+21,1.243486e+20,4.098446e+35
1,2,1,20,0,4796885000000.0,-1.145076e+21,-1.434358e+21,-1.205343e+19,3.511398e+19,9.042147e+35
2,3,2,26,0,5738623000000.0,3.977631e+20,-1.33793e+21,4.700837e+20,4.27845e+19,4.298436e+35
3,4,2,34,0,8607434000000.0,-1.120969e+21,1.482572e+21,-1.084808e+20,3.511398e+19,8.470545999999999e+35
4,5,1,40,0,9592771000000.0,1.434358e+21,-8.557934e+20,2.205777e+21,3.085678e+20,4.513983e+33


In [4]:
# convert seconds to Megayears
def seconds_to_megayears(seconds):
    return seconds / (1e6 * 365 * 24 * 3600)

# Convert pixel value to pc
def pixel2pc(pixel):
    return (pixel * 10) / 8

def cm2pc(cm):
    return cm * 3.24077929e-19

# filter the DataFrame
def filter_data(df, range_coord):
    return df[(df['posx_pc'] > range_coord[0]) & (df['posx_pc'] < range_coord[0] + range_coord[2]) & (df['posy_pc'] > range_coord[1]) & (df['posy_pc'] < range_coord[1] + range_coord[3]) & (df['posz_pc'] > range_coord[4] & (df['posz_pc'] < range_coord[5]))]


# Convert time to Megayears
all_data['time_Myr'] = seconds_to_megayears(all_data['time'])

# Convert 'pos' from centimeters to parsecs
all_data['posx_pc'] = cm2pc(all_data['posx'])
all_data['posy_pc'] = cm2pc(all_data['posy'])
all_data['posz_pc'] = cm2pc(all_data['posz'])

# Sort the DataFrame by time in ascending order
all_data.sort_values(by='time_Myr', inplace=True)

# convert the cube range to pc
low_x0, low_y0, low_w, low_h, bottom_z, top_z = 386,456, 350, 343, -400, 200
low_x0, low_y0, low_w, low_h = pixel2pc(low_x0) - 500, pixel2pc(low_y0) - 500, pixel2pc(low_w), pixel2pc(low_h)

In [9]:
low_x0, low_y0, low_w, low_h, bottom_z, top_z = 386,456, 350, 343, -400, 200
low_x0, low_y0, low_w, low_h = pixel2pc(low_x0) - 500, pixel2pc(low_y0) - 500, pixel2pc(low_w), pixel2pc(low_h)

In [5]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96771 entries, 0 to 38215
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   n_SN        96771 non-null  int64  
 1   type        96771 non-null  int64  
 2   n_timestep  96771 non-null  int64  
 3   n_tracer    96771 non-null  int64  
 4   time        96771 non-null  float64
 5   posx        96771 non-null  float64
 6   posy        96771 non-null  float64
 7   posz        96771 non-null  float64
 8   radius      96771 non-null  float64
 9   mass        96771 non-null  float64
 10  time_Myr    96771 non-null  float64
 11  posx_pc     96771 non-null  float64
 12  posy_pc     96771 non-null  float64
 13  posz_pc     96771 non-null  float64
dtypes: float64(10), int64(4)
memory usage: 11.1 MB


In [10]:
low_x0, low_y0, low_w, low_h, bottom_z, top_z

(-17.5, 70.0, 437.5, 428.75, -400, 200)

In [11]:
start_yr = 199
end_yr = 200

# Filter data based on specified conditions
filtered_data = filter_data(all_data[(all_data['time_Myr'] >= start_yr) & (all_data['time_Myr'] <= end_yr)], (low_x0, low_y0, low_w, low_h, bottom_z, top_z))
# filtered_data = filter_data(all_data[(all_data['time'] >= start_yr) & (all_data['time'] <= end_yr)], (-50, 50, 500, 500))


# Print the resulting DataFrame
print(filtered_data)

       n_SN  type  n_timestep  n_tracer          time          posx  \
32585  6493     1       50361         0  6.287407e+15  4.158433e+20   
90850  6489     1       50353      1995  6.287407e+15  4.158433e+20   
52998  6493     1       50361         0  6.287407e+15  4.158433e+20   
45032  6493     1       50361         0  6.287407e+15  4.158433e+20   
76964  6493     1       50361         0  6.287407e+15  4.158433e+20   
20617  6493     1       50361         0  6.287407e+15  4.158433e+20   
6492   6493     1       50361         0  6.287407e+15  4.158433e+20   
54761  6489     1       50353      1995  6.287407e+15  4.158433e+20   
89087  6493     1       50361         0  6.287407e+15  4.158433e+20   
8255   6489     1       50353      1995  6.287407e+15  4.158433e+20   
67123  6493     1       50361         0  6.287407e+15  4.158433e+20   
68886  6489     1       50353      1995  6.287407e+15  4.158433e+20   
22380  6489     1       50353      1995  6.287407e+15  4.158433e+20   
68892 

In [12]:
filtered_data.drop_duplicates(subset=['posx_pc', 'n_tracer', 'n_SN'], keep='last')
filtered_data

Unnamed: 0,n_SN,type,n_timestep,n_tracer,time,posx,posy,posz,radius,mass,time_Myr,posx_pc,posy_pc,posz_pc
32585,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
90850,6489,1,50353,1995,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,1.7791e+35,199.372362,134.765626,435.546866,732.421888
52998,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
45032,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
76964,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
20617,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
6492,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
54761,6489,1,50353,1995,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,1.7791e+35,199.372362,134.765626,435.546866,732.421888
89087,6493,1,50361,0,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,2.6267069999999997e+35,199.372362,134.765626,435.546866,732.421888
8255,6489,1,50353,1995,6287407000000000.0,4.158433e+20,1.343957e+21,2.260018e+21,3.085678e+20,1.7791e+35,199.372362,134.765626,435.546866,732.421888


In [None]:
filtered_data.drop(columns=['posx', 'posy', 'posz', 'time'], inplace=True)
filtered_data.to_csv('SNfeedback_185_200.txt', sep='\t', index=False, encoding='utf-8')