In [12]:
import glob
import pandas as pd
import numpy as np
# Colab-only module; this import fails outside Google Colab.
from google.colab import drive 
# Mount Google Drive at /content/drive; the data paths used in later cells
# live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [112]:
# Choose which fields to use.
def get_col_names(file_path, skiprows=100, nrows=100):
  """Build the list of column names to load from one data file.

  A window of the file is read with pandas and the first field of every
  row in that window is treated as a candidate variable name. Note that
  `read_csv` consumes the first unskipped line as its own header, so that
  line is not a candidate.
  NOTE(review): the default window (lines 102-201) presumably covers the
  variable-definition part of the file header -- confirm for files with a
  different header layout.

  Args:
    file_path: path of the file to inspect.
    skiprows: number of leading lines to skip before the window.
    nrows: maximum number of rows to read in the window.

  Returns:
    A new list starting with the time/position columns ('UTC_Start',
    'G_LAT', 'G_LONG', 'G_ALT') followed by every candidate whose name
    contains 'CAFS' but not 'dnwFrac', in file order.
  """
  time_space = ['UTC_Start', 'G_LAT', 'G_LONG', 'G_ALT']
  window = pd.read_csv(file_path, skiprows=skiprows, nrows=nrows)
  names = window.iloc[:, 0]
  # Blank first fields are read as NaN (a float); the substring test would
  # raise TypeError on them, so only real strings are considered.
  selected = [
      item for item in names
      if isinstance(item, str) and 'CAFS' in item and 'dnwFrac' not in item
  ]
  return time_space + selected

# Get the relevant data.
def get_data(file_path, col_names):
  """Load the selected columns of one data file into a DataFrame.

  The first field of the file's first line is read as the number of header
  lines. All but the last of those lines are skipped, so the last header
  line supplies the column names.

  Args:
    file_path: path of the file to read.
    col_names: names of the columns to keep (passed to `usecols`).

  Returns:
    DataFrame holding the requested columns, indexed by the first of them
    (left as plain numbers), with '-99999.0' entries read as missing.
  """
  first_line = pd.read_csv(file_path, nrows=1, header=None)
  # Take the scalar explicitly: int() on a one-element Series is deprecated.
  pre_rows = int(first_line.iloc[0, 0])
  # The index column is deliberately not date-parsed here: get_date_time
  # converts it as a number of seconds, which requires numeric values.
  relevant_data = pd.read_csv(
      file_path,
      skiprows=pre_rows - 1,
      index_col=0,
      usecols=col_names,
      na_values='-99999.0',
  )
  return relevant_data

# Get the date and times.
def get_date_time(file_path, data):
  """Replace the seconds-offset index of `data` with absolute timestamps.

  The date is taken from the file name: the last underscore-separated
  token made of exactly 8 digits is read as YYYYMMDD (for example
  'MER10_DC8_20160729_R35.ict' gives 2016-07-29). If the name has no such
  token, the original fixed slice `file_path[-16:-8]` is used instead.

  Args:
    file_path: path of the file `data` was read from.
    data: DataFrame whose index holds numeric seconds counted from the
      start of that date. It is modified in place.

  Returns:
    The same DataFrame, with a new 'UTC_Start_dt' column and with that
    column installed as the index.
  """
  # Look at the file name only, so digits in directory names are ignored.
  file_name = file_path.replace('\\', '/').rsplit('/', 1)[-1]
  stem = file_name.rsplit('.', 1)[0]
  date_tokens = [tok for tok in stem.split('_') if len(tok) == 8 and tok.isdigit()]
  date = date_tokens[-1] if date_tokens else file_path[-16:-8]
  date = f'{date[0:4]}-{date[4:6]}-{date[6:8]}'
  data['UTC_Start_dt'] = pd.to_datetime(data.index, unit='s', origin=date)
  data.index = data['UTC_Start_dt']
  return data

In [106]:
# Tests on 1 file.
file_path = '/content/drive/MyDrive/Documents/AI4ER/PhD/Photolysis_data/ATom_MER10_Dataset.20210613/MER10_DC8_20160729_R35.ict'

# Step 1: which columns get selected, and how many.
col_names = get_col_names(file_path)
print(len(col_names))
print(col_names)

# Step 2: load those columns and preview the first two rows.
# head(2) is always positional; data[0:2] is label-based when the index is
# float-typed and could then print an empty frame.
data = get_data(file_path, col_names)
print(data.shape)
print(data.head(2))

# Step 3: switch to the timestamp index and preview again.
data = get_date_time(file_path, data)
print(data.head(2))

51
['UTC_Start', 'UTC_Stop', 'G_LAT', 'G_LONG', 'G_ALT', 'jO3_O2_O1D_CAFS', 'jNO2_NO_O3P_CAFS', 'jH2O2_OH_OH_CAFS', 'jNO3_NO_O2_CAFS', 'jNO3_NO2_O3P_CAFS', 'jN2O5_NO3_NO2_CAFS', 'jHNO2_OH_NO_CAFS', 'jHNO3_OH_NO2_CAFS', 'jHNO4_HO2_NO2_UvVisOnly_CAFS', 'jCH2O_H_HCO_CAFS', 'jCH2O_H2_CO_CAFS', 'jCH3CHO_CH3_HCO_CAFS', 'jPropanal_CH2CH3_HCO_CAFS', 'jCH3OOH_CH3O_OH_CAFS', 'jMeONO2_CH3O_NO2_CAFS', 'jEthONO2_CH3CH2O_NO2_CAFS', 'jPAN_CH3COO2_NO2_CAFS', 'jPAN_CH3COO_NO3_CAFS', 'jMAC_NoProductsSpecified_CAFS', 'jMVK_NoProductsSpecified_CAFS', 'jAcetone_CH3CO_CH3_CAFS', 'jMEK_CH3CO_CH2CH3_CAFS', 'jHydroxyacetone_CH3CO_CH3O_CAFS', 'jHydroxyacetone_CH3COO_CH3_CAFS', 'jCHOCHO_HCO_HCO_CAFS', 'jCHOCHO_H2_2CO_CAFS', 'jCHOCHO_CH2O_CO_CAFS', 'jCH3COCHO_CH3CO_HCO_CAFS', 'j23Butanedione_NoProductsSpecified_CAFS', 'jCl2_Cl_Cl_CAFS', 'jClO_Cl_O3P_CAFS', 'jClNO2_Cl_NO2_CAFS', 'jClONO_Cl_NO2_CAFS', 'jClONO2_Cl_NO3_CAFS', 'jClONO2_ClO_NO2_CAFS', 'jBr2_Br_Br_CAFS', 'jBrO_Br_O_CAFS', 'jHOBr_OH_Br_CAFS', 'jBrNO_Br_N

In [117]:
# Make a .csv file with all the photolysis data.
folder_path = '/content/drive/MyDrive/Documents/AI4ER/PhD/Photolysis_data/ATom_MER10_Dataset.20210613'
# glob returns matches in arbitrary order; sorting makes the row order of
# the output reproducible. NOTE(review): the sample file name embeds the
# date as YYYYMMDD, so name order is presumably chronological -- confirm
# that every file shares the same name prefix.
all_files = sorted(glob.glob(folder_path + '/*.ict'))
if not all_files:
  # Fail with a clear message instead of pd.concat's "No objects to concatenate".
  raise FileNotFoundError(f'No .ict files found in {folder_path}')

# Collect one DataFrame per file and concatenate once at the end.
frames = []
for each_file in all_files:
  col_names = get_col_names(each_file)
  data = get_data(each_file, col_names)
  data = get_date_time(each_file, data)
  frames.append(data)

all_data = pd.concat(frames, axis=0)
# The timestamps are already the index, so the duplicate column is dropped.
all_data = all_data.drop(['UTC_Start_dt'], axis=1)
out_path = folder_path + '/photolysis_data.csv'
all_data.to_csv(out_path)