In [301]:
import os
import re
import random
import math
import xarray as xa
import numpy as np

In [302]:
def make_directory(base_path, directory_name):
    """Create ``directory_name`` inside ``base_path`` if it does not exist yet.

    Args:
        base_path: Existing parent directory.
        directory_name: Name of the directory to create under ``base_path``.

    Returns:
        True if the directory was created by this call, False if something
        already exists at that path.
    """
    path = os.path.join(base_path, directory_name)
    # Attempt the creation directly instead of checking first: this avoids the
    # window between an existence check and mkdir in which another process
    # could create the same path.
    try:
        os.mkdir(path)
    except FileExistsError:
        return False
    return True

def scrape_directory_name(filename):
    """Return the first 21 characters of ``filename``.

    The result is used as the output directory name, e.g.
    "NALMA_20230629_235000_600_..." -> "NALMA_20230629_235000".
    """
    prefix_length = 21
    return filename[:prefix_length]

#TODO: rename it to list directory nc files
def list_directory_files(path):
    """Return the names of the regular ``.nc`` files directly inside ``path``.

    Sub-directories are skipped even if their name ends in ``.nc``. The result
    is sorted so that processing order does not depend on the arbitrary order
    returned by ``os.listdir``.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        Sorted list of file names (not full paths).
    """
    return sorted(
        name
        for name in os.listdir(path)
        if name.endswith(".nc") and os.path.isfile(os.path.join(path, name))
    )

def extract_variable_name(filename):
    """Pull the ``flash...`` variable name out of a file name.

    The first substring matching "<letter>_flash<letters/underscores>" is
    located and its two leading characters (the letter and the underscore)
    are dropped. Raises IndexError when the file name contains no such match.
    """
    matcher = re.compile("[a-zA-Z]_flash[a-zA-Z_]+")
    hits = matcher.findall(filename)
    first_hit = hits[0]
    return first_hit[2:]

In [303]:
def open_file(input_path):
    """Open the NetCDF file at ``input_path`` as an xarray Dataset.

    Times are left undecoded (``decode_times=False``) and all coordinate-like
    variables are decoded as coordinates (``decode_coords='all'``).

    Args:
        input_path: Path of the NetCDF file to open.

    Returns:
        The opened dataset; the caller is responsible for closing it.
    """
    # Use the parameter: the previous body referenced ``input_file_path``,
    # which is not defined in this function and only resolved if a global of
    # that name happened to exist in the kernel.
    return xa.open_dataset(input_path, engine="netcdf4", decode_coords='all', decode_times=False)

def copy_lat_lon_data(file, var_name1, var_name2, var_name3, number=0):
    """Copy two coordinate arrays and one slice of a data variable out of ``file``.

    Args:
        file: Mapping-like object whose entries expose a ``.data`` array.
        var_name1: Key of the array returned first (latitude in the callers here).
        var_name2: Key of the array returned second (longitude in the callers here).
        var_name3: Key of the data variable; it is indexed by ``number`` on its first axis.
        number: Index of the slice to extract from ``var_name3``.

    Returns:
        Tuple ``(lat, lon, data)`` of independent copies.
    """
    selected_slice = file[var_name3].data[number]
    return (
        file[var_name1].data.copy(),
        file[var_name2].data.copy(),
        selected_slice.copy(),
    )

def make_new_xarray(lat, lon, data):
    """Wrap ``data`` in a new xarray Dataset with a single "flash_extent" variable.

    ``data`` is declared with dimensions ("longitude", "latitude"), so its
    first axis must match ``lon`` and its second axis must match ``lat``.
    """
    data_vars = {"flash_extent": (["longitude", "latitude"], data)}
    coordinates = {
        "longitude": (["longitude"], lon),
        "latitude": (["latitude"], lat),
    }
    return xa.Dataset(data_vars, coords=coordinates)

def generate_cog(file, variable_name, latitude, longitude, number, filename, base_path, for_terracotta):
    """Write one variable of ``file`` as a COG GeoTIFF and as a NetCDF file.

    Outputs are written to ``<base_path>/<directory>/<number>.tif`` and
    ``<base_path>/<directory>/<number>.nc``, where ``<directory>`` is derived
    from ``filename`` via ``scrape_directory_name`` and created if missing.

    Args:
        file: Dataset containing ``variable_name``.
        variable_name: Name of the variable to export.
        latitude: Name of the latitude dimension (used as the y dimension).
        longitude: Name of the longitude dimension (used as the x dimension).
        number: Index used as the output file stem.
        filename: Source file name; its prefix becomes the output directory.
        base_path: Directory under which the output directory is created.
        for_terracotta: Accepted for interface compatibility; not used here.
    """
    # NOTE(review): the ``.rio`` accessor is normally registered by importing
    # rioxarray, which is not among the imports visible in this notebook —
    # confirm it is imported before this function runs.
    file = file[variable_name]
    # Put latitude first so the array is laid out as (y, x) for raster export.
    file = file.transpose(latitude, longitude)
    file.rio.set_spatial_dims(x_dim=longitude, y_dim=latitude, inplace=True)
    file.rio.set_crs('epsg:4326')
    directory_name = scrape_directory_name(filename)
    make_directory(base_path, directory_name)
    # Join path components explicitly so ``base_path`` works with or without
    # a trailing separator (same convention as make_directory).
    file_name = os.path.join(base_path, directory_name, str(number))
    print(file_name)
    file.rio.to_raster(f"{file_name}.tif", driver='COG')
    file.to_netcdf(f"{file_name}.nc")
    
def delete_row_col(lat, lon, data):
    """Crop all-zero rows and columns from ``data`` and trim the coordinates to match.

    Rows (axis 0) are paired with ``lon`` and columns (axis 1) with ``lat``:
    every row that is entirely zero is removed together with its ``lon`` entry,
    then every column that is entirely zero in the remaining rows is removed
    together with its ``lat`` entry.

    Returns:
        Tuple ``(lat, lon, data)`` with the trimmed arrays.
    """
    # Rows: keep those holding at least one non-zero value.
    keep_rows = np.any(data != 0, axis=1)
    empty_row_idx = np.flatnonzero(~keep_rows)
    lon = np.delete(lon, empty_row_idx)
    data = data[keep_rows]

    # Columns: evaluated on the already row-filtered array.
    keep_cols = np.any(data != 0, axis=0)
    empty_col_idx = np.flatnonzero(~keep_cols)
    lat = np.delete(lat, empty_col_idx)
    data = data[:, keep_cols]
    return lat, lon, data

def flip_lat_data(lat, lon, data):
    """Reverse ``lat`` and mirror ``data`` along its second axis; ``lon`` is returned as is."""
    reversed_lat = lat[::-1]
    mirrored = np.flip(data, axis=1)
    return reversed_lat, lon, mirrored

In [304]:
def nalma(base_path, filename):
    """Build the read settings for a NALMA file.

    Args:
        base_path: Directory containing the file.
        filename: File name; the variable name is parsed from it with
            ``extract_variable_name``.

    Returns:
        Tuple ``(input_file_path, variable_name, lat, lon)`` where ``lat`` and
        ``lon`` are the coordinate variable names 'latitude' and 'longitude'.
    """
    # os.path.join keeps the path valid whether or not base_path ends with a
    # separator; plain concatenation silently required a trailing slash.
    input_file_path = os.path.join(base_path, filename)
    variable_name = extract_variable_name(filename)
    lat = 'latitude'
    lon = 'longitude'
    return input_file_path, variable_name, lat, lon

def wtlma(base_path, filename):
    """Build the read settings for a WTLMA file.

    Args:
        base_path: Directory containing the file.
        filename: File name; the variable name is parsed from it with
            ``extract_variable_name``.

    Returns:
        Tuple ``(input_file_path, variable_name, lat, lon)`` where ``lat`` and
        ``lon`` are the coordinate variable names 'latitude' and 'longitude'.
    """
    # os.path.join keeps the path valid whether or not base_path ends with a
    # separator; plain concatenation silently required a trailing slash.
    input_file_path = os.path.join(base_path, filename)
    variable_name = extract_variable_name(filename)
    lat = 'latitude'
    lon = 'longitude'
    return input_file_path, variable_name, lat, lon

def trmm_lis(base_path, filename):
    """Build the read settings for a TRMM LIS file.

    Args:
        base_path: Directory containing the file.
        filename: File name inside ``base_path``.

    Returns:
        Tuple ``(input_file_path, variable_name, lat, lon)`` with the fixed
        variable name "VHRFC_LIS_FRD" and the coordinate variable names
        "Latitude" and "Longitude".
    """
    # os.path.join keeps the path valid whether or not base_path ends with a
    # separator; plain concatenation silently required a trailing slash.
    input_file_path = os.path.join(base_path, filename)
    variable_name = "VHRFC_LIS_FRD"
    lat = "Latitude"
    lon = "Longitude"
    return input_file_path, variable_name, lat, lon

In [305]:
# NOTE(review): in this view `base_path` is first assigned in a later cell
# (In [309]); on a fresh kernel this cell would raise NameError unless it is
# defined earlier — confirm and move the constant above this cell.
files = list_directory_files(base_path)

In [306]:
# Display the .nc file names returned by list_directory_files.
files

['NALMA_20230629_235000_600_10src_0.0109deg-dx_flash_extent1.nc',
 'WTLMA_20170531_235000_600_10src_0.0108deg-dx_flash_extent.nc']

In [307]:
def connector(base_path, file_name_type="regular"):
    """Convert every .nc file in ``base_path`` into per-slice COG and NetCDF outputs.

    For each file, every slice along the first axis of the data variable is
    copied out, cropped to its non-zero extent, flipped along latitude,
    wrapped in a fresh dataset and handed to ``generate_cog``. Progress is
    printed for each slice.

    Args:
        base_path: Directory that holds the input files and receives the outputs.
        file_name_type: "terracotta_suitable" sets the ``for_terracotta`` flag
            passed to ``generate_cog``; any other value leaves it False.
    """
    for_terracotta = file_name_type == "terracotta_suitable"
    for filename in list_directory_files(base_path):
        print(f">>> fileName: {filename}", end="\n")
        # NOTE(review): every file is routed through nalma() regardless of its
        # name; wtlma()/trmm_lis() are never selected here — confirm intent.
        input_file_path, variable_name, lat_var, lon_var = nalma(base_path, filename)
        # Open the dataset once per file and always close it; previously it was
        # re-opened for every slice and never closed.
        source = open_file(input_file_path)
        try:
            total_data = len(source[variable_name].data)
            for i in range(total_data):
                lat, lon, data = copy_lat_lon_data(source, lat_var, lon_var, variable_name, i)
                print(data.shape, end=" --> ")
                lat, lon, data = delete_row_col(lat, lon, data)
                print(data.shape)
                lat, lon, data = flip_lat_data(lat, lon, data)
                cropped = make_new_xarray(lat, lon, data)
                print(f"Latitude: {cropped.latitude.shape}, Longitude: {cropped.longitude.shape}, Data: {cropped[variable_name].data.shape}")
                generate_cog(cropped, variable_name, lat_var, lon_var, i, filename, base_path, for_terracotta)
                print("----------------------------------")
        finally:
            source.close()
        print("\n")

In [308]:
# Derive the boolean flag directly from the file-name type string.
file_name_type = "terracotta_suitable"
for_terracotta = file_name_type == "terracotta_suitable"

print(for_terracotta)

True


In [309]:
# Constants.
# Directory that is scanned for input .nc files and under which the output
# directories are created. The trailing slash is kept so the value also works
# with plain string concatenation.
# NOTE(review): absolute, user-specific path — consider making it configurable.
base_path = "/home/asubedi/test_cog/"

In [310]:
# Run the conversion for every .nc file in base_path, with the default
# file_name_type ("regular").
connector(base_path)

>>> fileName: NALMA_20230629_235000_600_10src_0.0109deg-dx_flash_extent1.nc
(400, 400) --> (6, 11)
Latitude: (11,), Longitude: (6,), Data: (6, 11)
/home/asubedi/test_cog/NALMA_20230629_235000/0
----------------------------------
(400, 400) --> (4, 5)
Latitude: (5,), Longitude: (4,), Data: (4, 5)
/home/asubedi/test_cog/NALMA_20230629_235000/1
----------------------------------
(400, 400) --> (23, 28)
Latitude: (28,), Longitude: (23,), Data: (23, 28)
/home/asubedi/test_cog/NALMA_20230629_235000/2
----------------------------------
(400, 400) --> (19, 21)
Latitude: (21,), Longitude: (19,), Data: (19, 21)
/home/asubedi/test_cog/NALMA_20230629_235000/3
----------------------------------
(400, 400) --> (34, 53)
Latitude: (53,), Longitude: (34,), Data: (34, 53)
/home/asubedi/test_cog/NALMA_20230629_235000/4
----------------------------------
(400, 400) --> (27, 18)
Latitude: (18,), Longitude: (27,), Data: (27, 18)
/home/asubedi/test_cog/NALMA_20230629_235000/5
--------------------------------