## Legacy Code Maintenance

### Streamfunction

In [None]:
def UNITEST_streamfunction_global(fname):
    """
    Compute a pressure-integrated streamfunction from the `v` field of one file.

    The file `fname` is looked up under `path_ERA5`. The level axis of both
    `plev` and `v` is reversed, `v` is averaged between adjacent levels
    (with a zero value inserted ahead of the first level), multiplied by the
    layer thickness, cumulatively summed over levels and finally scaled by
    ``2 * pi * 6.371e6 * cos(lat) / 9.81``.

    NOTE(review): the indexing assumes `v` is ordered (time, plev, lat, lon)
    and keeps only the last index of the fourth axis -- presumably the file
    already holds a zonal mean; confirm against the data.

    Parameters:
    ----------
    fname: str
        File name, relative to `path_ERA5`.

    Returns:
    -------
    tuple:
        - streamfunction: the weighted cumulative integral.
        - streamfunction_convolution: `moving_average` of the above along axis 0.
    """
    # Read everything inside the context manager so the file handle is always
    # released, even if one of the reads raises.
    with Dataset(os.path.join(path_ERA5, fname)) as dataset:
        pressure = dataset["plev"][::-1]
        lat = dataset["lat"][:]
        v_div = dataset["v"][:, ::-1, :, -1]

    # Layer thickness; the first layer spans from 0 to the first (reversed) level.
    thickness = np.diff(np.insert(pressure, 0, 0))

    # Layer-mean values: average each level with the one before it, using a
    # zero slab ahead of the first level.
    tmp = np.insert(v_div, 0, 0, axis=1)
    v_div_interp = (tmp[:, :-1, :] + tmp[:, 1:, :]) / 2

    # 6.371e6 and 9.81 are presumably Earth's radius (m) and gravity (m s^-2).
    weighting = 2 * np.pi * 6.371e6 * np.cos(np.deg2rad(lat)) / 9.81

    # Move the level axis last so `thickness` broadcasts, integrate over levels,
    # then move it back so `weighting` broadcasts over the latitude axis.
    streamfunction = np.swapaxes(v_div_interp, 1, -1) * thickness
    streamfunction = np.cumsum(streamfunction, axis=-1)
    streamfunction = np.swapaxes(streamfunction, -1, 1) * weighting
    streamfunction_convolution = moving_average(streamfunction, axis=0)
    return streamfunction, streamfunction_convolution


def UNITEST_streamfunction_regional(fname):
    """
    Compute a pressure-integrated streamfunction for a longitude sector.

    The file `fname` is looked up under `path_ERA5`. The level axis of both
    `plev` and `v` is reversed, `v` is averaged over its last axis, averaged
    between adjacent levels (with a zero value inserted ahead of the first
    level), multiplied by the layer thickness, cumulatively summed over
    levels and scaled by ``deg2rad(lon[-1] - lon[0]) * 6.371e6 * cos(lat) / 9.81``.

    NOTE(review): the indexing assumes `v` is ordered (time, plev, lat, lon);
    confirm against the data.

    Parameters:
    ----------
    fname: str
        File name, relative to `path_ERA5`.

    Returns:
    -------
    tuple:
        - streamfunction: the weighted cumulative integral.
        - streamfunction_convolution: `moving_average` of the above along axis 0.
    """
    # Read everything inside the context manager so the file handle is always
    # released, even if one of the reads raises.
    with nc.Dataset(os.path.join(path_ERA5, fname)) as dataset:
        pressure = dataset["plev"][::-1]
        lon = dataset["lon"][:]
        lat = dataset["lat"][:]
        v_div = dataset["v"][:, ::-1, :, :]

    # Layer thickness; the first layer spans from 0 to the first (reversed) level.
    thickness = np.diff(np.insert(pressure, 0, 0))

    # Collapse the last axis, then form layer-mean values by averaging each
    # level with the one before it (zero slab ahead of the first level).
    tmp = np.mean(v_div, axis=-1)
    tmp = np.insert(tmp, 0, 0, axis=1)
    v_div_interp = (tmp[:, :-1, :] + tmp[:, 1:, :]) / 2

    # The sector width in radians replaces the full 2*pi of the global version.
    # 6.371e6 and 9.81 are presumably Earth's radius (m) and gravity (m s^-2).
    weighting = np.deg2rad(lon[-1] - lon[0]) * 6.371e6 * np.cos(np.deg2rad(lat)) / 9.81

    # Move the level axis last so `thickness` broadcasts, integrate over levels,
    # then move it back so `weighting` broadcasts over the latitude axis.
    streamfunction = np.swapaxes(v_div_interp, 1, -1) * thickness
    streamfunction = np.cumsum(streamfunction, axis=-1)
    streamfunction = np.swapaxes(streamfunction, -1, 1) * weighting
    streamfunction_convolution = moving_average(streamfunction, axis=0)

    return streamfunction, streamfunction_convolution

### Tropical Depression Occurrence

In [None]:
def calculate_occurrence(filepath: str) -> tuple[np.ndarray, np.ndarray]:
    """
    Load the "Occurrence" column of a tab-separated table, raw and smoothed.

    The file is read without a header row (malformed lines are skipped), its
    five columns are labelled Year / Month / Day / Occurrence / Time and cast
    to integers. The Occurrence column is smoothed with `moving_average`, and
    both the raw and the smoothed series are passed through
    `split_dimension` along axis 0.

    Parameters:
    ----------
    filepath: str
        Path to the tab-separated input file.

    Returns:
    -------
    tuple[np.ndarray, np.ndarray]:
        - the raw occurrence series after `split_dimension`
        - the smoothed occurrence series after `split_dimension`
    """
    # Local imports
    from utils import moving_average, split_dimension
    from pandas import read_csv

    column_labels = ["Year", "Month", "Day", "Occurrence", "Time"]

    table = read_csv(filepath, sep="\t", on_bad_lines="skip", header=None)
    table.columns = column_labels
    table = table.astype(int)

    raw_series = table["Occurrence"].to_numpy()
    # Smooth the flat series first; both versions are split afterwards.
    smoothed_series = moving_average(raw_series, axis=0)

    return (
        split_dimension(raw_series, axis=0),
        split_dimension(smoothed_series, axis=0),
    )


### (Equivalent) Potential Temperature

In [None]:
def calculate_potential_temperature(
    filepath: str,
) -> tuple[np.ndarray, np.ndarray, dict[str, np.ndarray]]:
    """
    Extract the longitude-mean potential temperature over the `INDIAN_MASK` region.

    The `pt` variable is subset to the latitude/longitude box given by
    `INDIAN_MASK` (bounds inclusive), averaged over axis 3, smoothed along
    axis 0 with `moving_average`, and both the raw and smoothed fields are
    passed through `split_dimension` along axis 0.

    NOTE(review): the original author states the input is assumed to hold
    43 years of 365 days each; that presumably reflects how `split_dimension`
    reshapes the time axis -- confirm in `utils`. The hard-coded axes assume
    `pt` is ordered (time, plev, lat, lon).

    Parameters:
    ----------
    filepath: str
        Path to the NetCDF file containing the `pt` variable and its coordinates.

    Returns:
    -------
    tuple[np.ndarray, np.ndarray, dict[str, np.ndarray]]:
        - potential_temperature: np.ndarray
            Region-subset, longitude-averaged data after `split_dimension`.
        - potential_temperature_smoothed: np.ndarray
            The same field smoothed with `moving_average` before splitting.
        - dims: dict[str, np.ndarray]
            Coordinate arrays keyed by dimension name. `lat` and `lon` are
            restricted to the region (`lon` is still returned even though the
            data is averaged over it) and `plev` is divided by 100.
    """

    # Local imports
    from utils import moving_average, split_dimension
    from constants import INDIAN_MASK

    # Inclusive (lower, upper) coordinate bounds for the dimensions that get
    # subset; every other dimension is taken in full.
    region_bounds = {
        "lat": (INDIAN_MASK.LATITUDE_SOUTH, INDIAN_MASK.LATITUDE_NORTH),
        "lon": (INDIAN_MASK.LONGITUDE_WEST, INDIAN_MASK.LONGITUDE_EAST),
    }

    # Open the dataset and extract dimensions
    with Dataset(filepath) as dataset:
        variable = dataset["pt"]
        dims = {dim: dataset[dim][:] for dim in variable.dimensions}
        data_slice = [slice(None)] * len(dims)  # Full slice on every axis by default

        for idx, dim in enumerate(variable.dimensions):
            if dim not in region_bounds:
                continue  # e.g. time and pressure are not subset
            lower, upper = region_bounds[dim]
            # Boolean mask selecting the coordinates inside the region
            data_slice[idx] = (dims[dim] <= upper) & (dims[dim] >= lower)
            dims[dim] = dims[dim][data_slice[idx]]

        # Read only the selected region while the file is still open
        potential_temperature = variable[tuple(data_slice)]

    # Average over the longitude axis (axis 3)
    potential_temperature = np.mean(potential_temperature, axis=3)

    # Apply moving average smoothing over the time axis (axis 0)
    potential_temperature_smoothed = moving_average(potential_temperature, axis=0)

    # Split the potential temperature and smoothed data over time (axis 0)
    potential_temperature = split_dimension(potential_temperature, axis=0)
    potential_temperature_smoothed = split_dimension(
        potential_temperature_smoothed, axis=0
    )

    # Convert pressure levels from Pa to hPa. Use an out-of-place division:
    # `/=` raises a casting error when the coordinate is stored as integers.
    dims["plev"] = dims["plev"] / 100

    return (
        potential_temperature,
        potential_temperature_smoothed,
        dims,
    )

In [None]:
def calculate_equivalent_potential_temperature(
    filepath: str,
) -> tuple[np.ndarray, np.ndarray, dict[str, np.ndarray]]:
    """
    Extract the longitude-mean equivalent potential temperature over the `INDIAN_MASK` region.

    The `ept` variable is subset to the latitude/longitude box given by
    `INDIAN_MASK` (bounds inclusive), averaged over axis 3, smoothed along
    axis 0 with `moving_average`, and both the raw and smoothed fields are
    passed through `split_dimension` along axis 0.

    NOTE(review): the original author states the input is assumed to hold
    43 years of 365 days each; that presumably reflects how `split_dimension`
    reshapes the time axis -- confirm in `utils`. The hard-coded axes assume
    `ept` is ordered (time, plev, lat, lon).

    Parameters:
    ----------
    filepath: str
        Path to the NetCDF file containing the `ept` variable and its coordinates.

    Returns:
    -------
    tuple[np.ndarray, np.ndarray, dict[str, np.ndarray]]:
        - equivalent_potential_temperature: np.ndarray
            Region-subset, longitude-averaged data after `split_dimension`.
        - equivalent_potential_temperature_smoothed: np.ndarray
            The same field smoothed with `moving_average` before splitting.
        - dims: dict[str, np.ndarray]
            Coordinate arrays keyed by dimension name. `lat` and `lon` are
            restricted to the region (`lon` is still returned even though the
            data is averaged over it) and `plev` is divided by 100.
    """

    # Local imports
    from utils import moving_average, split_dimension
    from constants import INDIAN_MASK

    # Inclusive (lower, upper) coordinate bounds for the dimensions that get
    # subset; every other dimension is taken in full.
    region_bounds = {
        "lat": (INDIAN_MASK.LATITUDE_SOUTH, INDIAN_MASK.LATITUDE_NORTH),
        "lon": (INDIAN_MASK.LONGITUDE_WEST, INDIAN_MASK.LONGITUDE_EAST),
    }

    # Open the dataset and extract dimensions
    with Dataset(filepath) as dataset:
        variable = dataset["ept"]
        dims = {dim: dataset[dim][:] for dim in variable.dimensions}
        data_slice = [slice(None)] * len(dims)  # Full slice on every axis by default

        for idx, dim in enumerate(variable.dimensions):
            if dim not in region_bounds:
                continue  # e.g. time and pressure are not subset
            lower, upper = region_bounds[dim]
            # Boolean mask selecting the coordinates inside the region
            data_slice[idx] = (dims[dim] <= upper) & (dims[dim] >= lower)
            dims[dim] = dims[dim][data_slice[idx]]

        # Read only the selected region while the file is still open
        equivalent_potential_temperature = variable[tuple(data_slice)]

    # Average over the longitude axis (axis 3)
    equivalent_potential_temperature = np.mean(equivalent_potential_temperature, axis=3)

    # Apply moving average smoothing over the time axis (axis 0)
    equivalent_potential_temperature_smoothed = moving_average(
        equivalent_potential_temperature, axis=0
    )

    # Split the equivalent potential temperature and smoothed data over time (axis 0)
    equivalent_potential_temperature = split_dimension(
        equivalent_potential_temperature, axis=0
    )
    equivalent_potential_temperature_smoothed = split_dimension(
        equivalent_potential_temperature_smoothed, axis=0
    )

    # Convert pressure levels from Pa to hPa. Use an out-of-place division:
    # `/=` raises a casting error when the coordinate is stored as integers.
    dims["plev"] = dims["plev"] / 100

    return (
        equivalent_potential_temperature,
        equivalent_potential_temperature_smoothed,
        dims,
    )