# Classify the GeoTIFF data from WorldClim

## Initialisation and setup

In [9]:
# System imports
import os
import shutil

In [5]:
# Some print utilities to make output more digestible
from utilities.printUtils import cPrint, hPrint
from utilities.printUtils import AnsiColours as Cols

In [25]:
# Data Utilities
from data.WCDownloader import downloadData, extractData
from data.GeoTiff import readGeoData, GeoDB

def downloadFileList(files: list[str], resolution: str, dataPath: str = "./data") -> None:
    """
    Ensure every requested dataset is available as an extracted directory.

    For each entry in `files` the function looks for the directory
    `<dataPath>/<name>_<resolution>`. If it is missing, the matching
    `<name>_<resolution>.zip` is looked up in `dataPath`; when the archive is
    missing too it is fetched with `downloadData`, and in either case it is
    then unpacked with `extractData`. The data directory itself is created
    first if it does not exist.

    The file list should simply contain the WC variable names.
    For example:
        `files = ['tavg','prec']`

    Args:
        files (list[str]): The list of files.
        resolution (str): The resolution of the files.
        dataPath (str, optional): The data directory. Defaults to "./data".
    """

    cPrint("Checking for required files.", Cols.BLUE)

    # Make sure the data directory exists before looking inside it
    dataDirMissing = not os.path.isdir(dataPath)
    if dataDirMissing:
        cPrint("Creating data directory.", Cols.YELLOW)
        os.makedirs(dataPath)

    for varName in files:
        # Shared stem for the directory name, the zip name and the messages
        dataset = f"{varName}_{resolution}"

        # Already extracted: report it and move on to the next variable
        if os.path.isdir(f"{dataPath}/{dataset}"):
            cPrint(f"{dataset} directory found.", Cols.GREEN)
            continue

        cPrint(f"{dataset} directory not found. Checking for zip file.", Cols.YELLOW)

        # Only download when the archive is not already on disk
        archivePath = f"{dataPath}/{dataset}.zip"
        if not os.path.isfile(archivePath):
            cPrint(f"{dataset}.zip not found. Downloading.", Cols.YELLOW)
            downloadData(varName, resolution, dataPath)
            cPrint(f"{dataset}.zip downloaded.", Cols.GREEN)

        # Unpack the archive (freshly downloaded or previously present)
        extractData(varName, resolution, dataPath)
        cPrint(f"{dataset}.zip extracted.", Cols.GREEN)

def removeDirs(files: list[str], resolution: str, dataPath: str = "./data") -> None:
    """
    Removes the directories for the given files and resolution.
    The .zip files will not be removed.

    Removal is best-effort: a directory that cannot be deleted is reported
    (together with the reason) and the remaining directories are still
    processed. Only filesystem errors (`OSError`) are handled; anything else,
    such as a keyboard interrupt, propagates to the caller.

    Args:
        files (list[str]): The list of files.
        resolution (str): The resolution of the files.
        dataPath (str, optional): The data directory. Defaults to "./data".
    """

    cPrint("Removing directories.", Cols.BLUE)

    for file in files:
        dirName = f"{file}_{resolution}"
        dirPath = f"{dataPath}/{dirName}"

        # Nothing to remove for this entry
        if not os.path.isdir(dirPath):
            cPrint(f"{dirName} directory not found.", Cols.YELLOW)
            continue

        cPrint(f"Removing {dirName} directory.", Cols.YELLOW)
        try:
            # Keep the try body to the one call that can fail on the filesystem
            shutil.rmtree(dirPath)
        except OSError as err:
            cPrint(f"Failed to remove {dirName} directory: {err}", Cols.RED)


ImportError: cannot import name 'GeoDB' from 'data.GeoTiff' (/home/dylan/Documents/Programming/Projects/TerrainGeneration/data/GeoTiff.py)

Initialise the constants needed to perform the analysis.

In [7]:
# WC variable names to fetch; each one is combined with RESOLUTION to form the
# "<var>_<resolution>" directory and zip names used by the download helpers.
REQUIRED_VARS = ["bio", "tavg", "prec"]
# NOTE(review): CHUNKS and NUM_THREADS are not referenced in the visible cells —
# presumably a chunk count and worker count for later processing; confirm.
CHUNKS = 100
NUM_THREADS = 3
# Resolution string passed to the download helpers (appears in names such as "bio_5m").
RESOLUTION = "5m"
# Directory used for the downloaded archives, extracted data and geotiffs.db.
DATA_PATH = "./data"

## Data loading

In [26]:
# Connect to the Geo database stored at <DATA_PATH>/geotiffs.db.
# NOTE(review): in the recorded run the import of GeoDB from data.GeoTiff failed
# with ImportError, so this line raises NameError until that name is available.
geoDB = GeoDB(f"{DATA_PATH}/geotiffs.db")

NameError: name 'GeoDB' is not defined

In [15]:
# Download and extract the required files if they do not exist
downloadFileList(REQUIRED_VARS, RESOLUTION, DATA_PATH)

# Read each variable's GeoTIFF from its extracted directory.
# NOTE(review): meta and base are rebound on every iteration and never used
# afterwards, so nothing is flattened or written to the database yet.
for file in REQUIRED_VARS:
    meta, base = readGeoData(f'{DATA_PATH}/{file}_{RESOLUTION}/{file}_{RESOLUTION}.tif')
# The triple-quoted string that follows is disabled draft code: as a bare string
# expression it has no effect. It refers to names (geoData, data, np, tqdm)
# that are not defined in the visible cells.
"""
meta, base = geoData.get(list(geoData.keys())[0])  # type: ignore
    data["lat"] = np.repeat(base.index.values, base.shape[1])
    data["lon"] = np.tile(base.columns.values, base.shape[0])
    data["classification"] = 0  # classification values

    # Thus we need to flatten the tavg and prec data into columns
    for col in tqdm(geoData.keys(), desc="Flattening data", unit="files"):
        data[col] = geoData[col][1].values.flatten()
"""

# Remove the extracted data directories (the .zip archives are kept)
removeDirs(REQUIRED_VARS, RESOLUTION, DATA_PATH)


[94mChecking for required files.[0m
[93mbio_5m directory not found. Checking for zip file.[0m
[92mbio_5m.zip extracted.[0m
[93mtavg_5m directory not found. Checking for zip file.[0m
[92mtavg_5m.zip extracted.[0m
[93mprec_5m directory not found. Checking for zip file.[0m
[92mprec_5m.zip extracted.[0m
[94mRemoving directories.[0m
[93mRemoving bio_5m directory.[0m
[93mRemoving tavg_5m directory.[0m
[93mRemoving prec_5m directory.[0m


In [None]:
# Convert the classification data to a numpy array (like a raster)
# NOTE(review): this cell is currently only a bare string expression, so it
# executes nothing. The draft inside pivots a `classification` table into a
# lat x lon grid, reverses the row order, and converts it with to_numpy().
"""
# Convert the classification back to a raster
classification = classification.pivot(index="lat", columns="lon", values="classification")

# TODO: Figure out why we need to reverse the latitudes
# We may be able to correct this when we create the lat column in the first place
classification = classification.iloc[::-1]
classArr = classification.to_numpy()
"""
