### Load packages

In [None]:
import os
import glob
import rasterio
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
from pathlib import Path

#### Create operational functions

>- calculate_vegetation_indices()

In [None]:
def calculate_vegetation_indices(red, green, blue, nir):
    """Compute NDVI, GRVI, NDWI and EVI from four spectral bands.

    Non-floating inputs (integer or boolean) are promoted to float64 before
    any arithmetic, so unsigned raster dtypes such as uint8 cannot wrap around
    in the band differences and sums. Floating-point inputs are used as given,
    which keeps their dtype in the results.

    Args:
        red: Red band values (scalar or array).
        green: Green band values (scalar or array).
        blue: Blue band values (scalar or array).
        nir: Near-infrared band values (scalar or array).

    Returns:
        Tuple ``(ndvi, grvi, ndwi, evi)``, in that order.
    """
    red, green, blue, nir = (
        band if np.issubdtype(band.dtype, np.inexact) else band.astype(np.float64)
        for band in map(np.asarray, (red, green, blue, nir))
    )

    # Small offset so an exactly-zero denominator does not divide by zero.
    eps = 1e-8

    # NDVI = (NIR - Red) / (NIR + Red)
    ndvi = (nir - red) / (nir + red + eps)
    # GRVI (Green-Red Vegetation Index) = (Green - Red) / (Green + Red)
    grvi = (green - red) / (green + red + eps)
    # NDWI (Normalized Difference Water Index) = (Green - NIR) / (Green + NIR)
    ndwi = (green - nir) / (green + nir + eps)
    # EVI (Enhanced Vegetation Index)
    #   = 2.5 * (NIR - Red) / (NIR + 6 * Red - 7.5 * Blue + 1)
    # The blue term is subtracted, so this denominator can still be negative
    # or close to zero for some band combinations; eps only covers exact zero.
    evi = 2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1 + eps)
    return ndvi, grvi, ndwi, evi

>- load_and_extract_patches()

In [None]:
def load_and_extract_patches(stage_dir, patch_size=(64, 64)):
    """Load one growth stage's band GeoTIFFs and cut vegetation-index patches.

    Reads band 1 of ``red.tif``, ``green.tif``, ``blue.tif`` and ``nir.tif``
    from ``stage_dir`` as float32, computes the vegetation indices and tiles
    the result into non-overlapping patches. Trailing rows/columns that do not
    fill a complete patch are dropped.

    Args:
        stage_dir: Directory containing the four band files.
        patch_size: ``(height, width)`` of each patch in pixels.

    Returns:
        List of arrays shaped ``(patch_height, patch_width, 4)`` whose last
        axis is ordered NDVI, GRVI, NDWI, EVI. An empty list is returned (and
        the error printed) when a band file cannot be read.
    """
    bands = {}
    try:
        for band_name in ("red", "green", "blue", "nir"):
            band_path = os.path.join(stage_dir, f"{band_name}.tif")
            with rasterio.open(band_path) as src:
                bands[band_name] = src.read(1).astype(np.float32)
    except rasterio.RasterioIOError as e:
        print(f"Error loading image: {e}")
        return []

    # calculate_vegetation_indices returns a tuple of 2-D arrays, which has no
    # ``.shape``. Stack them on a trailing channel axis so each patch carries
    # every index and can be sliced spatially.
    features = np.stack(
        calculate_vegetation_indices(
            bands["red"], bands["green"], bands["blue"], bands["nir"]
        ),
        axis=-1,
    )

    patch_h, patch_w = patch_size
    height, width = features.shape[:2]

    patches = []
    for row in range(0, height - patch_h + 1, patch_h):
        for col in range(0, width - patch_w + 1, patch_w):
            patches.append(features[row:row + patch_h, col:col + patch_w])
    return patches

### Data Process

> 1. Image path setup

In [None]:
# Root folder holding one sub-directory of band GeoTIFFs per growth stage.
data_dir = Path('../Data')

# No try/except wrapper: a handler that only re-raises the same exception adds
# nothing, so any error simply propagates from the call that caused it.
germination_patches = load_and_extract_patches(data_dir / "germination")
tillering_patches = load_and_extract_patches(data_dir / "tillering")
grand_growth_patches = load_and_extract_patches(data_dir / "grand_growth")
ripening_patches = load_and_extract_patches(data_dir / "ripening")

> 2. Label creation

In [None]:
# One integer class id per patch, by growth stage:
# 0 = germination, 1 = tillering, 2 = grand growth, 3 = ripening.
germination_labels = [0 for _ in germination_patches]
tillering_labels = [1 for _ in tillering_patches]
grand_growth_labels = [2 for _ in grand_growth_patches]
ripening_labels = [3 for _ in ripening_patches]

> 3. Combine images and labels

In [None]:
# Concatenate the per-stage lists in the same stage order for patches and
# labels, so that index i in one list corresponds to index i in the other.
all_patches = [
    *germination_patches,
    *tillering_patches,
    *grand_growth_patches,
    *ripening_patches,
]
all_labels = [
    *germination_labels,
    *tillering_labels,
    *grand_growth_labels,
    *ripening_labels,
]

> 4. Array Conversion

In [None]:
# Turn the Python lists into NumPy arrays for the steps that follow.
all_labels = np.array(all_labels)
all_patches = np.array(all_patches)

# Stop early when no patch at all was loaded (every stage came back empty).
# len() is used because the truth value of a NumPy array is ambiguous.
if not len(all_patches):
    raise ValueError("No data loaded. Check your filepaths.")

> 5. CNN Image conversion

In [None]:
# Keep the un-flattened patch array under a separate name for the CNN branch.
# This binds a second name to the same array object; no copy is made here.
all_patches_cnn = all_patches

> 6. Flatten patches for the classical ML models

In [None]:
# Unpack the 4-D patch array shape, then collapse each patch into a single
# row so every sample becomes one flat feature vector.
n_samples, patch_height, patch_width, n_features = all_patches.shape
all_patches_flat = all_patches.reshape(n_samples, -1)

> 7. Data splitting

In [None]:
# --- Flattened data (classical ML) ---
# First cut: 70 % train, 30 % held out, stratified by stage label.
X_train, X_test, y_train, y_test = train_test_split(
    all_patches_flat,
    all_labels,
    test_size=0.3,
    stratify=all_labels,
    random_state=42,
)
# Second cut: divide the held-out 30 % equally into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    stratify=y_test,
    random_state=42,
)

# --- Un-flattened data (CNN) ---
# Same two cuts with the same seed and stratification as above.
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(
    all_patches_cnn,
    all_labels,
    test_size=0.3,
    stratify=all_labels,
    random_state=42,
)
X_val_cnn, X_test_cnn, y_val_cnn, y_test_cnn = train_test_split(
    X_test_cnn,
    y_test_cnn,
    test_size=0.5,
    stratify=y_test_cnn,
    random_state=42,
)

> 8. Data scaling and CNN image normalization

In [None]:
# Output folder for the prepared arrays and the fitted scaler.
os.makedirs("data", exist_ok=True)

# Fit the scaler on the training split only, then apply the same fitted
# transform to all three splits so validation/test statistics never leak in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# NOTE(review): the patches appear to hold vegetation-index values rather than
# 8-bit pixel intensities, so dividing by 255 may not be the intended
# normalization -- confirm before training on these arrays.
X_train_cnn, X_val_cnn, X_test_cnn = (
    split / 255.0 for split in (X_train_cnn, X_val_cnn, X_test_cnn)
)

# Persist every split: flattened/scaled arrays first, then the CNN arrays.
for _npy_path, _array in (
    ("data/X_train_ml.npy", X_train),
    ("data/y_train_ml.npy", y_train),
    ("data/X_val_ml.npy", X_val),
    ("data/y_val_ml.npy", y_val),
    ("data/X_test_ml.npy", X_test),
    ("data/y_test_ml.npy", y_test),
    ("data/X_train_cnn.npy", X_train_cnn),
    ("data/y_train_cnn.npy", y_train_cnn),
    ("data/X_val_cnn.npy", X_val_cnn),
    ("data/y_val_cnn.npy", y_val_cnn),
    ("data/X_test_cnn.npy", X_test_cnn),
    ("data/y_test_cnn.npy", y_test_cnn),
):
    np.save(_npy_path, _array)

# Store the fitted scaler so new data can be transformed the same way later.
joblib.dump(scaler, 'data/scaler.pkl')