In [None]:
import os
import numpy as np
from osgeo import gdal, gdal_array, ogr
import rasterio as rio
import pandas as pd
import pickle
import cubist
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split
import multiprocessing as mp
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
def fod(spectra):
    """Return the first-order difference of ``spectra`` taken across its columns.

    Column ``i`` of the result holds ``spectra[:, i + 1] - spectra[:, i]``.
    The last column has no right-hand neighbour, so it repeats the previous
    difference column; the result therefore keeps the input's shape, index
    and column labels. A frame with fewer than two columns is returned as an
    unchanged copy.

    Parameters
    ----------
    spectra : pandas.DataFrame
        One row per sample, one column per band.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``spectra`` itself is never modified.
    """
    # Work on an explicit copy: slicing with ``.iloc[:, :]`` may share data
    # with the caller's frame, and writing into it would silently overwrite
    # the input (making a second call difference already-differenced data).
    fo_spec = spectra.copy()
    n_cols = fo_spec.shape[1]

    for i in range(n_cols - 1):
        fo_spec.iloc[:, i] = spectra.iloc[:, i + 1] - spectra.iloc[:, i]

    if n_cols > 1:
        # Pad the final column with the last computed difference.
        fo_spec.iloc[:, n_cols - 1] = fo_spec.iloc[:, n_cols - 2]
    return fo_spec

def worker(arr):
    """Run the notebook-level ``model`` on one chunk of samples.

    ``model`` is not a parameter: it is looked up as a global when the
    function is called, so it must be defined before this function is used.
    """
    predictions = model.predict(arr)
    return predictions

def make_parallel_predictions(model, X):
    """Predict ``X`` with ``model`` by splitting the rows across processes.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(chunk)``. It must be picklable, because
        its bound ``predict`` method is sent to the worker processes.
    X : array-like
        Samples along the first axis; must support ``X.shape`` and row
        slicing with ``X[start:stop]``.

    Returns
    -------
    numpy.ndarray
        The per-chunk predictions concatenated in the original row order.
    """
    n_rows = X.shape[0]
    # Never request more workers than there are rows; the original floor
    # division produced a chunk size of 0 (and a ValueError from range())
    # whenever the input had fewer rows than CPUs.
    num_processes = max(1, min(mp.cpu_count(), n_rows))

    if num_processes == 1:
        # A single chunk gains nothing from a process pool.
        return np.concatenate([model.predict(X)], axis=0)

    # Ceiling division so that at most ``num_processes`` chunks are created.
    chunk_size = -(-n_rows // num_processes)
    chunks = [X[i:i + chunk_size] for i in range(0, n_rows, chunk_size)]

    with mp.Pool(processes=num_processes) as pool:
        # Use the model that was passed in rather than a notebook global.
        results = pool.map(model.predict, chunks)

    return np.concatenate(results, axis=0)

In [3]:
## VENuS Image
# Alternative input path, kept for reference:
# fname = './VE_VM03_VSC_L2VALD_ISRAELWI_20220824/VE_VM03_VSC_L2VALD_ISRAELWI_20220824.DBL.DIR/VE_VM03_VSC_PDTIMG_L2VALD_ISRAELWI_20220824_SRE.DBL.TIF'
fname = 'VE_VM03_VSC_L2VALD_ISRAELWI_20220824_Clip.tif'

# The dataset is deliberately left open: the export step further down reads
# `src.crs` and `src.transform` from it.
src = rio.open(fname)
metadata, num_bands = src.meta, src.count

# Collect one flattened column of pixel values per band (rasterio band
# numbers start at 1).
band_columns = []
for band_number in range(1, num_bands + 1):
    band = src.read(band_number).astype(float)
    # Negative values are treated as missing; everything else is divided by
    # 1000 (presumably the scale factor to reflectance - TODO confirm).
    band = np.where(band < 0, np.nan, band) / 1000
    band_columns.append(band.flatten())

# Transpose so that rows are pixels and columns are bands.
venus_image = np.array(band_columns).T

In [4]:
# Label each band column (the labels look like band centre wavelengths in
# nm - TODO confirm) and leave out the '619' band.
band_labels = ['420', '443', '490', '555', '619', '638', '672', '702', '742', '782', '865', '910']
venus_df = pd.DataFrame(venus_image, columns=band_labels).drop(columns='619')

In [5]:
# 1. Remove NaN values from the dataframe
# Record the index labels of every row that has at least one missing band,
# then keep only the fully populated rows.
incomplete_rows = venus_df.isna().any(axis=1)
nan_indices = venus_df.index[incomplete_rows]
venus_df_cleaned = venus_df.dropna(axis=0, how='any')

In [6]:
# Normalise the answer so that stray whitespace or capital letters
# (e.g. "Clay ") still match the lower-case property names used below.
soil_property = input('Which soil property do you want to predict (caco3/clay/toc/silt/sand) - ').strip().lower()

Which soil property do you want to predict (caco3/clay/toc/silt/sand) -  clay


In [19]:
# Per-property settings: (path of the pickled model, whether the spectra are
# log-transformed before first-order differencing).
# NOTE(review): the algorithm names are taken from the file names only;
# confirm them against how each model was trained.
SOIL_MODELS = {
    'caco3': ('./models/CaCO3_model_cubist.pkl', True),
    'clay': ('./models/clay_model_cubist.pkl', False),
    'toc': ('./models/TOC_model_cubist.pkl', True),
    'silt': ('./models/silt_model_gbrt.pkl', False),
    'sand': ('./models/sand_model_gbrt.pkl', False),
}

# Fail loudly on an unknown property instead of silently continuing with an
# undefined (or stale, from an earlier run) dumpName / x_pred.
if soil_property not in SOIL_MODELS:
    raise ValueError(
        f'Unknown soil property {soil_property!r}; expected one of {sorted(SOIL_MODELS)}'
    )

dumpName, use_log = SOIL_MODELS[soil_property]

# Always hand a fresh frame to fod() so that venus_df_cleaned is left intact
# and this cell gives the same result every time it is re-run.
if use_log:
    process1 = venus_df_cleaned.apply(np.log)
else:
    process1 = venus_df_cleaned.copy()

# NOTE(review): log(0) yields -inf, and differences involving -inf can also
# produce +inf or NaN, which this replace() does not touch - confirm that
# matches the preprocessing used when the models were trained.
process2 = fod(process1).replace(-np.inf, 0)
x_pred = process2
print(f'Model loaded for {soil_property}')


Model loaded for clay


In [20]:
# 2. Make predictions on the cleaned dataframe
# NOTE(review): unpickling can execute arbitrary code; only load model files
# that come from a trusted source.
with open(dumpName, 'rb') as model_file:
    # The context manager closes the file even if unpickling fails.
    model = pickle.load(model_file)
y_pred = model.predict(x_pred)

  x = x.applymap(lambda a: a.lstrip())


In [21]:
# 3. Reinsert the NaN values in the predicted outcomes
# True for every row of venus_df that was kept (i.e. had no missing band).
valid_rows = ~venus_df.index.isin(nan_indices)

# Some regressors return a column vector of shape (n, 1); flatten it so it
# can be assigned through the one-dimensional boolean mask.
y_pred = np.ravel(y_pred)
if y_pred.size != valid_rows.sum():
    # Typically means this cell was run twice, or the predictions were made
    # on a different set of rows.
    raise ValueError(
        f'Got {y_pred.size} predictions for {valid_rows.sum()} valid pixels; '
        're-run the prediction cell before this one.'
    )

y_pred_with_nans = np.full(len(venus_df), np.nan)
y_pred_with_nans[valid_rows] = y_pred
y_pred = y_pred_with_nans

In [22]:
# Put the per-pixel predictions back onto the raster grid.
y_pred = y_pred.flatten()
predicted_image = y_pred.reshape(metadata['height'], metadata['width'])
# Pixels without a prediction are written as 0.
predicted_image = np.where(np.isnan(predicted_image), 0, predicted_image)
# Add a leading band axis so the array is (1, height, width).
predicted_image = np.expand_dims(predicted_image, axis=0)

# Last underscore-separated token of the model file name, e.g. 'cubist'.
model_name = dumpName.split('.pkl')[0].split('_')[-1]
output_path = f'./results/20220824_Clip_Predicted_{model_name}_{soil_property}.tif'

# Create the output folder on first use instead of failing inside rasterio.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# The context manager closes (and flushes) the file even if write() raises.
with rio.open(
    output_path,
    'w',
    driver='GTiff',
    dtype='float32',
    crs=src.crs,
    width=metadata['width'],
    height=metadata['height'],
    count=1,
    transform=src.transform,
) as output_image:
    # Cast explicitly so the array dtype matches the declared band dtype.
    output_image.write(predicted_image.astype('float32'))

print("Predicted results saved :", output_path)

Predicted results saved : ./results/20220824_Clip_Predicted_cubist_clay.tif
