In [1]:
import os

import json
import joblib
import glob
import shutil
import numpy as np
import pandas as pd
from pathlib import Path
import tensorflow as tf
print(f'tensorflow version : {tf.version.VERSION}')
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras.models import Sequential, Model
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn.metrics import *

tensorflow version : 2.9.1


In [2]:
# script should be in the same directory as the notebook
import utils


In [3]:
def mean_dupes_drop_singles_filter_by_std( df, std_thres=0.03, filt_zeros=True):
    """ Within the SPC samples,  duplicates should be averaged, not dropped.
    This function:

    - averages the duplicates
    - calculates std over column
    -  filters the samples out with high std (high std could mean collection error)
    -  filters the samples out with 0 std (not a true duplicate)

    Parameters
    ----------
    :param: ``df`` : ``pd.DataFrame``
        Dataframe of SPC data.
    :param: ``std_thres`` : ``float``
        standard deviation threshold for acceptable data. Default = 0.01.
    :param: ``filt_zeros`` : ``bool``
        If True, filter out sample duplicates with a standard deviation of 0 --> unrealistic!

    Returns
    -------
    :return: ```pd.DataFrame``
        Dataframe in better shape.

    """
    # copy dataframe for std calculation
    df_std = df.copy(deep=True)
#     cls.__log.info("Shape of initial dataframe: %s", np.shape(df_std))

    # average duplicates
    df = df.groupby(df.index).mean()

    # calculate average std over column, maintain index
    df_std = df_std.groupby(df_std.index).std()
    # failed_indices = [(2138, 61433) ]
    # print(df_std.loc[failed_indices])
    # df_std =df_std.loc[failed_indices]
    # print(df.std(axis=0).sort_values(ascending =False).head(30))
    # calculate avg std series
    df_std = df_std.mean(axis=1)

    # create dataframe from avg std series
    df_std = pd.DataFrame(
        index=df_std.index.values, columns=["std"], data=df_std.values
    )
    print(df_std.sort_values(by='std',ascending =False).head())
    # print(df_std)
    df_std = df_std.dropna()
    # failed_indices = [(2138, 61433) ]  
    # df_std.loc[failed_indices]

#     cls.__log.info("Shape of dataframe w/o NaNs: %s", np.shape(df_std))

    # filter df by std, removing below thresh
    df_std = df_std[df_std["std"] < std_thres]
#     cls.__log.info("Shape of thresholded dataframe: %s", np.shape(df_std))

    if filt_zeros:
        df_std = df_std[df_std["std"] > 0]
#         cls.__log.info(
#             "Shape of thresholded dataframe with filtered zeros: %s",
#             np.shape(df_std),
#         )

    # merge the dataframes by index
    df_mrgd = pd.merge(df, df_std, left_index=True, right_index=True, how="outer")
#     cls.__log.info("Shape of merged dataframe by index: %s", np.shape(df_mrgd))

    # drop na's by row
    df_mrgd = df_mrgd.dropna()
#     cls.__log.info("Shape of merged dataframe w/o NaNs: %s", np.shape(df_mrgd))

    # drop std column
    df_mrgd = df_mrgd.drop(["std"], axis=1)

    return df_mrgd


In [8]:
# # path to the output of opus to csv tool AB_quantized_spectra.csv 
# # path_to_ab_spectra = Path('../DS-ML69 product1 spectra/20221222-095217')
# # df_spectra_1 = pd.read_csv("D://CropNutsDocuments/MSSC_DVC/data/spc/spc.csv", engine='c')
# # path_to_ab_spectra = Path('../DS_L19/20230320-091519')

# # df_spectra_1 = pd.read_csv(path_to_ab_spectra / 'AB_quantized_spectra.csv',index_col=0)


# sample_codes = pd.read_csv("../DS-ML87/outputFiles/data/splits/clay_test_sample_codes.csv", index_col=0)
# df_spectra_1 = pd.read_csv("../DS-ML87/outputFiles/data/spc/spc.csv", index_col=0, engine='c')
# df_spectra_1 = df_spectra_1.loc[(df_spectra_1.index.isin(sample_codes.x))]

In [4]:
# path to the output of opus to csv tool AB_quantized_spectra.csv 
path_to_ab_spectra = Path('../DS_LI24/20230508-104244')

# path_to_ab_spectra = Path('C:/Users/Tsuma Thomas/Documents/CropNutsDocuments/DS-GIS46/DS-GIS46 all-batches-opus-pre-sorted_22-12-21')

df_spectra_1 = pd.read_csv(path_to_ab_spectra / 'AB_quantized_spectra.csv',index_col=0)

In [5]:
df_spectra_1

Unnamed: 0_level_0,522,524,526,528,530,532,534,536,538,540,...,3958,3960,3962,3964,3966,3968,3970,3972,3974,3976
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CI112SA1690_20230505_162630_001,2.331,2.325424,2.328555,2.332614,2.287139,2.237893,2.240552,2.25091,2.237878,2.217059,...,1.278593,1.278029,1.276232,1.274761,1.274128,1.274463,1.27509,1.274007,1.271721,1.270356
CI112SA1690_20230505_162703_002,2.199012,2.157742,2.1882,2.223477,2.203429,2.155636,2.138012,2.170931,2.190977,2.178411,...,1.27549,1.275186,1.273608,1.272207,1.270603,1.268437,1.267,1.267251,1.267997,1.268134
CI112SA1691_20230505_162810_001,2.282755,2.272106,2.255077,2.215148,2.226492,2.267738,2.278825,2.275184,2.290294,2.31205,...,1.279475,1.27746,1.276182,1.27603,1.276035,1.275524,1.274639,1.274242,1.275065,1.276241
CI112SA1691_20230505_162843_002,2.322816,2.237835,2.179648,2.179708,2.259499,2.338412,2.298177,2.212111,2.172442,2.176287,...,1.276569,1.276555,1.275715,1.273759,1.272331,1.272316,1.272767,1.272537,1.271954,1.272004
CI112SA1692_20230505_162948_001,2.352009,2.376109,2.361948,2.326987,2.297329,2.259442,2.199014,2.147604,2.128963,2.134511,...,1.239023,1.237281,1.236289,1.236662,1.236854,1.235752,1.234287,1.234076,1.235228,1.236176
CI112SA1692_20230505_163021_002,2.312098,2.335466,2.326989,2.287435,2.24633,2.20967,2.19024,2.210245,2.236495,2.216709,...,1.236726,1.236499,1.236949,1.236715,1.234909,1.233043,1.232952,1.233885,1.233876,1.232416
CI112SA1693_20230505_163126_001,2.341226,2.304043,2.302134,2.288668,2.261797,2.23017,2.197409,2.183332,2.206583,2.232955,...,1.26113,1.259929,1.2601,1.260441,1.259845,1.258304,1.257099,1.257602,1.258863,1.259123
CI112SA1693_20230505_163158_002,2.321388,2.30032,2.288405,2.259723,2.254259,2.294222,2.305589,2.262041,2.234841,2.24348,...,1.25925,1.258016,1.257174,1.256875,1.256767,1.25673,1.256626,1.256135,1.254935,1.253596
CI112SA1694_20230505_163308_001,2.466582,2.407294,2.345764,2.301093,2.297007,2.320388,2.306413,2.268332,2.268726,2.312092,...,1.201713,1.201529,1.200622,1.199793,1.199474,1.199135,1.198815,1.198692,1.19863,1.198947
CI112SA1694_20230505_163345_002,2.270987,2.270674,2.23714,2.209831,2.22628,2.293024,2.330715,2.292392,2.263257,2.270791,...,1.199364,1.199115,1.19863,1.198265,1.197532,1.196641,1.196115,1.195872,1.195657,1.195574


In [6]:
df_spectra_1

Unnamed: 0_level_0,522,524,526,528,530,532,534,536,538,540,...,3958,3960,3962,3964,3966,3968,3970,3972,3974,3976
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CI112SA1690_20230505_162630_001,2.331,2.325424,2.328555,2.332614,2.287139,2.237893,2.240552,2.25091,2.237878,2.217059,...,1.278593,1.278029,1.276232,1.274761,1.274128,1.274463,1.27509,1.274007,1.271721,1.270356
CI112SA1690_20230505_162703_002,2.199012,2.157742,2.1882,2.223477,2.203429,2.155636,2.138012,2.170931,2.190977,2.178411,...,1.27549,1.275186,1.273608,1.272207,1.270603,1.268437,1.267,1.267251,1.267997,1.268134
CI112SA1691_20230505_162810_001,2.282755,2.272106,2.255077,2.215148,2.226492,2.267738,2.278825,2.275184,2.290294,2.31205,...,1.279475,1.27746,1.276182,1.27603,1.276035,1.275524,1.274639,1.274242,1.275065,1.276241
CI112SA1691_20230505_162843_002,2.322816,2.237835,2.179648,2.179708,2.259499,2.338412,2.298177,2.212111,2.172442,2.176287,...,1.276569,1.276555,1.275715,1.273759,1.272331,1.272316,1.272767,1.272537,1.271954,1.272004
CI112SA1692_20230505_162948_001,2.352009,2.376109,2.361948,2.326987,2.297329,2.259442,2.199014,2.147604,2.128963,2.134511,...,1.239023,1.237281,1.236289,1.236662,1.236854,1.235752,1.234287,1.234076,1.235228,1.236176
CI112SA1692_20230505_163021_002,2.312098,2.335466,2.326989,2.287435,2.24633,2.20967,2.19024,2.210245,2.236495,2.216709,...,1.236726,1.236499,1.236949,1.236715,1.234909,1.233043,1.232952,1.233885,1.233876,1.232416
CI112SA1693_20230505_163126_001,2.341226,2.304043,2.302134,2.288668,2.261797,2.23017,2.197409,2.183332,2.206583,2.232955,...,1.26113,1.259929,1.2601,1.260441,1.259845,1.258304,1.257099,1.257602,1.258863,1.259123
CI112SA1693_20230505_163158_002,2.321388,2.30032,2.288405,2.259723,2.254259,2.294222,2.305589,2.262041,2.234841,2.24348,...,1.25925,1.258016,1.257174,1.256875,1.256767,1.25673,1.256626,1.256135,1.254935,1.253596
CI112SA1694_20230505_163308_001,2.466582,2.407294,2.345764,2.301093,2.297007,2.320388,2.306413,2.268332,2.268726,2.312092,...,1.201713,1.201529,1.200622,1.199793,1.199474,1.199135,1.198815,1.198692,1.19863,1.198947
CI112SA1694_20230505_163345_002,2.270987,2.270674,2.23714,2.209831,2.22628,2.293024,2.330715,2.292392,2.263257,2.270791,...,1.199364,1.199115,1.19863,1.198265,1.197532,1.196641,1.196115,1.195872,1.195657,1.195574


In [7]:
len(df_spectra_1)

26

In [8]:
# extract sample code
df_spectra_1.index = df_spectra_1.index.str.extract('([C]\w{3,5}SA\w{2,5}|[F]\w{4,5}[-]{1}\w{6,9}|A\w{5,7}[-]{1}\w{7,10}|\w{3}[-]{1}\w{1,2}[-]{1}\w{7,10}|\w{3}[-]{1}\w{2}[-]{1}\w{4,6})', expand=False)

In [9]:
df_spectra_1 = df_spectra_1.reset_index()
df_spectra_1.dropna(axis=0, inplace=True)
df_spectra_1 = df_spectra_1.set_index("sample_id")

In [18]:
# df_spectra_1 = df_spectra_1.rename(columns={"Unnamed: 0":"sample_id"})
# df_spectra_1 = df_spectra_1.set_index("sample_id")

In [11]:
# df_spectra_1.index = df_spectra_1.index.astype("str")
df_spectra_1.index = [x[:-1] for x in df_spectra_1.index]

In [12]:
df_spectra_1

Unnamed: 0,522,524,526,528,530,532,534,536,538,540,...,3958,3960,3962,3964,3966,3968,3970,3972,3974,3976
CI112SA1690,2.331,2.325424,2.328555,2.332614,2.287139,2.237893,2.240552,2.25091,2.237878,2.217059,...,1.278593,1.278029,1.276232,1.274761,1.274128,1.274463,1.27509,1.274007,1.271721,1.270356
CI112SA1690,2.199012,2.157742,2.1882,2.223477,2.203429,2.155636,2.138012,2.170931,2.190977,2.178411,...,1.27549,1.275186,1.273608,1.272207,1.270603,1.268437,1.267,1.267251,1.267997,1.268134
CI112SA1691,2.282755,2.272106,2.255077,2.215148,2.226492,2.267738,2.278825,2.275184,2.290294,2.31205,...,1.279475,1.27746,1.276182,1.27603,1.276035,1.275524,1.274639,1.274242,1.275065,1.276241
CI112SA1691,2.322816,2.237835,2.179648,2.179708,2.259499,2.338412,2.298177,2.212111,2.172442,2.176287,...,1.276569,1.276555,1.275715,1.273759,1.272331,1.272316,1.272767,1.272537,1.271954,1.272004
CI112SA1692,2.352009,2.376109,2.361948,2.326987,2.297329,2.259442,2.199014,2.147604,2.128963,2.134511,...,1.239023,1.237281,1.236289,1.236662,1.236854,1.235752,1.234287,1.234076,1.235228,1.236176
CI112SA1692,2.312098,2.335466,2.326989,2.287435,2.24633,2.20967,2.19024,2.210245,2.236495,2.216709,...,1.236726,1.236499,1.236949,1.236715,1.234909,1.233043,1.232952,1.233885,1.233876,1.232416
CI112SA1693,2.341226,2.304043,2.302134,2.288668,2.261797,2.23017,2.197409,2.183332,2.206583,2.232955,...,1.26113,1.259929,1.2601,1.260441,1.259845,1.258304,1.257099,1.257602,1.258863,1.259123
CI112SA1693,2.321388,2.30032,2.288405,2.259723,2.254259,2.294222,2.305589,2.262041,2.234841,2.24348,...,1.25925,1.258016,1.257174,1.256875,1.256767,1.25673,1.256626,1.256135,1.254935,1.253596
CI112SA1694,2.466582,2.407294,2.345764,2.301093,2.297007,2.320388,2.306413,2.268332,2.268726,2.312092,...,1.201713,1.201529,1.200622,1.199793,1.199474,1.199135,1.198815,1.198692,1.19863,1.198947
CI112SA1694,2.270987,2.270674,2.23714,2.209831,2.22628,2.293024,2.330715,2.292392,2.263257,2.270791,...,1.199364,1.199115,1.19863,1.198265,1.197532,1.196641,1.196115,1.195872,1.195657,1.195574


In [13]:
# df_avg_1 = mean_dupes_drop_singles_filter_by_std(df_spectra_1)
df = mean_dupes_drop_singles_filter_by_std(df_spectra_1)
# df = df_spectra_1

                  std
CI112SA1690  0.008341
CI112SA1693  0.008307
CI112SA1696  0.007754
CI112SA1699  0.007542
CI112SA1691  0.007415


In [14]:
df

Unnamed: 0,522,524,526,528,530,532,534,536,538,540,...,3958,3960,3962,3964,3966,3968,3970,3972,3974,3976
CI112SA1690,2.265006,2.241583,2.258378,2.278045,2.245284,2.196764,2.189282,2.210921,2.214427,2.197735,...,1.277041,1.276608,1.27492,1.273484,1.272366,1.27145,1.271045,1.270629,1.269859,1.269245
CI112SA1691,2.302785,2.25497,2.217362,2.197428,2.242996,2.303075,2.288501,2.243647,2.231368,2.244168,...,1.278022,1.277007,1.275949,1.274894,1.274183,1.27392,1.273703,1.273389,1.27351,1.274122
CI112SA1692,2.332054,2.355787,2.344469,2.307211,2.271829,2.234556,2.194627,2.178925,2.182729,2.17561,...,1.237874,1.23689,1.236619,1.236689,1.235881,1.234397,1.233619,1.23398,1.234552,1.234296
CI112SA1693,2.331307,2.302182,2.29527,2.274195,2.258028,2.262196,2.251499,2.222687,2.220712,2.238217,...,1.26019,1.258973,1.258637,1.258658,1.258306,1.257517,1.256862,1.256868,1.256899,1.256359
CI112SA1694,2.368784,2.338984,2.291452,2.255462,2.261643,2.306706,2.318564,2.280362,2.265991,2.291442,...,1.200538,1.200322,1.199626,1.199029,1.198503,1.197888,1.197465,1.197282,1.197144,1.197261
CI112SA1695,2.26087,2.213528,2.172084,2.163977,2.183194,2.196862,2.208452,2.228381,2.267457,2.293925,...,1.197199,1.196625,1.196555,1.196139,1.19539,1.195013,1.194782,1.194119,1.193055,1.192224
CI112SA1696,2.172838,2.210203,2.236345,2.199565,2.142046,2.116448,2.116474,2.125682,2.135014,2.135835,...,1.251988,1.251685,1.251457,1.25085,1.250335,1.250187,1.249998,1.249396,1.248391,1.247334
CI112SA1697,2.266229,2.243622,2.238354,2.245115,2.263493,2.27707,2.286879,2.281879,2.267438,2.256608,...,1.218142,1.217739,1.216829,1.215959,1.21531,1.215139,1.215113,1.214726,1.214075,1.21306
CI112SA1698,2.349541,2.358525,2.344436,2.317267,2.3282,2.358568,2.353124,2.316478,2.273677,2.255897,...,1.221732,1.220495,1.2197,1.219252,1.218914,1.218701,1.218575,1.218076,1.217108,1.216374
CI112SA1699,2.279482,2.234718,2.210521,2.219534,2.253933,2.268673,2.251685,2.238478,2.217924,2.190905,...,1.266753,1.266587,1.266867,1.266921,1.266409,1.265471,1.264331,1.26314,1.262393,1.262159


In [15]:
df_spectra_1.index

Index(['CI112SA1690', 'CI112SA1690', 'CI112SA1691', 'CI112SA1691',
       'CI112SA1692', 'CI112SA1692', 'CI112SA1693', 'CI112SA1693',
       'CI112SA1694', 'CI112SA1694', 'CI112SA1695', 'CI112SA1695',
       'CI112SA1696', 'CI112SA1696', 'CI112SA1697', 'CI112SA1697',
       'CI112SA1698', 'CI112SA1698', 'CI112SA1699', 'CI112SA1699',
       'CI112SA1700', 'CI112SA1700', 'CI112SA1701', 'CI112SA1701',
       'CI112SA1702', 'CI112SA1702'],
      dtype='object')

In [16]:
# df.to_csv("C:/Users/Tsuma Thomas/Documents/CropNutsDocuments/DS-ML69 product1 spectra/20221206-103534.csv")

In [17]:
# save to path of notebook
df.to_csv('spc.csv')

In [18]:
# path to file
filename = 'spc.csv'

In [19]:
chemicals = ['aluminium', 
            'phosphorus', 'ph', 'exchangeable_acidity', 'calcium', 'magnesium',
              'sulphur', 'sodium', 'iron', 'manganese', 'boron', 'copper', 'zinc', 'total_nitrogen', 'potassium',
             'ec_salts', 'organic_carbon', 'cec', 'sand', 'silt', 'clay']

# chemicals = ['exchangeable_acidity']


In [20]:
from math import log10, floor
def round_sig(x, sig=2):
    return round(x, sig-int(floor(log10(abs(x))))-1)

In [21]:
# read data from file for predictions
data = pd.read_csv(filename, index_col=0, engine='c')

In [22]:
data = data.set_index("sample_id")

KeyError: "None of ['sample_id'] are in the columns"

In [23]:
data

Unnamed: 0,522,524,526,528,530,532,534,536,538,540,...,3958,3960,3962,3964,3966,3968,3970,3972,3974,3976
CI112SA1690,2.265006,2.241583,2.258378,2.278045,2.245284,2.196764,2.189282,2.210921,2.214427,2.197735,...,1.277041,1.276608,1.27492,1.273484,1.272366,1.27145,1.271045,1.270629,1.269859,1.269245
CI112SA1691,2.302785,2.25497,2.217362,2.197428,2.242996,2.303075,2.288501,2.243647,2.231368,2.244168,...,1.278022,1.277007,1.275949,1.274894,1.274183,1.27392,1.273703,1.273389,1.27351,1.274122
CI112SA1692,2.332054,2.355787,2.344469,2.307211,2.271829,2.234556,2.194627,2.178925,2.182729,2.17561,...,1.237874,1.23689,1.236619,1.236689,1.235881,1.234397,1.233619,1.23398,1.234552,1.234296
CI112SA1693,2.331307,2.302182,2.29527,2.274195,2.258028,2.262196,2.251499,2.222687,2.220712,2.238217,...,1.26019,1.258973,1.258637,1.258658,1.258306,1.257517,1.256862,1.256868,1.256899,1.256359
CI112SA1694,2.368784,2.338984,2.291452,2.255462,2.261643,2.306706,2.318564,2.280362,2.265991,2.291442,...,1.200538,1.200322,1.199626,1.199029,1.198503,1.197888,1.197465,1.197282,1.197144,1.197261
CI112SA1695,2.26087,2.213528,2.172084,2.163977,2.183194,2.196862,2.208452,2.228381,2.267457,2.293925,...,1.197199,1.196625,1.196555,1.196139,1.19539,1.195013,1.194782,1.194119,1.193055,1.192224
CI112SA1696,2.172838,2.210203,2.236345,2.199565,2.142046,2.116448,2.116474,2.125682,2.135014,2.135835,...,1.251988,1.251685,1.251457,1.25085,1.250335,1.250187,1.249998,1.249396,1.248391,1.247334
CI112SA1697,2.266229,2.243622,2.238354,2.245115,2.263493,2.27707,2.286879,2.281879,2.267438,2.256608,...,1.218142,1.217739,1.216829,1.215959,1.21531,1.215139,1.215113,1.214726,1.214075,1.21306
CI112SA1698,2.349541,2.358525,2.344436,2.317267,2.3282,2.358568,2.353124,2.316478,2.273677,2.255897,...,1.221732,1.220495,1.2197,1.219252,1.218914,1.218701,1.218575,1.218076,1.217108,1.216374
CI112SA1699,2.279482,2.234718,2.210521,2.219534,2.253933,2.268673,2.251685,2.238478,2.217924,2.190905,...,1.266753,1.266587,1.266867,1.266921,1.266409,1.265471,1.264331,1.26314,1.262393,1.262159


In [25]:
# make sure we have 1728 wave numbers
data = data.T.head(1728).T

In [26]:
data.index

Index(['CI112SA1690', 'CI112SA1691', 'CI112SA1692', 'CI112SA1693',
       'CI112SA1694', 'CI112SA1695', 'CI112SA1696', 'CI112SA1697',
       'CI112SA1698', 'CI112SA1699', 'CI112SA1700', 'CI112SA1701',
       'CI112SA1702'],
      dtype='object')

In [27]:
# path to models and preprocessing steps
# base_path = Path('./dl_models_all_chems_20210414/saved_models')
base_path = Path('./dl_models_all_chems_20210414/dl_v2.2_update_2022')

# base_path = Path('D://CropNutsDocuments/DS-ML87/outputFiles/exchangeable_acidity_20230502_090047.462371')

In [29]:
for chemical in chemicals:
    print(chemical)
    preds_comb = pd.DataFrame()
    models_folder = base_path / chemical / 'std'
    all_models = [x for x in models_folder.glob('**/*.hdf5')]

    
    new_indices = data.index



    for model_path in all_models:

        json_path = model_path.parent.parent / 'model.json'

        with open(json_path) as f:
            json_ = json.load(f)

        inputs = []

        for i in range(len(json_['Inputs'])):
            input_name = json_['Inputs'][i]['Name']
            train = data.copy(deep=True)

            for j in range(len(json_['Inputs'][i]['Pre-processing'])):
                key_ = json_['Inputs'][i]['Pre-processing'][j]['Name']
                if input_name == 'nir2':
                    input_name = 'nir.2'
                pickle_path = model_path.parent / 'preprocess' / f'input.{input_name}.{j}.{key_}.pickle'
                pickle_ = joblib.load(pickle_path)
                train = pickle_.fit_transform(train)

            inputs.append(train.values)

        tf.keras.backend.clear_session()
        model = tf.keras.models.load_model(model_path, compile=False)
        preds = pd.DataFrame(model(inputs).numpy())
        preds_comb = pd.concat([preds_comb, preds], axis=1)

    preds_comb = preds_comb.median(axis=1)
    preds_comb.index = new_indices
    
    
    # save output
    preds_comb.to_csv(f'D://CropNutsDocuments/DS_LI24/outputFiles/{chemical}.csv')
    

aluminium
phosphorus
ph
exchangeable_acidity
calcium
magnesium
sulphur
sodium
iron
manganese
boron
copper
zinc
total_nitrogen
potassium
ec_salts
organic_carbon
cec
sand
silt
clay


In [None]:
print(json_['Inputs'][i])