# Import all the necessary libraries

In [1]:
import numpy as np
import pandas as pd
import pickle
import math

# Processing the data to be predicted
The code below demonstrates how to process the data in an .xlsx file. You can download the example file above.

In [2]:
## Load the example workbook
# Read the "Sheet1" worksheet of the example spreadsheet into a DataFrame.
file_df = pd.read_excel(
    io="example_adsorption_HMsoil.xlsx",
    sheet_name="Sheet1",
)
# Preview the first rows.
file_df.head()

Unnamed: 0,Heavy metal,pH (soil),CEC (cmol/kg),OC (%),clay (%),pH (solution),equilibrium concentration (mg/L),ratio
0,Cd,6.1,3.01,1.2,15.6,6.1,639.90976,0.667
1,Cd,6.1,3.01,1.2,15.6,6.1,298.24704,0.667
2,Cd,5.8,20.5,6.1,23.0,5.8,201.03328,0.667
3,Ni,5.0,1.09,0.3,4.5,5.0,137.68048,0.667
4,Ni,5.8,20.5,6.1,23.0,5.8,93.43824,0.667


In [3]:
## Build the input matrix for the models
# Skip the first column (the heavy-metal label) and keep every remaining
# column, copied into a standalone NumPy array.
X_input = file_df.iloc[:, 1:].to_numpy(copy=True)
X_input

array([[6.10000000e+00, 3.01000000e+00, 1.20000000e+00, 1.56000000e+01,
        6.10000000e+00, 6.39909760e+02, 6.67000000e-01],
       [6.10000000e+00, 3.01000000e+00, 1.20000000e+00, 1.56000000e+01,
        6.10000000e+00, 2.98247040e+02, 6.67000000e-01],
       [5.80000000e+00, 2.05000000e+01, 6.10000000e+00, 2.30000000e+01,
        5.80000000e+00, 2.01033280e+02, 6.67000000e-01],
       [5.00000000e+00, 1.09000000e+00, 3.00000000e-01, 4.50000000e+00,
        5.00000000e+00, 1.37680480e+02, 6.67000000e-01],
       [5.80000000e+00, 2.05000000e+01, 6.10000000e+00, 2.30000000e+01,
        5.80000000e+00, 9.34382400e+01, 6.67000000e-01],
       [5.20000000e+00, 1.79000000e+00, 1.70000000e+00, 7.50000000e+00,
        5.20000000e+00, 8.72090240e+01, 6.67000000e-01],
       [6.10000000e+00, 3.01000000e+00, 1.20000000e+00, 1.56000000e+01,
        6.10000000e+00, 7.73932320e+01, 6.67000000e-01],
       [5.80000000e+00, 2.05000000e+01, 6.10000000e+00, 2.30000000e+01,
        5.80000000e+00, 4

In [4]:
# Number of feature columns per sample. Read it from the array shape instead of
# indexing row 1, so the check also works when the file holds a single sample.
X_input.shape[1]

7

# Load the model and perform the prediction

In [5]:
def _load_model(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, even if unpickling raises.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file; only load model files that come from a source you trust.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# One pickled model per heavy metal.
model_Cd = _load_model('models/Cd.pkl')
model_Cr = _load_model('models/Cr.pkl')
model_Cu = _load_model('models/Cu.pkl')
model_Ni = _load_model('models/Ni.pkl')
model_Pb = _load_model('models/Pb.pkl')
model_Zn = _load_model('models/Zn.pkl')

In [6]:
## Perform the prediction. Use different models for corresponding heavy metals

# Dispatch table: heavy-metal symbol -> the model loaded for that metal.
models_by_metal = {
    "Cd": model_Cd,
    "Cr": model_Cr,
    "Cu": model_Cu,
    "Ni": model_Ni,
    "Pb": model_Pb,
    "Zn": model_Zn,
}
N_FEATURES = 7                   # feature values expected per sample
INVALID_ENTRY = 'Invalid entry'  # placeholder written when a row cannot be predicted

predict_data = []
for i in range(len(X_input)):
    m = i + 1  # 1-based row number reported in the result table
    row_values = np.array(file_df.iloc[i, :]).tolist()

    # Every row starts out as invalid and is only upgraded on a successful prediction.
    outputs = [INVALID_ENTRY, INVALID_ENTRY]
    if len(X_input[i]) == N_FEATURES:
        try:
            # Positional lookup (.iloc) so the loop counter is never
            # interpreted as an index label.
            metal = file_df['Heavy metal'].iloc[i]
            model = models_by_metal.get(metal)
            if model is not None:  # unknown metals keep the invalid placeholder
                prediction_individual = model.predict(np.array([X_input[i]]))
                ln_value = prediction_individual[0]
                # The model output is reported as-is and also exponentiated.
                outputs = [round(ln_value, 3), round(math.exp(ln_value), 3)]
        except Exception:
            # Best effort per row: any failure while predicting this sample
            # (bad values, overflow in exp, ...) marks only this row as invalid.
            # Unlike a bare `except:`, KeyboardInterrupt/SystemExit still propagate.
            outputs = [INVALID_ENTRY, INVALID_ENTRY]

    predict_row = [m] + row_values + outputs
    predict_data.append(predict_row)


In [7]:
# Column labels: running number, the original input columns, then the two outputs.
headings = ["Number"] + list(file_df.columns.values) + ["Ln-Adsorption (mg/L)", "Adsorption (mg/L)"]
# Passing the labels to the constructor (instead of assigning `.columns`
# afterwards) also yields a correctly labelled, empty table when
# `predict_data` has no rows, rather than raising a length-mismatch error.
pred_df = pd.DataFrame(predict_data, columns=headings)
pred_df

Unnamed: 0,Number,Heavy metal,pH (soil),CEC (cmol/kg),OC (%),clay (%),pH (solution),equilibrium concentration (mg/L),ratio,Ln-Adsorption (mg/L),Adsorption (mg/L)
0,1,Cd,6.1,3.01,1.2,15.6,6.1,639.90976,0.667,-0.027,0.974
1,2,Cd,6.1,3.01,1.2,15.6,6.1,298.24704,0.667,-0.068,0.934
2,3,Cd,5.8,20.5,6.1,23.0,5.8,201.03328,0.667,0.355,1.426
3,4,Ni,5.0,1.09,0.3,4.5,5.0,137.68048,0.667,-2.877,0.056
4,5,Ni,5.8,20.5,6.1,23.0,5.8,93.43824,0.667,0.055,1.057
5,6,Ni,5.2,1.79,1.7,7.5,5.2,87.209024,0.667,-1.394,0.248
6,7,Ni,6.1,3.01,1.2,15.6,6.1,77.393232,0.667,-0.686,0.503
7,8,Pb,5.8,20.5,6.1,23.0,5.8,464.00499,0.667,1.526,4.6
8,9,Pb,5.8,20.5,6.1,23.0,5.8,256.42332,0.667,1.526,4.6
9,10,Pb,6.1,3.01,1.2,15.6,6.1,184.904406,0.667,0.95,2.585


# Save the results to a csv file

In [8]:
# Write the prediction table to disk, leaving out the DataFrame index column.
pred_df.to_csv(
    path_or_buf="prediction_result.csv",
    index=False,
)