In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from pyswarm import pso

In [49]:
data = pd.read_csv("/Users/nayana/Downloads/soil.csv")

In [50]:
data.dropna(inplace=True)

In [51]:
X = data.drop(columns=['pH', 'EC(dS/m)', 'OC(%)', 'P(kg/ha)', 'K(kg/ha)', 'Ca(meq/100g)',
                       'Mg(meq/100g)', 'S(ppm)', 'Fe(ppm)', 'Mn(ppm)', 'Cu(ppm)', 'Zn(ppm)', 'B(ppm)'])
y = data[['pH', 'EC(dS/m)', 'OC(%)', 'P(kg/ha)', 'K(kg/ha)', 'Ca(meq/100g)',
                       'Mg(meq/100g)', 'S(ppm)', 'Fe(ppm)', 'Mn(ppm)', 'Cu(ppm)', 'Zn(ppm)', 'B(ppm)']]


In [52]:
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

In [53]:
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

Partial swarm optimisation

In [54]:
def optimize_weights(X, y, model):
    def loss(weights):
        model.fit(X * weights, y)
        y_pred = model.predict(X * weights)
        return mean_squared_error(y, y_pred)
    lb = np.zeros(X.shape[1])
    ub = np.ones(X.shape[1])
    weights, _ = pso(loss, lb, ub)
    return weights

In [55]:
svr = SVR()
dtr = DecisionTreeRegressor()
knn = KNeighborsRegressor()
bagged_dtr = BaggingRegressor(n_estimators=10, random_state=42, bootstrap=True)
gpr = GaussianProcessRegressor()


In [None]:
models = {'SVR': svr, 'Decision Tree': dtr, 'KNN': knn, 'Bagged DTR': bagged_dtr, 'GPR': gpr}
best_weights = {}

for target_index in range(y_train.shape[1]):  # Iterate over each target variable
    try:
        y_train_single = y_train.iloc[:, target_index]  # Select the target variable
        model_results_target = {}  # Store results for this target variable
        for name, model in models.items():  # Iterate over each model
            try:
                # Optimize weights for the current model and target variable
                best_weights_target = optimize_weights(X_train, y_train_single, model)
                best_weights[f"Target Variable {target_index + 1} - {name}"] = best_weights_target
            except Exception as e:
                print(f"Error occurred during optimization for {name}: {e}")
    except Exception as e:
        print(f"Error occurred during optimization for target variable {target_index + 1}: {e}")

In [None]:
from tabulate import tabulate

headers = ["Target Variable", "Model", "Best Weights"]

table_data = []
for target_index in range(y_train.shape[1]):
    for name, _ in models.items():
        key = f"Target Variable {target_index + 1} - {name}"
        if key in best_weights:
            weights = best_weights[key]
            table_data.append([f"Target Variable {target_index + 1}", name, weights])
        else:
            table_data.append([f"Target Variable {target_index + 1}", name, "No optimized weights"])

print(tabulate(table_data, headers=headers))


Target Variable     Model          Best Weights
------------------  -------------  -------------------------------------------------------------------
Target Variable 1   SVR            [0.         0.16130448 0.18374638 0.84708377 0.90013895 0.82678611
                                    0.65569614 0.27395135 0.1090446  0.25158812 0.21562139 0.
                                    0.00721979 0.69671779 1.         1.         0.         0.65345102]
Target Variable 1   Decision Tree  [0.6412744  0.72957515 0.218736   0.1505267  0.80339009 0.39367713
                                    0.00723175 0.60862774 0.74296815 0.93264168 0.57037738 0.81210565
                                    0.40835537 0.84533733 0.61225366 0.97442598 0.05435295 0.37933301]
Target Variable 1   KNN            [0.0889605  0.3061704  0.37498323 0.26855788 0.26287191 0.33426665
                                    0.98945591 0.05288224 0.07168919 0.50829915 0.70682937 0.00966872
                                    0.2

In [46]:
pip install tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [42]:
metrics = {'MSE': mean_squared_error, 'MAE': mean_absolute_error, 'R2': r2_score}
model_results = {}

for target_index in range(y_test.shape[1]):  # Iterate over each target variable
    try:
        y_test_single = y_test.iloc[:, target_index]  # Select the target variable
        target_metrics = {}  # Store evaluation metrics for this target variable
        for name, model in models.items():  # Iterate over each model
            try:
                weights = best_weights[f"Target Variable {target_index + 1} - {name}"]
                y_pred = model.predict(X_test * weights)
                # Compute evaluation metrics for the current model
                target_metrics[name] = {metric_name: metric(y_test_single, y_pred) for metric_name, metric in metrics.items()}
            except Exception as e:
                print(f"Error occurred during evaluation for {name}: {e}")
        # Add evaluation metrics for the current target variable to the overall results
        model_results[f'Target Variable {target_index + 1}'] = target_metrics
    except Exception as e:
        print(f"Error occurred during evaluation for target variable {target_index + 1}: {e}")

# Display the evaluation results for each model and target variable
for target_variable, metrics in model_results.items():
    print(f"Target Variable {target_variable}:")
    for model_name, metric_results in metrics.items():
        print(f"    {model_name}:")
        for metric_name, value in metric_results.items():
            print(f"        {metric_name}: {value}")



Target Variable Target Variable 1:
    SVR:
        MSE: 41.29321075861957
        MAE: 6.302589591840131
        R2: -25.197279061334036
    Decision Tree:
        MSE: 44.63481052631579
        MAE: 6.58
        R2: -27.31726004652186
    KNN:
        MSE: 42.84849894736842
        MAE: 6.400842105263157
        R2: -26.183986511612535
    Bagged DTR:
        MSE: 42.74012457894736
        MAE: 6.409
        R2: -26.11523153905256
    GPR:
        MSE: 47.82481383229699
        MAE: 6.780168022218738
        R2: -29.341065056548196
Target Variable Target Variable 2:
    SVR:
        MSE: 0.3605459912601433
        MAE: 0.5724985547214936
        R2: -4.264360539265649
    Decision Tree:
        MSE: 0.19789473684210526
        MAE: 0.3157894736842105
        R2: -1.8894767070319771
    KNN:
        MSE: 0.38042063157894734
        MAE: 0.5886315789473686
        R2: -4.554551734737626
    Bagged DTR:
        MSE: 0.293035947368421
        MAE: 0.5167894736842105
        R2: -3.278641