In [None]:
import os
import joblib
import numpy as np
import pandas as pd
import gc
import random

from scipy import stats
from scipy.spatial.distance import cosine, mahalanobis
from scipy.stats import ks_2samp, skew, kurtosis, entropy, f_oneway, norm
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.svm import SVR

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate, Multiply, Add, BatchNormalization, LayerNormalization, MultiHeadAttention, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.utils import plot_model

import pyswarm
from skopt import gp_minimize
from skopt.space import Real
from pyswarm import pso
from scipy.stats import qmc
from scipy.optimize import minimize
from scipy.optimize import dual_annealing
from scipy.optimize import differential_evolution as genetic_algorithm
from scipy.interpolate import Rbf
from scipy.spatial import Delaunay
from scipy.ndimage import gaussian_filter

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.colors import Normalize, TwoSlopeNorm

gc.enable()
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Seed every random number generator this notebook touches so that
# repeated runs draw the same random numbers.
seed = 42
# NOTE(review): setting PYTHONHASHSEED here is only seen by child processes;
# the running interpreter's hash seed is presumably already fixed - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)      # NumPy legacy global generator
random.seed(seed)         # Python standard-library generator
tf.random.set_seed(seed)  # TensorFlow global generator

In [None]:
# Create the output directory for the figures saved by the plotting cells
# below; exist_ok=True makes re-running this cell harmless.
optimization_dir = "optimization"
os.makedirs(optimization_dir, exist_ok=True)

In [None]:
# Extract the top ensemble for each crop: the row with the lowest
# 'MAPE_Random Split Crop' within each 'Crop' group.
top_ensembles = ensemble_results_crops_df.loc[
    ensemble_results_crops_df.groupby('Crop')['MAPE_Random Split Crop'].idxmin()
]

# Parse the model names and derive inverse-MAPE weights for every crop.
top_ensemble_models = {}
for _, row in top_ensembles.iterrows():
    crop = row['Crop']
    model_names = row['Model Names (Random Split)'].split(', ')

    # Retrieve the individual MAPE of every ensemble member
    mape_values = []
    for model_name in model_names:
        matching_mape = wide_results_table_with_crops[
            (wide_results_table_with_crops['Model'] == model_name) &
            (wide_results_table_with_crops['Crop'] == crop) &
            (wide_results_table_with_crops['Split'] == 'Random Split Crop')
        ]['MAPE'].values
        if len(matching_mape) > 0:
            mape_values.append(float(matching_mape[0]))
        else:
            print(f"Warning: MAPE not found for model {model_name} in crop {crop}")
            mape_values.append(float('inf'))  # A missing MAPE ends up with zero weight

    # Inverse-MAPE weights. A missing (inf) or NaN MAPE contributes zero weight,
    # and a zero MAPE is floored so the division cannot raise ZeroDivisionError.
    inverse_mape = np.array([
        1.0 / max(value, 1e-12) if np.isfinite(value) else 0.0
        for value in mape_values
    ])
    inverse_total = inverse_mape.sum()
    if inverse_total > 0:
        weights = inverse_mape / inverse_total  # Normalize so the weights sum to 1
    else:
        # No usable MAPE at all: fall back to equal weights instead of NaNs.
        print(f"Warning: no valid MAPE for crop {crop}; using equal weights")
        weights = np.full(len(model_names), 1.0 / len(model_names))

    top_ensemble_models[crop] = {
        'models': model_names,
        'weights': weights
    }

# Prepare the results as a DataFrame for display
top_ensemble_df = pd.DataFrame([
    {
        'Crop': crop,
        'Models': ', '.join(info['models']),
        'Weights': ', '.join(map(str, info['weights']))
    }
    for crop, info in top_ensemble_models.items()
])

In [None]:
# Adaptive environmental impact scaling factor (lambda_env)
def compute_lambda_env(X, Y):
    """Return the penalty multiplier derived from soil, nutrient and climate terms.

    Parameters
    ----------
    X : DataFrame providing the columns 'soil_ph', 'organic_matter_percent',
        'electrical_conductivity', 'npk_nitrogen', 'npk_phosphorus_p2o5',
        'npk_potassium_k2o' and 'climate_zone' (only their means are used).
    Y : object with a ``mean()`` method; its mean is floored at 1 before it is
        used as a divisor.

    Returns
    -------
    ``lambda_0 * (1 + R_soil + R_nutrient + R_environment)`` with ``lambda_0 = 1``.
    """
    base_penalty = 1

    # Soil term: each component only contributes when the mean crosses its threshold
    ph_shortfall = max(0, (6.5 - X['soil_ph'].mean()))
    organic_shortfall = max(0, (2 - X['organic_matter_percent'].mean()))
    salinity_excess = max(0, (X['electrical_conductivity'].mean() - 0.5))
    soil_term = ph_shortfall / 10 + organic_shortfall / 5 + salinity_excess / 10

    # Nutrient term: mean N, P and K each divided by the floored mean of Y
    yield_divisor = max(1, Y.mean())
    nitrogen_ratio = X['npk_nitrogen'].mean() / yield_divisor
    phosphorus_ratio = X['npk_phosphorus_p2o5'].mean() / yield_divisor
    potassium_ratio = X['npk_potassium_k2o'].mean() / yield_divisor
    nutrient_term = nitrogen_ratio + phosphorus_ratio + potassium_ratio

    # Climate term: the truncated mean zone code is looked up, 0.1 when unmapped
    zone_impacts = {0: 0.05, 1: 0.1, 2: 0.15, 3: 0.12}
    zone_code = int(X['climate_zone'].mean())
    if zone_code in zone_impacts:
        climate_term = zone_impacts[zone_code]
    else:
        climate_term = 0.1

    return base_penalty * (1 + soil_term + nutrient_term + climate_term)

# Accumulator for one result row per (crop, method) run
optimization_results = list()

# Search bounds for N, P and K: the observed (min, max) of each column of X
npk_bounds = [
    (X[nutrient_column].min(), X[nutrient_column].max())
    for nutrient_column in ('npk_nitrogen', 'npk_phosphorus_p2o5', 'npk_potassium_k2o')
]

# Define the yield predictor
def predict_yield(params, crop, X, top_ensemble_models):
    """Predict the mean yield of ``crop`` for one candidate NPK application.

    Parameters
    ----------
    params : sequence of three values ``(nitrogen, phosphorus_p2o5, potassium_k2o)``.
    crop : key into ``top_ensemble_models``.
    X : feature DataFrame. It is NOT modified; the candidate NPK values are
        written to a copy so that repeated calls do not corrupt the caller's data.
    top_ensemble_models : mapping ``crop -> {'models': [...], 'weights': [...]}``.

    Returns
    -------
    Weighted sum over the ensemble members of the mean model prediction.
    Members whose model file cannot be found are skipped without renormalising
    the remaining weights, so the result is 0 when no file exists.
    """
    npk_nitrogen, npk_phosphorus_p2o5, npk_potassium_k2o = params

    # Work on a copy: DataFrame.assign returns a new frame with the three NPK
    # columns overwritten, leaving the caller's X untouched.
    X_candidate = X.assign(
        npk_nitrogen=npk_nitrogen,
        npk_phosphorus_p2o5=npk_phosphorus_p2o5,
        npk_potassium_k2o=npk_potassium_k2o,
    )

    # Get ensemble models and weights for the crop
    ensemble_info = top_ensemble_models[crop]
    predicted_yield = 0

    for model_name, weight in zip(ensemble_info['models'], ensemble_info['weights']):
        model_path = f"models/{model_name}_random_split"
        if os.path.exists(f"{model_path}.pkl"):
            model = load_model(f"{model_path}.pkl")
        elif os.path.exists(f"{model_path}.keras"):
            model = load_model(f"{model_path}.keras")
        else:
            # No saved model for this member: it contributes nothing.
            continue

        # Use the model to predict
        y_pred = model.predict(X_candidate)
        predicted_yield += weight * y_pred.mean()  # Weighted prediction (scalar)

    return predicted_yield

# Define the objective function
def objective_function(params, crop, X, Y, top_ensemble_models):
    """Objective minimised by every optimizer: negative penalised yield.

    Parameters
    ----------
    params : sequence of three values ``(nitrogen, phosphorus_p2o5, potassium_k2o)``.
    crop : key into ``top_ensemble_models``.
    X : feature DataFrame. It is NOT modified, so the value returned for a given
        ``params`` no longer depends on which candidates were evaluated before.
    Y : observed yields, forwarded to ``compute_lambda_env``.
    top_ensemble_models : mapping ``crop -> {'models': [...], 'weights': [...]}``.

    Returns
    -------
    ``-(predicted_yield - lambda_reg * total_npk)`` as a scalar.
    """
    # Penalty weight computed from the caller's unmodified data
    lambda_reg = compute_lambda_env(X, Y)

    npk_nitrogen, npk_phosphorus_p2o5, npk_potassium_k2o = params

    # Evaluate the candidate on a copy; writing into X directly would leak the
    # candidate NPK values into the next call's compute_lambda_env(X, Y).
    X_candidate = X.assign(
        npk_nitrogen=npk_nitrogen,
        npk_phosphorus_p2o5=npk_phosphorus_p2o5,
        npk_potassium_k2o=npk_potassium_k2o,
    )

    # Extract ensemble information for the given crop
    ensemble_info = top_ensemble_models[crop]

    # Weighted prediction using ensemble models
    predicted_yield = 0
    for model_name, weight in zip(ensemble_info['models'], ensemble_info['weights']):
        model_path = f"models/{model_name}_random_split"
        if os.path.exists(f"{model_path}.pkl"):
            model = load_model(f"{model_path}.pkl")
        elif os.path.exists(f"{model_path}.keras"):
            model = load_model(f"{model_path}.keras")
        else:
            # No saved model for this member: it contributes nothing.
            continue

        # Use model to predict yield
        y_pred = model.predict(X_candidate)
        predicted_yield += weight * y_pred.mean()  # Ensure scalar aggregation

    # Total NPK applied
    total_npk = npk_nitrogen + npk_phosphorus_p2o5 + npk_potassium_k2o

    # Return scalar value for the optimization
    return -(predicted_yield - lambda_reg * total_npk)

# Helper that turns one optimized NPK triple into the reported metrics
def calculate_metrics(params, crop, X, Y):
    """Summarise one optimizer result.

    Reads the module-level names ``top_ensemble_models`` and ``baseline_yield``.

    Returns the tuple ``(lambda_reg, predicted_yield, yield_improvement,
    nutrient_efficiency, environmental_impact)``.
    """
    # The penalty weight is computed before the yield prediction
    penalty_weight = compute_lambda_env(X, Y)

    nitrogen, phosphorus, potassium = params
    applied_total = nitrogen + phosphorus + potassium

    # Ensemble prediction at the optimized NPK values
    yield_at_optimum = predict_yield(params, crop, X, top_ensemble_models)

    gain_over_baseline = yield_at_optimum - baseline_yield
    if applied_total > 0:
        yield_per_unit_npk = yield_at_optimum / applied_total
    else:
        yield_per_unit_npk = 0
    impact = penalty_weight * applied_total

    return penalty_weight, yield_at_optimum, gain_over_baseline, yield_per_unit_npk, impact

# Bayesian Optimization wrapper
def bayesian_optimizer(crop, X, Y, top_ensemble_models):
    """Minimise the crop objective with ``gp_minimize`` and return its result object."""

    def crop_objective(params):
        return objective_function(params, crop, X, Y, top_ensemble_models)

    # 200 evaluations, fixed random state, n_jobs=-1 as in every other gp_minimize call here
    return gp_minimize(
        func=crop_objective,
        dimensions=npk_bounds,
        n_calls=200,
        random_state=42,
        n_jobs=-1,
    )

# Define Bayesian-Evolutionary Hybrid
def bayesian_evolutionary_hybrid(crop, X, Y, top_ensemble_models):
    """Bayesian optimization followed by differential evolution.

    The differential-evolution population is seeded with five Latin Hypercube
    samples plus the best point found by ``gp_minimize``.

    Returns the result object of the differential-evolution run.
    """

    def crop_objective(params):
        return objective_function(params, crop, X, Y, top_ensemble_models)

    # Perform Bayesian Optimization first
    bayesian_result = gp_minimize(
        func=crop_objective,
        dimensions=npk_bounds,
        n_calls=200,
        random_state=42,
        n_jobs=-1
    )

    lower_bounds = [bound[0] for bound in npk_bounds]
    upper_bounds = [bound[1] for bound in npk_bounds]

    # Latin Hypercube sample of the search box. The sampler is seeded explicitly
    # (same value as random_state above) so the initial population is repeatable.
    sampler = qmc.LatinHypercube(d=len(npk_bounds), seed=42)
    lhs_points = qmc.scale(sampler.random(n=5), lower_bounds, upper_bounds)

    # Include the best result from Bayesian Optimization as the sixth member
    initial_population = np.vstack([
        lhs_points,
        np.asarray(bayesian_result.x, dtype=float),
    ])

    # Apply Genetic Algorithm (differential evolution) from that population.
    # NOTE: per the SciPy documentation, popsize is overridden when an explicit
    # init array is supplied, so the effective population is the 6 rows above.
    genetic_result = genetic_algorithm(
        func=crop_objective,
        bounds=npk_bounds,
        strategy='best1bin',  # Efficient differential evolution strategy
        init=initial_population,
        popsize=10,
        maxiter=500,
        mutation=(0.5, 1),  # Dithered mutation range
        recombination=0.7
    )

    return genetic_result

# Gradient-Assisted Evolutionary Algorithm
def gradient_assisted_evolutionary(crop, X, Y, top_ensemble_models):
    """Differential evolution, then L-BFGS-B started from its best point.

    Returns the result object of the L-BFGS-B run.
    """
    objective_args = (crop, X, Y, top_ensemble_models)

    def crop_objective(params):
        return objective_function(params, *objective_args)

    # Stage 1: global search with differential evolution
    evolutionary_stage = genetic_algorithm(
        func=crop_objective,
        bounds=npk_bounds,
        strategy='best1bin',
        popsize=15,
        maxiter=1000,
    )

    # Stage 2: bounded local refinement from the evolutionary optimum
    return minimize(
        fun=objective_function,
        x0=evolutionary_stage.x,
        args=objective_args,
        bounds=npk_bounds,
        method='L-BFGS-B',
    )

# Adaptive Differential Evolution
def adaptive_differential_evolution(crop, X, Y, top_ensemble_models):
    """Run differential evolution with the 'randtobest1bin' strategy and return its result."""

    def crop_objective(params):
        return objective_function(params, crop, X, Y, top_ensemble_models)

    evolution_settings = {
        'strategy': 'randtobest1bin',
        'mutation': (0.5, 1),    # mutation range
        'recombination': 0.9,    # recombination rate
        'maxiter': 1000,
    }
    return genetic_algorithm(func=crop_objective, bounds=npk_bounds, **evolution_settings)

# Policy-Based Optimization
def policy_based(crop, X, Y, top_ensemble_models, episodes=3000, initial_lr=0.1, exploration_scale=10.0, decay_factor=0.9):
    """Random-perturbation search around a drifting ``policy`` vector.

    Each episode perturbs the policy with uniform noise whose half-width is
    ``exploration_scale * decay_factor ** episode``, clips the candidate to
    ``npk_bounds`` and scores it with the negated objective.

    Returns ``(best_params, best_reward)`` for the best candidate seen.
    """
    lower_limits = [bound[0] for bound in npk_bounds]
    upper_limits = [bound[1] for bound in npk_bounds]
    n_dims = len(npk_bounds)

    policy = np.random.uniform(lower_limits, upper_limits).astype(np.float64)
    best_reward, best_params = -np.inf, None

    for episode in range(episodes):
        # Noise half-width for this episode
        noise_width = exploration_scale * (decay_factor ** episode)
        perturbation = np.random.uniform(-noise_width, noise_width, size=n_dims)

        # Candidate, kept inside the search box
        action = np.clip(policy + perturbation, lower_limits, upper_limits)

        # Reward is the negated objective
        reward = -objective_function(action, crop, X, Y, top_ensemble_models)

        # Remember the best candidate so far
        if reward > best_reward:
            best_reward, best_params = reward, action

        # The best reward is refreshed first, so the factor below is never positive
        policy += initial_lr * (reward - best_reward) * (action - policy)

    return best_params, best_reward

# Define Q-Learning Optimization
def q_learning(crop, X, Y, top_ensemble_models, episodes=3000, gamma=0.9, exploration_scale=10.0, decay_factor=0.9):
    """Random-perturbation search around a vector ``Q`` of NPK values.

    Each episode perturbs ``Q`` with uniform noise (half-width decays from
    ``exploration_scale`` but never below 1.0), clips the candidate to
    ``npk_bounds``, scores it with the negated objective and moves ``Q`` by
    ``gamma * (reward - best_reward) * (action - Q)``.

    Returns ``(best_params, best_reward)`` for the best candidate seen.
    """
    lower_limits = np.array([bound[0] for bound in npk_bounds], dtype=np.float64)
    upper_limits = np.array([bound[1] for bound in npk_bounds], dtype=np.float64)

    Q = np.random.uniform(lower_limits, upper_limits).astype(np.float64)
    best_reward = -np.inf
    best_params = None

    for episode in range(episodes):
        # Adaptive exploration with a minimum exploration scale of 1.0
        current_exploration = max(exploration_scale * (decay_factor ** episode), 1.0)
        action = Q + np.random.uniform(-current_exploration, current_exploration, size=len(npk_bounds))

        # Clip action to within bounds
        action = np.clip(action, lower_limits, upper_limits)

        # Validate action
        if np.any(np.isnan(action)):
            print(f"Warning: NaN detected in action at episode {episode}. Resetting action.")
            action = Q.copy()  # copy so the in-place update of Q cannot alter the action

        # Evaluate reward
        reward = -objective_function(action, crop, X, Y, top_ensemble_models)
        if np.isnan(reward):
            print(f"Warning: NaN detected in reward at episode {episode}. Assigning penalty.")
            reward = -1e6  # Penalize invalid rewards

        # Update Q only once a finite reference reward exists. On the first
        # episode best_reward is -inf, and (reward - best_reward) would be +inf,
        # throwing Q to +/-inf (or NaN where action == Q).
        if np.isfinite(best_reward):
            Q += gamma * (reward - best_reward) * (action - Q)
            Q = np.clip(Q, lower_limits, upper_limits)  # Keep Q inside the search box

        # Update best parameters
        if reward > best_reward:
            best_reward = reward
            best_params = action.copy()

    return best_params, best_reward

In [None]:
# Main optimization loop
def _record_result(crop, method, params, objective_value, X_crop, y_crop):
    """Append one result row for ``method`` on ``crop`` to ``optimization_results``.

    ``params`` is the optimized (N, P, K) triple and ``objective_value`` the
    value to report (already sign-flipped where the optimizer minimised).
    Every row carries the same keys, including 'Lambda Regularization', so the
    resulting DataFrame has no method-specific gaps.
    """
    lambda_reg, _, yield_improvement, nutrient_efficiency, environmental_impact = calculate_metrics(
        params, crop, X_crop, y_crop
    )
    optimization_results.append({
        'Crop': crop,
        'Method': method,
        'Optimized N': params[0],
        'Optimized P': params[1],
        'Optimized K': params[2],
        'Objective Value': objective_value,
        'Lambda Regularization': lambda_reg,
        'Yield Improvement': yield_improvement,
        'Nutrient Efficiency': nutrient_efficiency,
        'Environmental Impact': environmental_impact
    })


# NOTE: optimization_results is created in an earlier cell, so re-running only
# this cell appends a second copy of every row.
for crop in top_ensemble_models.keys():
    print(f"Loop {crop + 1}")
    # Filter X and y for the specific crop
    X_crop = X_test_rand[X_test_rand['crop'] == crop].copy()
    y_crop = y_test_rand[X_crop.index]  # Align y with the subset X_crop

    # Baseline yield read by calculate_metrics (module-level name)
    baseline_yield = y_crop.median()

    # Objective bound to this crop's data, shared by the optimizers below
    def crop_objective(params):
        return objective_function(params, crop, X_crop, y_crop, top_ensemble_models)

    lower_bounds = [bound[0] for bound in npk_bounds]
    upper_bounds = [bound[1] for bound in npk_bounds]

    # Gradient Descent
    print('Running Gradient Descent')
    gradient_result = minimize(
        fun=objective_function,
        x0=[100, 50, 50],
        args=(crop, X_crop, y_crop, top_ensemble_models),
        bounds=npk_bounds,
        method='L-BFGS-B'
    )
    _record_result(crop, 'Gradient Descent', gradient_result.x, -gradient_result.fun, X_crop, y_crop)

    # Genetic Algorithm
    print('Running Genetic Algorithm')
    genetic_result = genetic_algorithm(
        func=crop_objective,
        bounds=npk_bounds,
        strategy='best1bin',
        maxiter=1000
    )
    _record_result(crop, 'Genetic Algorithm', genetic_result.x, -genetic_result.fun, X_crop, y_crop)

    # Particle Swarm Optimization
    print('Running Particle Swarm Optimization')
    pso_result = pso(
        func=crop_objective,
        lb=lower_bounds,
        ub=upper_bounds,
        swarmsize=30, # Increased from 15 to 30
        maxiter=1000
    )
    _record_result(crop, 'Particle Swarm Optimization', pso_result[0], -pso_result[1], X_crop, y_crop)

    # Simulated Annealing
    print('Running Simulated Annealing')
    simulated_result = dual_annealing(
        func=crop_objective,
        bounds=npk_bounds,
        maxiter=1000
    )
    _record_result(crop, 'Simulated Annealing', simulated_result.x, -simulated_result.fun, X_crop, y_crop)

    # Bayesian Optimization
    print('Running Bayesian Optimization')
    bayesian_result = bayesian_optimizer(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(crop, 'Bayesian Optimization', bayesian_result.x, -bayesian_result.fun, X_crop, y_crop)

    # Bayesian-Evolutionary Hybrid
    print('Running Bayesian-Evolutionary Hybrid')
    hybrid_result = bayesian_evolutionary_hybrid(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(crop, 'Bayesian-Evolutionary Hybrid', hybrid_result.x, -hybrid_result.fun, X_crop, y_crop)

    # Gradient-Assisted Evolutionary Algorithm
    print('Running Gradient-Assisted Evolutionary Algorithm')
    gradient_assisted_result = gradient_assisted_evolutionary(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(
        crop, 'Gradient-Assisted Evolutionary',
        gradient_assisted_result.x, -gradient_assisted_result.fun, X_crop, y_crop
    )

    # Adaptive Differential Evolution
    print('Running Adaptive Differential Evolution')
    adaptive_result = adaptive_differential_evolution(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(crop, 'Adaptive Differential Evolution', adaptive_result.x, -adaptive_result.fun, X_crop, y_crop)

    # Policy-Based Optimization (already returns a reward, i.e. the negated objective)
    print('Running Policy-Based Optimization')
    policy_params, policy_reward = policy_based(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(crop, 'Policy-Based Optimization', policy_params, policy_reward, X_crop, y_crop)

    # Q-Learning Optimization (already returns a reward, i.e. the negated objective)
    print('Running Q-Learning Optimization')
    q_params, q_reward = q_learning(crop, X_crop, y_crop, top_ensemble_models)
    _record_result(crop, 'Q-Learning Optimization', q_params, q_reward, X_crop, y_crop)

optimization_results_df = pd.DataFrame(optimization_results)

In [None]:
# Display the full results table (one row per crop and optimization method)
optimization_results_df

In [None]:
# Per-method totals of every numeric column, shown best objective first
numeric_totals_by_method = optimization_results_df.groupby("Method").sum(numeric_only=True)
optimization_method_sums_df = numeric_totals_by_method.reset_index()

summary_columns = ['Method', 'Objective Value', 'Yield Improvement', 'Nutrient Efficiency', 'Environmental Impact']
optimization_method_sums_df[summary_columns].sort_values(by="Objective Value", ascending=False)

In [None]:
# Canonical (stripped, title-cased) method labels
optimization_results_df['Method'] = optimization_results_df['Method'].str.strip().str.title()

# Mean objective value per method, alphabetical by method name
average_values = (
    optimization_results_df
    .groupby('Method', as_index=False)['Objective Value']
    .mean()
    .sort_values(by='Method')
    .reset_index(drop=True)
    .rename(columns={'Objective Value': 'Average Objective Value'})
)
average_values['Method'] = average_values['Method'].str.strip().str.title()

# Methods in first-appearance order, which is the order the bars are drawn in
unique_methods = optimization_results_df['Method'].unique()

# Re-order the averages whenever alphabetical order differs from bar order
methods_aligned = all(average_values['Method'].values == unique_methods)
if not methods_aligned:
    print("Mismatch detected! Correcting alignment explicitly...")
    average_values = average_values.set_index('Method').reindex(unique_methods).reset_index()

# Grouped bars: one bar per crop within each method
plt.figure(figsize=(12, 7))
sns.barplot(data=optimization_results_df, x='Method', y='Objective Value', hue='Crop', ci=None)
plt.title('Objective Value Across Methods and Crops', fontsize=16)
plt.xlabel('Optimization Method', fontsize=12)
plt.ylabel('Objective Value', fontsize=12)
plt.xticks(rotation=45, fontsize=10, ha='right')
plt.yticks(fontsize=10)
plt.legend(bbox_to_anchor=(1.05, 1), title='Crop', loc='upper left', fontsize=10)

# Per-method average drawn as a black line with diamond markers
x_coords = range(len(unique_methods))  # one x position per method group
overlay_values = average_values['Average Objective Value']
plt.plot(x_coords, overlay_values, color='black', linestyle='-', linewidth=1, label='Average Objective Value')
plt.scatter(x_coords, overlay_values, color='black', marker='D', s=40)

# Legend call that also lists the average line
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', fontsize=10)

# Write the figure to disk, then display it
plot_path = os.path.join(optimization_dir, "objective-value-across-methods-and-crops-with-averages.png")
plt.tight_layout()
plt.savefig(plot_path, dpi=300)

plt.show()

In [None]:
# Canonical (stripped, title-cased) method labels
optimization_results_df['Method'] = optimization_results_df['Method'].str.strip().str.title()

# Mean yield improvement per method, alphabetical by method name
average_yield_improvement = (
    optimization_results_df
    .groupby('Method', as_index=False)['Yield Improvement']
    .mean()
    .sort_values(by='Method')
    .reset_index(drop=True)
    .rename(columns={'Yield Improvement': 'Average Yield Improvement'})
)
average_yield_improvement['Method'] = average_yield_improvement['Method'].str.strip().str.title()

# Methods in first-appearance order, which is the order the bars are drawn in
unique_methods = optimization_results_df['Method'].unique()

# Re-order the averages whenever alphabetical order differs from bar order
methods_aligned = all(average_yield_improvement['Method'].values == unique_methods)
if not methods_aligned:
    print("Mismatch detected! Correcting alignment explicitly...")
    average_yield_improvement = (
        average_yield_improvement.set_index('Method').reindex(unique_methods).reset_index()
    )

# Grouped bars: one bar per crop within each method
plt.figure(figsize=(12, 7))
sns.barplot(data=optimization_results_df, x='Method', y='Yield Improvement', hue='Crop', ci=None)
plt.title('Yield Improvement Across Methods and Crops', fontsize=16)
plt.xlabel('Optimization Method', fontsize=12)
plt.ylabel('Yield Improvement (kg/ha)', fontsize=12)
plt.xticks(rotation=45, fontsize=10, ha='right')
plt.yticks(fontsize=10)
plt.legend(bbox_to_anchor=(1.05, 1), title='Crop', loc='upper left', fontsize=10)

# Per-method average drawn as a black line with diamond markers
x_coords = range(len(unique_methods))  # one x position per method group
overlay_values = average_yield_improvement['Average Yield Improvement']
plt.plot(x_coords, overlay_values, color='black', linestyle='-', linewidth=1, label='Average Yield Improvement')
plt.scatter(x_coords, overlay_values, color='black', marker='D', s=40)

# Legend call that also lists the average line
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', fontsize=10)

# Write the figure to disk, then display it
plot_path = os.path.join(optimization_dir, "yield-improvement-across-methods-and-crops-with-averages.png")
plt.tight_layout()
plt.savefig(plot_path, dpi=300)

plt.show()

In [None]:
# Canonical (stripped, title-cased) method labels
optimization_results_df['Method'] = optimization_results_df['Method'].str.strip().str.title()

# Mean nutrient efficiency per method, alphabetical by method name
average_efficiency = (
    optimization_results_df
    .groupby('Method', as_index=False)['Nutrient Efficiency']
    .mean()
    .sort_values(by='Method')
    .reset_index(drop=True)
    .rename(columns={'Nutrient Efficiency': 'Average Nutrient Efficiency'})
)
average_efficiency['Method'] = average_efficiency['Method'].str.strip().str.title()

# Methods in first-appearance order, which is the order the bars are drawn in
unique_methods = optimization_results_df['Method'].unique()

# Re-order the averages whenever alphabetical order differs from bar order
methods_aligned = all(average_efficiency['Method'].values == unique_methods)
if not methods_aligned:
    print("Mismatch detected! Correcting alignment explicitly...")
    average_efficiency = average_efficiency.set_index('Method').reindex(unique_methods).reset_index()

# Grouped bars: one bar per crop within each method
plt.figure(figsize=(12, 7))
sns.barplot(data=optimization_results_df, x='Method', y='Nutrient Efficiency', hue='Crop', ci=None)
plt.title('Nutrient Efficiency Across Methods and Crops', fontsize=16)
plt.xlabel('Optimization Method', fontsize=12)
plt.ylabel('Nutrient Efficiency (Yield/Total NPK)', fontsize=12)
plt.xticks(rotation=45, fontsize=10, ha='right')
plt.yticks(fontsize=10)
plt.legend(bbox_to_anchor=(1.05, 1), title='Crop', loc='upper left', fontsize=10)

# Per-method average drawn as a black line with diamond markers
x_coords = range(len(unique_methods))  # one x position per method group
overlay_values = average_efficiency['Average Nutrient Efficiency']
plt.plot(x_coords, overlay_values, color='black', linestyle='-', linewidth=1, label='Average Nutrient Efficiency')
plt.scatter(x_coords, overlay_values, color='black', marker='D', s=40)

# Legend call that also lists the average line
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', fontsize=10)

# Write the figure to disk, then display it
plot_path = os.path.join(optimization_dir, "nutrient-efficiency-across-methods-and-crops-with-averages.png")
plt.tight_layout()
plt.savefig(plot_path, dpi=300)

plt.show()

In [None]:
# Canonical (stripped, title-cased) method labels
optimization_results_df['Method'] = optimization_results_df['Method'].str.strip().str.title()

# Mean environmental impact per method, alphabetical by method name
average_environmental_impact = (
    optimization_results_df
    .groupby('Method', as_index=False)['Environmental Impact']
    .mean()
    .sort_values(by='Method')
    .reset_index(drop=True)
    .rename(columns={'Environmental Impact': 'Average Environmental Impact'})
)
average_environmental_impact['Method'] = average_environmental_impact['Method'].str.strip().str.title()

# Methods in first-appearance order, which is the order the bars are drawn in
unique_methods = optimization_results_df['Method'].unique()

# Re-order the averages whenever alphabetical order differs from bar order
methods_aligned = all(average_environmental_impact['Method'].values == unique_methods)
if not methods_aligned:
    print("Mismatch detected! Correcting alignment explicitly...")
    average_environmental_impact = (
        average_environmental_impact.set_index('Method').reindex(unique_methods).reset_index()
    )

# Grouped bars: one bar per crop within each method
# NOTE(review): calculate_metrics reports this metric as lambda_reg * total NPK,
# so the "(Total NPK)" axis label below may understate what is plotted - confirm.
plt.figure(figsize=(12, 7))
sns.barplot(data=optimization_results_df, x='Method', y='Environmental Impact', hue='Crop', ci=None)
plt.title('Environmental Impact Across Methods and Crops', fontsize=16)
plt.xlabel('Optimization Method', fontsize=12)
plt.ylabel('Environmental Impact (Total NPK)', fontsize=12)
plt.xticks(rotation=45, fontsize=10, ha='right')
plt.yticks(fontsize=10)
plt.legend(bbox_to_anchor=(1.05, 1), title='Crop', loc='upper left', fontsize=10)

# Per-method average drawn as a black line with diamond markers
x_coords = range(len(unique_methods))  # one x position per method group
overlay_values = average_environmental_impact['Average Environmental Impact']
plt.plot(x_coords, overlay_values, color='black', linestyle='-', linewidth=1, label='Average Environmental Impact')
plt.scatter(x_coords, overlay_values, color='black', marker='D', s=40)

# Legend call that also lists the average line
plt.legend(bbox_to_anchor=(1.05, 0.5), loc='center left', fontsize=10)

# Write the figure to disk, then display it
plot_path = os.path.join(optimization_dir, "environmental-impact-across-methods-and-crops-with-averages.png")
plt.tight_layout()
plt.savefig(plot_path, dpi=300)

plt.show()

In [None]:
def _grid_mask_smooth(x, y, z, nx=150, ny=150, smooth_frac=0.3, gauss_sigma=0.8, rbf_func='thin_plate'):
    """Interpolate with RBF, mask outside convex hull, lightly smooth."""
    gx = np.linspace(x.min(), x.max(), nx)
    gy = np.linspace(y.min(), y.max(), ny)
    GX, GY = np.meshgrid(gx, gy)

    rbf = Rbf(x, y, z, function=rbf_func, smooth=z.std() * smooth_frac if np.std(z) > 0 else 1e-6)
    GZ = rbf(GX, GY)

    hull = Delaunay(np.c_[x, y])
    outside = hull.find_simplex(np.c_[GX.ravel(), GY.ravel()]) < 0
    GZ = GZ.astype(float)
    GZ.ravel()[outside] = np.nan

    if gauss_sigma and gauss_sigma > 0:
        GZ = gaussian_filter(GZ, sigma=gauss_sigma)
    return GX, GY, GZ

def _panel_norm(GZ, mode='sequential', vcenter=0.0, q=(2, 98)):
    """Per-panel robust normalization from gridded values."""
    v = GZ[~np.isnan(GZ)]
    if v.size == 0:
        # fallback to something sane
        return Normalize(vmin=0, vmax=1)
    vmin, vmax = np.percentile(v, q)
    if mode == 'diverging':
        return TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
    return Normalize(vmin=vmin, vmax=vmax)

def plot_npk_objective_surfaces_paper(crop_data, crop,
                                      cmap='viridis',
                                      mode='sequential',  # 'sequential' or 'diverging'
                                      vcenter=0.0,
                                      save_dir='optimization'):
    """Plot and save N-P, N-K and P-K objective surfaces for one crop.

    Parameters
    ----------
    crop_data : pandas.DataFrame
        Rows for a single crop; must provide the columns 'Optimized N',
        'Optimized P', 'Optimized K' and 'Objective Value'.
    crop : str
        Crop name, used in the panel titles and the output file name.
    cmap : str
        Matplotlib colormap applied to every panel.
    mode : str
        'sequential' or 'diverging'; forwarded to ``_panel_norm``.
    vcenter : float
        Centre of the colour scale in 'diverging' mode.
    save_dir : str
        Directory the PNG is written to; created if it does not exist.

    The figure is written to
    ``<save_dir>/<crop>_3D_NPK_Objective_Surfaces_PERPANEL.png``, shown, and
    then closed so repeated calls do not accumulate open figures.
    """
    objective = crop_data['Objective Value'].to_numpy()
    nutrient_values = {
        'N': crop_data['Optimized N'].to_numpy(),
        'P': crop_data['Optimized P'].to_numpy(),
        'K': crop_data['Optimized K'].to_numpy(),
    }

    # Build all three surfaces before creating the figure, so a failure during
    # interpolation does not leave an empty figure behind.
    surfaces = [
        (x_name, y_name,
         _grid_mask_smooth(nutrient_values[x_name], nutrient_values[y_name], objective))
        for x_name, y_name in (('N', 'P'), ('N', 'K'), ('P', 'K'))
    ]

    # Figure
    fig = plt.figure(figsize=(18, 6))
    try:
        fig.subplots_adjust(left=0.05, right=0.95, wspace=0.45)

        # Helper to plot a single panel with its OWN norm
        def _draw_panel(ax, GX, GY, GZ, title, xl, yl):
            panel_norm = _panel_norm(GZ, mode=mode, vcenter=vcenter, q=(2, 98))
            surf = ax.plot_surface(GX, GY, GZ,
                                   cmap=cmap, norm=panel_norm,
                                   edgecolor='none', antialiased=True, shade=False,
                                   rstride=1, cstride=1, linewidth=0)
            ax.set_title(title, fontsize=14)
            ax.set_xlabel(xl, fontsize=12)
            ax.set_ylabel(yl, fontsize=12)
            ax.set_zlabel('Objective value', fontsize=12)
            # Colorbar
            cbar = fig.colorbar(surf, ax=ax, shrink=0.55, pad=0.10)
            cbar.set_label('Objective value', fontsize=10)
            cbar.locator = mticker.MaxNLocator(5)
            cbar.formatter = mticker.ScalarFormatter(useMathText=True)
            cbar.update_ticks()
            # A consistent, readable view
            ax.view_init(elev=28, azim=-55)

        # Panels (each gets its own color scale)
        for position, (x_name, y_name, (GX, GY, GZ)) in enumerate(surfaces, start=1):
            ax = fig.add_subplot(1, 3, position, projection='3d')
            _draw_panel(ax, GX, GY, GZ,
                        f'{x_name}-{y_name} Objective Surface for {crop}',
                        f'{x_name} (kg/ha)', f'{y_name} (kg/ha)')

        fig.tight_layout()
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, f'{crop}_3D_NPK_Objective_Surfaces_PERPANEL.png'), dpi=240)
        plt.show()
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

# Render and save one three-panel objective-surface figure per crop,
# visiting the crops in their order of first appearance.
for crop in optimization_results_df['Crop'].unique():
    crop_df = optimization_results_df.loc[optimization_results_df['Crop'].eq(crop)]
    plot_npk_objective_surfaces_paper(
        crop_df,
        crop,
        cmap='viridis',      # 'cividis' is a colorblind-safe alternative
        mode='sequential',   # switch to 'diverging' for a centred scale
        vcenter=0.0,
        save_dir='optimization',
    )