# a-kncn using selected features

In [1]:
#a-kncn using selected features
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score

# Load data from CSV file
data = pd.read_csv("crt_data.csv")

# Split data into features (independent variables) and target (dependent variable)
X = data[['N', 'P', 'pH','Area_in_hectares']]
y = data['Soil_Type']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit KMeans clustering to training data to find centroids for each class
kmeans = KMeans(n_clusters=len(data['Soil_Type'].unique()),random_state=42)
kmeans.fit(X_train)
centroids = kmeans.cluster_centers_

# Fit Nearest Neighbors model
nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute').fit(centroids)

# Classify test data
predicted_labels = []
for x in X_test.values:
    distances, indices = nbrs.kneighbors([x])
    predicted_labels.append(y_train.iloc[indices[0][0]])

# Calculate accuracy
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", accuracy)


Accuracy: 0.4125114995400184


# akncn-ELM-BOA:(Extreme learning machine-Butterfly optimisation algorithm)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_error, 
mean_squared_log_error
from sklearn.preprocessing import MinMaxScaler

# Extreme Learning Machine (ELM) Implementation
class ELMRegressorCustom:
    def __init__(self, n_hidden):
        self.n_hidden = n_hidden

    def fit(self, X_train, y_train):
        n_samples, n_features = X_train.shape
        self.input_weights = np.random.randn(n_features, self.n_hidden)
        H = np.dot(X_train, self.input_weights)
        self.output_weights = np.dot(np.linalg.pinv(H), y_train)

    def predict(self, X_test):
        H = np.dot(X_test, self.input_weights)
        y_pred = np.dot(H, self.output_weights)
        return y_pred

# Butterfly Optimization Algorithm (BOA) Implementation
class BOACustom:
    def __init__(self, max_iter, pop_size):
        self.max_iter = max_iter
        self.pop_size = pop_size

    def _initialize_population(self, n_features):
        return np.random.uniform(-1, 1, size=(self.pop_size, n_features))

    def _evaluate_fitness(self, X_train, y_train, population):
        fitness = []
        for individual in population:
            elm = ELMRegressorCustom(n_hidden=len(X_train[0]))
            elm.fit(X_train, y_train)
            y_pred = elm.predict(X_train)
            fitness.append(mean_squared_error(y_train, y_pred))
        return np.array(fitness)

    def optimize(self, X_train, y_train):
        n_features = len(X_train[0])
        population = self._initialize_population(n_features)
        for _ in range(self.max_iter):
            fitness = self._evaluate_fitness(X_train, y_train, population)
            sorted_indices = np.argsort(fitness)
            best_individual = population[sorted_indices[0]]
            new_population = [best_individual]
            for i in range(1, self.pop_size):
                new_individual = best_individual + np.random.normal(scale=0.1, size=n_features)
                new_population.append(new_individual)
            population = np.array(new_population)
        return {'n_hidden': len(X_train[0])}

# Load data from CSV file
data = pd.read_csv("crt_data.csv")

# Split data into features (independent variables) and target (dependent variable)
X = data[['N', 'P', 'pH', 'Area_in_hectares']]
y = data['Soil_Type']

# Split data into training and testing sets for a-KNCN
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit KMeans clustering to training data to find centroids for each class
kmeans = KMeans(n_clusters=len(data['Soil_Type'].unique()), random_state=42)
kmeans.fit(X_train)
centroids = kmeans.cluster_centers_

# Fit Nearest Neighbors model
nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute').fit(centroids)

# Classify test data using a-KNCN
predicted_labels = []
for x in X_test.values:
    distances, indices = nbrs.kneighbors([x])
    predicted_labels.append(y_train.iloc[indices[0][0]])

# Calculate accuracy for a-KNCN
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy of a-KNCN:", accuracy)

# Prepare data for ELM
X_ELM = data[['Soil_Type', 'rainfall', 'temperature']]
y_ELM = data['Yield_ton_per_hec']

# Normalize data for ELM
scaler = MinMaxScaler()
X_ELM_normalized = scaler.fit_transform(X_ELM)

# Split data into training and testing sets for ELM
X_train_ELM, X_test_ELM, y_train_ELM, y_test_ELM = train_test_split(X_ELM_normalized, y_ELM, test_size=0.3, random_state=42)

# Define and optimize ELM model using Butterfly Optimization Algorithm
boa = BOACustom(max_iter=100, pop_size=50)
best_params = boa.optimize(X_train_ELM, y_train_ELM)

# Train ELM model with optimized parameters
elm = ELMRegressorCustom(**best_params)
elm.fit(X_train_ELM, y_train_ELM)

# Predict using trained ELM model
y_pred_ELM = elm.predict(X_test_ELM)

# Error metrics calculation for ELM
mae = mean_absolute_error(y_test_ELM, y_pred_ELM)
r2 = r2_score(y_test_ELM, y_pred_ELM)
medae = median_absolute_error(y_test_ELM, y_pred_ELM)
evs = explained_variance_score(y_test_ELM, y_pred_ELM)
msle = mean_squared_log_error(y_test_ELM, y_pred_ELM)
mse = mean_squared_error(y_test_ELM, y_pred_ELM)
rmse = mse ** 0.5

# Calculate mean absolute percentage error (MAPE)
mape = np.mean(np.abs((y_test_ELM - y_pred_ELM) / np.clip(np.abs(y_test_ELM), 1e-1, None))) * 100

# Error metrics normalization
mae_norm = mae / (y_ELM.max() - y_ELM.min())
r2_norm = (r2 + 1) / 2  # Ranges from -1 to 1, normalize to 0 to 1
medae_norm = medae / (y_ELM.max() - y_ELM.min())
evs_norm = (evs + 1) / 2  # Ranges from -1 to 1, normalize to 0 to 1
msle_norm = msle / (y_ELM.max() - y_ELM.min())  # Assuming positive target values
mse_norm = mse / ((y_ELM.max() - y_ELM.min()) ** 2)
rmse_norm = rmse / (y_ELM.max() - y_ELM.min())
mape_norm = mape / 100  # Percentage to decimal

# Print error metrics
print("Mean Absolute Error:", mae_norm)
print("R2 Score:", r2_norm)
print("Median Absolute Error:", medae_norm)
print("Explained Variance Score:", evs_norm)
print("Mean Squared Log Error:", msle_norm)
print("Mean Squared Error:", mse_norm)
print("Root Mean Squared Error:", rmse_norm)
print("Mean Absolute Percentage Error:", mape_norm)


Accuracy of a-KNCN: 0.4125114995400184
Mean Absolute Error: 0.0003578695885072519
R2 Score: 0.5010712286426116
Median Absolute Error: 0.00014665084704878006
Explained Variance Score: 0.5010822368989825
Mean Squared Log Error: 7.031190107495536e-05
Mean Squared Error: 3.79223679023735e-05
Root Mean Squared Error: 0.006158113989069502
Mean Absolute Percentage Error: 4.103119425051535


# aKNC-RF(k neighbour classifier-Random forest)

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_log_error, 
mean_squared_error
import numpy as np

# Load the data
data = pd.read_csv("crt_data.csv")

# Split the data into features (X) and target variable (y) for classification
X_classification = data.drop('Soil_Type', axis=1)
y_classification = data['Soil_Type']

# Train the aKNC model for classification
aknc = KNeighborsClassifier(n_neighbors=5)
aknc.fit(X_classification, y_classification)

# Get the class labels from aKNC
class_labels = aknc.predict(X_classification)

# Add class labels as a feature to the dataset
data['class_labels'] = class_labels

# Split the data into features (X) and target variable (y) for prediction
X_prediction = data.drop('Yield_ton_per_hec', axis=1)
y_prediction = data['Yield_ton_per_hec']

# Split the data into training and testing sets for prediction
X_train, X_test, y_train, y_test = train_test_split(X_prediction, y_prediction, test_size=0.2, random_state=42)

# Train the Random Forest model for prediction
rf_model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions using the combined model
predictions = rf_model.predict(X_test)

# Calculate accuracy for classification 
accuracy = accuracy_score(y_classification, class_labels)

# Calculate error metrics for regression
mae = mean_absolute_error(y_test, predictions)
r_squared = r2_score(y_test, predictions)
medae = median_absolute_error(y_test, predictions)
evs = explained_variance_score(y_test, predictions)
msle = mean_squared_log_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_test - predictions) / np.clip(np.abs(y_test), 1e-4, None)))
mape /= np.max(y_test)

# Normalize error metrics to range [0, 1]
max_y = np.max(y_test)
mse /= max_y ** 2
rmse /= max_y

# Print error metrics
print("Accuracy:", accuracy)
print("Mean Absolute Error (MAE):", mae)
print("R-squared:", r_squared)
print("Median Absolute Error (MedAE):", medae)
print("Explained Variance Score (EVS):", evs)
print("Mean Squared Logarithmic Error (MSLE):", msle)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Percentage Error (MAPE):", mape)


Accuracy: 0.8789604998785631
Mean Absolute Error (MAE): 0.8101421177498689
R-squared: 0.8086717686260186
Median Absolute Error (MedAE): 0.3405273030444411
Explained Variance Score (EVS): 0.8086717777890843
Mean Squared Logarithmic Error (MSLE): 0.06285719753742476
Mean Squared Error (MSE): 0.00019974241444261942
Root Mean Squared Error (RMSE): 0.014133025664825612
Mean Absolute Percentage Error (MAPE): 0.153546500928083


# aKNC-GB(k neighbour classifier-gradient boost)

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_error, mean_squared_log_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the data
data = pd.read_csv("crt_data.csv")

# Ensure target variable (Yield_ton_per_hec) is non-negative
data['Yield_ton_per_hec'] = data['Yield_ton_per_hec'].clip(lower=0)

# Split the data into features (X) and target variable (y) for classification
X_classification = data.drop('Soil_Type', axis=1)
y_classification = data['Soil_Type']

# Train the aKNC model for classification
aknc = KNeighborsClassifier(n_neighbors=5)
aknc.fit(X_classification, y_classification)

# Get the class labels from aKNC
class_labels = aknc.predict(X_classification)

# Add class labels as a feature to the dataset
data['class_labels'] = class_labels

# Split the data into features (X) and target variable (y) for prediction
X_prediction = data.drop('Yield_ton_per_hec', axis=1)
y_prediction = data['Yield_ton_per_hec']

# Split the data into training and testing sets for prediction
X_train, X_test, y_train, y_test = train_test_split(X_prediction, y_prediction, test_size=0.2, random_state=42)

# Train the Gradient Boosting model for prediction
gb_model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1,max_depth=5,random_state=42)
gb_model.fit(X_train, y_train)

# Make predictions using the combined model
predictions = gb_model.predict(X_test)

# Ensure predictions are non-negative
predictions = np.maximum(predictions, 0)

# Calculate accuracy for classification
accuracy = accuracy_score(y_classification, class_labels)

# Calculate error metrics for regression
mae = mean_absolute_error(y_test, predictions)
r_squared = r2_score(y_test, predictions)
medae = median_absolute_error(y_test, predictions)
evs = explained_variance_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)

# Normalize MSE, RMSE, and MSLE to range [0, 1]
max_y = np.max(y_test)
mse /= max_y ** 2
rmse /= max_y
msle = mean_squared_log_error(y_test, predictions)
msle_rescaled = np.mean(msle) / np.log(1 + max_y)

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_test - predictions) / np.clip(np.abs(y_test), 1e-4, None)))
mape /= np.max(y_test)

# Print accuracy and error metrics
print("Accuracy:", accuracy)
print("Mean Absolute Error (MAE):", mae)
print("R-squared:", r_squared)
print("Median Absolute Error (MedAE):", medae)
print("Explained Variance Score (EVS):", evs)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Squared Logarithmic Error (MSLE):", msle_rescaled)
print("Mean Absolute Percentage Error (MAPE):", mape)


Accuracy: 0.8789604998785631
Mean Absolute Error (MAE): 0.39185149864359886
R-squared: 0.9706123793957174
Median Absolute Error (MedAE): 0.15137676388456955
Explained Variance Score (EVS): 0.9706125444671301
Mean Squared Error (MSE): 3.068002172010528e-05
Root Mean Squared Error (RMSE): 0.005538954930319011
Mean Squared Logarithmic Error (MSLE): 0.003098843563190742
Mean Absolute Percentage Error (MAPE): 0.08124104156954418


# aKNC-ANN(k neighbour classifier-artificial neural network)

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_error, 
mean_squared_log_error
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

# Load the data
data = pd.read_csv("crt_data.csv")

# Split the data into features (X) and target variable (y) for classification
X_classification = data.drop('Soil_Type', axis=1)
y_classification = data['Soil_Type']

# Train the KNC model for classification
aknc = KNeighborsClassifier(n_neighbors=5)
aknc.fit(X_classification, y_classification)

# Get the class labels from aKNC
class_labels = aknc.predict(X_classification)

# Add class labels as a feature to the dataset
data['class_labels'] = class_labels

# Split the data into features (X) and target variable (y) for prediction
X_prediction = data.drop('Yield_ton_per_hec', axis=1)
y_prediction = data['Yield_ton_per_hec']

# Split the data into training and testing sets for prediction
X_train, X_test, y_train, y_test = train_test_split(X_prediction, y_prediction, test_size=0.2, random_state=42)

# Standardize features for prediction
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the ANN model for prediction
ann_model = MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42)
ann_model.fit(X_train_scaled, y_train)

# Make predictions using the ANN model
predictions = ann_model.predict(X_test_scaled)

# Ensure predictions and ground truth values are non-negative
predictions = np.maximum(predictions, 0)
y_test = np.maximum(y_test, 0)

# Calculate accuracy for classification
accuracy = accuracy_score(y_classification, class_labels)

# Calculate error metrics for regression
mae = mean_absolute_error(y_test, predictions)
r_squared = r2_score(y_test, predictions)
medae = median_absolute_error(y_test, predictions)
evs = explained_variance_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
max_y = np.max(y_test)
mse /= max_y ** 2
rmse /= max_y
msle_re = mean_squared_log_error(y_test, predictions)
msle = np.mean(msle_re) / np.log(1 + max_y)

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_test - predictions) / np.clip(np.abs(y_test), 1e-4, None)))
mape /= np.max(y_test)

# Print accuracy and error metrics
print("Accuracy:", accuracy)
print("Mean Absolute Error (MAE):", mae)
print("R-squared:", r_squared)
print("Median Absolute Error (MedAE):", medae)
print("Explained Variance Score (EVS):", evs)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Squared Logarithmic Error (MSLE):", msle)
print("Mean Absolute Percentage Error (MAPE):", mape)


Accuracy: 0.8789604998785631
Mean Absolute Error (MAE): 1.9553142893566406
R-squared: 0.5656779228659645
Median Absolute Error (MedAE): 0.884876262832373
Explained Variance Score (EVS): 0.5726323666455635
Mean Squared Error (MSE): 0.000453422580188463
Root Mean Squared Error (RMSE): 0.021293721614327146
Mean Squared Logarithmic Error (MSLE): 0.06435072044150483
Mean Absolute Percentage Error (MAPE): 3.856140969635657




 # aKNCN-ELM

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_log_error, 
mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import numpy as np

class ELMRegressor:
    def __init__(self, input_size, hidden_layer_size):
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.input_weights = np.random.rand(input_size, hidden_layer_size)
        self.bias = np.random.rand(hidden_layer_size)
        self.output_weights = None

    def train(self, X, y):
        # Calculate hidden layer output
        hidden_output = np.dot(X, self.input_weights) + self.bias
        hidden_output = self._sigmoid(hidden_output)

        # Moore-Penrose pseudo-inverse to calculate output weights
        self.output_weights = np.dot(np.linalg.pinv(hidden_output), y)

    def predict(self, X):
        hidden_output = np.dot(X, self.input_weights) + self.bias
        hidden_output = self._sigmoid(hidden_output)
        return np.dot(hidden_output, self.output_weights)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

# Load data from CSV file
data = pd.read_csv("crt_data.csv")

# Classification of soil types using KMeans and Nearest Neighbors
X_classification = data[['N', 'P', 'pH', 'Area_in_hectares']]
y_classification = data['Soil_Type']

X_train_classification, X_test_classification, y_train_classification, y_test_classification = train_test_split(X_classification, y_classification, 
                                                                                                                test_size=0.3, random_state=42)

kmeans = KMeans(n_clusters=len(data['Soil_Type'].unique()), random_state=42)
kmeans.fit(X_train_classification)
centroids = kmeans.cluster_centers_

nbrs = NearestNeighbors(n_neighbors=4, algorithm='brute').fit(centroids)

predicted_labels = []
for x in X_test_classification.values:
    distances, indices = nbrs.kneighbors([x])
    predicted_labels.append(y_train_classification.iloc[indices[0][0]])

accuracy = accuracy_score(y_test_classification, predicted_labels)
print("Accuracy:", accuracy)

# Predicting crop yield using Extreme Learning Machine (ELM)
X_regression = data[['Soil_Type', 'rainfall', 'temperature']]
y_regression = data['Yield_ton_per_hec']

X_train_regression, X_test_regression, y_train_regression, y_test_regression = train_test_split(X_regression, y_regression, test_size=0.3, 
                                                                                                random_state=42)

# Scale features
scaler = MinMaxScaler()
X_train_regression_scaled = scaler.fit_transform(X_train_regression)
X_test_regression_scaled = scaler.transform(X_test_regression)

# Train ELM model
elm = ELMRegressor(input_size=X_train_regression_scaled.shape[1], hidden_layer_size=80)  # Adjust hidden_layer_size as needed
elm.train(X_train_regression_scaled, y_train_regression)

# Predict crop yield
y_pred_regression = elm.predict(X_test_regression_scaled)

# Transform target values to ensure they are non-negative
y_pred_regression_transformed = y_pred_regression - y_pred_regression.min() + 1

# Calculate error metrics
mae = mean_absolute_error(y_test_regression, y_pred_regression) / np.max(y_test_regression)
r2 = r2_score(y_test_regression, y_pred_regression)
medae = median_absolute_error(y_test_regression, y_pred_regression) / np.max(y_test_regression)
evs = explained_variance_score(y_test_regression, y_pred_regression)
msle = mean_squared_log_error(y_test_regression, y_pred_regression_transformed) / np.max(y_test_regression)
mse = mean_squared_error(y_test_regression, y_pred_regression) / np.max(y_test_regression)
rmse = np.sqrt(mse) / np.max(y_test_regression)

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_test_regression - y_pred_regression) / np.clip(np.abs(y_test_regression), 1e-4, None)))
mape = mape / np.max(y_test_regression)

print("Mean Absolute Error:", mae)
print("R^2 Score:", r2)
print("Median Absolute Error:", medae)
print("Explained Variance Score:", evs)
print("Mean Squared Log Error:", msle)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Percentage Error:", mape)


Accuracy: 0.4125114995400184
Mean Absolute Error: 0.0003357099677737959
R^2 Score: 0.004366158967688549
Median Absolute Error: 0.00012095560043299153
Explained Variance Score: 0.00439412073557599
Mean Squared Log Error: 0.00012024393010726097
Mean Squared Error: 0.37084885421591673
Root Mean Squared Error: 6.213382380968588e-05
Mean Absolute Percentage Error: 0.10548734561242336


 # aKNC-SVM

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_log_error, 
mean_squared_error
from sklearn.decomposition import PCA

# Load dataset
data = pd.read_csv("crt_data.csv")  # Assuming you have a CSV file named "crt_data.csv"

# Split data into features (X) and target (y)
X = data[['N', 'P', 'pH', 'Area_in_hectares']]
y = data['Soil_Type']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train k-Nearest Neighbors Classifier
knc = KNeighborsClassifier()
knc.fit(X_train_scaled, y_train)

# Predict Soil_Type for test set
y_pred = knc.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of k-Nearest Neighbors Classifier:", accuracy)

# Reduce dimensionality using PCA
pca = PCA(n_components=2)  # Choose appropriate number of components
X_reg_pca = pca.fit_transform(X)

# Split regression data into train and test sets
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg_pca, y, test_size=0.2, random_state=42)

# Train Random Forest Regressor
rf_reg = RandomForestRegressor()
rf_reg.fit(X_reg_train, y_reg_train)

# Predict Soil_Type for test set
y_reg_pred = rf_reg.predict(X_reg_test)

# Error metrics
mae = mean_absolute_error(y_reg_test, y_reg_pred)
r2 = r2_score(y_reg_test, y_reg_pred)
medianae = median_absolute_error(y_reg_test, y_reg_pred)
evs = explained_variance_score(y_reg_test, y_reg_pred)
msle = mean_squared_log_error(y_reg_test, y_reg_pred)
mse = mean_squared_error(y_reg_test, y_reg_pred)
rmse = np.sqrt(mse)

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((y_reg_test - y_reg_pred) / np.clip(np.abs(y_reg_test),1e2, None)))

# Print error metrics
print("\nError Metrics:")
print("Mean Absolute Error:", mae)
print("R2 Score:", r2)
print("Median Absolute Error:", medianae)
print("Explained Variance Score:", evs)
print("Mean Squared Log Error:", msle)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Percentage Error:", mape)


Accuracy of k-Nearest Neighbors Classifier: 0.7274383176022521

Error Metrics:
Mean Absolute Error: 0.559131837080325
R2 Score: 0.44622379555260683
Median Absolute Error: 0.28
Explained Variance Score: 0.4464382441401469
Mean Squared Log Error: 0.208065281009776
Mean Squared Error: 0.7793203338602849
Root Mean Squared Error: 0.88279121759354
Mean Absolute Percentage Error: 0.005591318370803251


# aKNCN-ELM-mBOA(modified Butterfly Optimisation Algorithm)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, mean_squared_error,
mean_squared_log_error
from sklearn.preprocessing import MinMaxScaler
import time

# Extreme Learning Machine (ELM) Implementation
class ELMRegressorCustom:
    def __init__(self, n_hidden):
        self.n_hidden = n_hidden

    def fit(self, X_train, y_train):
        n_samples, n_features = X_train.shape
        self.input_weights = np.random.randn(n_features, self.n_hidden)
        H = np.dot(X_train, self.input_weights)
        self.output_weights = np.dot(np.linalg.pinv(H), y_train)

    def predict(self, X_test):
        H = np.dot(X_test, self.input_weights)
        y_pred = np.dot(H, self.output_weights)
        return y_pred

# Modified Butterfly Optimization Algorithm (MBOA) Implementation
class MBOACustom:
    def __init__(self, max_iter, pop_size, weighing_constant):
        self.max_iter = max_iter
        self.pop_size = pop_size
        self.weighing_constant = weighing_constant

    def _initialize_population(self, n_features):
        return np.random.uniform(-1, 1, size=(self.pop_size, n_features))

    def _evaluate_fitness(self, X_train, y_train, population, n_hidden):
        fitness = []
        for individual in population:
            elm = ELMRegressorCustom(n_hidden=n_hidden)
            elm.fit(X_train, y_train)
            y_pred = elm.predict(X_train)
            fitness.append(mean_squared_error(y_train, y_pred))
        return np.array(fitness)

    def _local_search(self, population):
        mutated_population = []
        for individual in population:
            new_individual = individual + np.random.normal(scale=self.weighing_constant, size=len(individual))
            mutated_population.append(new_individual)
        return np.array(mutated_population)

    def optimize(self, X_train, y_train, n_hidden):
        n_features = len(X_train[0])
        population = self._initialize_population(n_features)
        for _ in range(self.max_iter):
            fitness = self._evaluate_fitness(X_train, y_train, population, n_hidden)
            sorted_indices = np.argsort(fitness)
            best_individual = population[sorted_indices[0]]
            new_population = [best_individual]
            for i in range(1, self.pop_size):
                new_individual = best_individual + np.random.normal(scale=0.1, size=n_features)
                new_population.append(new_individual)
            local_population = self._local_search(population)
            population = np.concatenate((np.array(new_population), local_population))
        return {'n_hidden': n_hidden}

# Load data from CSV file
data = pd.read_csv("crt_data.csv")

# Split data into features (independent variables) and target (dependent variable)
X = data[['N', 'P', 'pH', 'Area_in_hectares']]
y = data['Soil_Type']

# Split data into training and testing sets for a-KNCN
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit KMeans clustering to training data to find centroids for each class
kmeans = KMeans(n_clusters=len(data['Soil_Type'].unique()), random_state=42)
kmeans.fit(X_train)
centroids = kmeans.cluster_centers_

# Fit Nearest Neighbors model
nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute').fit(centroids)

# Classify test data using a-KNCN
predicted_labels = []
for x in X_test.values:
    distances, indices = nbrs.kneighbors([x])
    predicted_labels.append(y_train.iloc[indices[0][0]])

# Calculate accuracy for a-KNCN
accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy of a-KNCN:", accuracy)

# Prepare data for ELM
X_ELM = data[['Soil_Type', 'rainfall', 'temperature']]
y_ELM = data['Yield_ton_per_hec']

# Normalize data for ELM
scaler = MinMaxScaler()
X_ELM_normalized = scaler.fit_transform(X_ELM)

# Split data into training and testing sets for ELM
X_train_ELM, X_test_ELM, y_train_ELM, y_test_ELM = train_test_split(X_ELM_normalized, y_ELM, test_size=0.3, random_state=42)

# Define and optimize ELM model using Modified Butterfly Optimization Algorithm
start_time = time.time()
mboa = MBOACustom(max_iter=10, pop_size=10, weighing_constant=0.01)  # Adjusted parameters
best_params = mboa.optimize(X_train_ELM, y_train_ELM, n_hidden=10)

# Train ELM model with optimized parameters
elm = ELMRegressorCustom(**best_params)
elm.fit(X_train_ELM, y_train_ELM)

# Predict using trained ELM model
y_pred_ELM = elm.predict(X_test_ELM)

# Error metrics calculation for ELM
mae = mean_absolute_error(y_test_ELM, y_pred_ELM)
r2 = r2_score(y_test_ELM, y_pred_ELM)
medae = median_absolute_error(y_test_ELM, y_pred_ELM)
evs = explained_variance_score(y_test_ELM, y_pred_ELM)
msle = mean_squared_log_error(y_test_ELM, y_pred_ELM)
mse = mean_squared_error(y_test_ELM, y_pred_ELM)
rmse = mse ** 0.5


# Calculate mean absolute percentage error (MAPE)
mape = np.mean(np.abs((y_test_ELM - y_pred_ELM) / np.maximum(np.abs(y_test_ELM), 1))) * 100

# Error metrics normalization
mae_norm = mae / (y_ELM.max() - y_ELM.min())
r2_norm = (r2 + 1) / 2  # Ranges from -1 to 1, normalize to 0 to 1
medae_norm = medae / (y_ELM.max() - y_ELM.min())
evs_norm = (evs + 1) / 2  # Ranges from -1 to 1, normalize to 0 to 1
msle_norm = msle / (y_ELM.max() - y_ELM.min())  # Assuming positive target values
mse_norm = mse / ((y_ELM.max() - y_ELM.min()) ** 2)
rmse_norm = rmse / (y_ELM.max() - y_ELM.min())
mape_norm = mape / 100  # Percentage to decimal

# Print error metrics
print("Mean Absolute Error:", mae_norm)
print("R2 Score:", r2_norm)
print("Median Absolute Error:", medae_norm)
print("Explained Variance Score:", evs_norm)
print("Mean Squared Log Error:", msle_norm)
print("Mean Squared Error:", mse_norm)
print("Root Mean Squared Error:", rmse_norm)
print("Mean Absolute Percentage Error:", mape_norm)


Accuracy of a-KNCN: 0.4125114995400184
Mean Absolute Error: 0.00035782988832440627
R2 Score: 0.5010707370734182
Median Absolute Error: 0.00014664929233942965
Explained Variance Score: 0.5010818628134308
Mean Squared Log Error: 7.030317421469163e-05
Mean Squared Error: 3.7922405265357684e-05
Root Mean Squared Error: 0.0061581170227073215
Mean Absolute Percentage Error: 1.473432988635159
