In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate
import warnings
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.metrics import make_scorer
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split,  cross_validate
from sklearn.preprocessing import StandardScaler
import time
import sys, os
from sklearn.pipeline import make_pipeline


# Location of the Excel workbook, relative to the notebook's working directory.
path = 'datasets/Dataset_2.xlsx'

# Load the "preproc" worksheet of that workbook into a DataFrame.
df = pd.read_excel(io=path, sheet_name="preproc")





INPUT DATA

In [None]:
# Scaler instance reused by the experiment loop further down.
std_scaler = StandardScaler()

# Feature matrix: every column from position 14 onward, as a NumPy array.
X = df.iloc[:, 14:].values
print(X.shape)

OUTPUT DATA

In [None]:
# Regression target: the single column at position 7, as a NumPy array.
Y = df.iloc[:, 7].values
print(Y.shape)

In [None]:
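**NRMSEPiqr** — root-mean-square error of prediction (RMSEP) normalized by the interquartile range (IQR) of the observed values; lower is better.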

In [None]:


def NRMSEPiqr(observed_values, predicted_values):
    """Return the RMSE of prediction normalised by the IQR of the observations.

    Parameters
    ----------
    observed_values : array-like
        Reference (measured) values.
    predicted_values : array-like
        Predicted values, paired with ``observed_values`` by position.

    Returns
    -------
    float
        ``RMSEP / (Q3 - Q1)`` where Q1 and Q3 are the 25th and 75th
        percentiles of ``observed_values``. Lower is better. When the IQR
        is zero the division follows NumPy semantics (``inf`` or ``nan``).
    """
    # Coerce to float arrays so plain lists work and so pandas objects are
    # compared by position rather than being re-aligned on their index.
    observed = np.asarray(observed_values, dtype=float)
    predicted = np.asarray(predicted_values, dtype=float)

    # Root-mean-square error of prediction.
    rmsep = np.sqrt(np.mean((observed - predicted) ** 2))

    # Interquartile range of the observed values.
    q1, q3 = np.percentile(observed, [25, 75])

    return rmsep / (q3 - q1)

# Wrap the metric as a scikit-learn scorer. With greater_is_better=False,
# make_scorer flips the sign, so this scorer yields -NRMSEPiqr and follows
# the scikit-learn convention that a higher score is better.
NRMSEPiqrscorer = make_scorer(score_func=NRMSEPiqr, greater_is_better=False)


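SVR — support vector regression, with the regularization parameter C tuned by a genetic search (GASearchCV)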

In [None]:

# Silence warnings in this process; exporting PYTHONWARNINGS lets child Python
# processes (e.g. the parallel workers used with n_jobs=-1) inherit the setting.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

# Number of random train/test splits evaluated for every test size.
trials = 100
np.random.seed(42)
# One split seed per trial, drawn from [0, trials). Values can repeat, so
# some trials may share the same train/test split.
seeds = np.random.randint(trials, size=trials)

# Test-set fractions to evaluate.
ripartisions = [0.25, 0.5, 0.75]
results = []

# Search space for the SVR parameter C; identical for every trial, so it is
# built once instead of inside the loop.
param_grid = {'C': Continuous(1/100000, 100)}

# One subplot per test size. squeeze=False always returns a 2-D array of
# axes, so flatten() also works when only one test size is listed.
fig, axs = plt.subplots(len(ripartisions), 1, figsize=(20, 25),
                        gridspec_kw={'hspace': 0.5}, squeeze=False)
fig.suptitle('SVR NITOROGEN NRMSEPiqr test' + "\n")
axs = axs.flatten()

for k, ripartition in enumerate(ripartisions):

    # Test-set NRMSEPiqr of every trial for this test size.
    NRMSEP = []

    # Best (lowest) test error seen so far and the data that produced it.
    # Every key read after the trial loop is initialised here, so the summary
    # row can be built even if no trial yields a usable score. "alpha" and "r"
    # are never written in this cell; they are kept only for compatibility.
    best_NRMSEP = float("inf")
    best_data_1 = {"Y_test": None, "Y_pred": None, "seed": None,
                   "alpha": None, "r": None,
                   "C": None, "model": None, "time": None}

    run_name = str(ripartition) + " ripartition"
    print(run_name)

    for i in range(trials):
        print("Trial:", i + 1)

        # Split into training and test sets, then standardise using
        # statistics computed on the training part only.
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=ripartition, random_state=seeds[i])
        X_train = std_scaler.fit_transform(X_train)
        X_test = std_scaler.transform(X_test)

        # NOTE(review): the variable name presumably dates from an earlier
        # elastic-net version of this cell; the estimator is an SVR.
        elastic_reg_cd = SVR()

        # NRMSEPiqrscorer is built with greater_is_better=False, so it
        # returns the NEGATED error. The search must therefore MAXIMISE the
        # score; criteria="min" would select the C with the largest error.
        evolved_estimator = GASearchCV(estimator=elastic_reg_cd,
                                       cv=5,
                                       scoring=NRMSEPiqrscorer,
                                       param_grid=param_grid,
                                       n_jobs=-1,
                                       verbose=False,
                                       population_size=10,
                                       generations=10,
                                       criteria="max")

        start_time = time.time()
        evolved_estimator.fit(X_train, Y_train)
        elapsed_time = (time.time() - start_time)  # elapsed time in seconds

        # Evaluate the tuned model on the held-out test set.
        Y_pred_elastic_cd = evolved_estimator.best_estimator_.predict(X_test)

        NRMSEP.append(NRMSEPiqr(Y_test, Y_pred_elastic_cd))

        # Remember the trial with the lowest test error.
        if NRMSEP[-1] < best_NRMSEP:
            best_NRMSEP = NRMSEP[-1]
            best_data_1["Y_test"] = Y_test
            best_data_1["Y_pred"] = Y_pred_elastic_cd
            best_data_1["C"] = float(evolved_estimator.best_params_["C"])
            best_data_1["seed"] = seeds[i]
            best_data_1["model"] = evolved_estimator.best_estimator_
            best_data_1["time"] = elapsed_time

    # Summary row for this test size; "time" is the search time of the best
    # trial only, not a total.
    results.append([str(ripartition * 100), str(best_NRMSEP),
                    (np.mean(NRMSEP)), str(np.var(NRMSEP)),
                    str(best_data_1["C"]), str(best_data_1["seed"]), best_data_1["time"]])

    # Plot the best model: predicted vs. actual, with the identity line.
    ax = axs[k]
    if best_data_1["Y_test"] is not None:
        y_min = min(best_data_1["Y_test"])
        y_max = max(best_data_1["Y_test"])
        ax.scatter(best_data_1["Y_test"], best_data_1["Y_pred"], c='blue', label='Actual vs Predicted')
        ax.plot([y_min, y_max], [y_min, y_max], '--', c='red', label='Perfect Prediction')
        ax.legend()
    ax.set_xlabel("Actual values")
    ax.set_ylabel("Predicted value")
    ax.set_title(f"Test size(%): {ripartition * 100}" "\n" + "BEST NRMSEPiqr: " + results[k][1])


# Show the results as a table, one row per test size.
headers = ["Test size(%)", "Best(NRMSEPiqr)", "Mean(NRMSEPiqr)", "Var(NRMSEPiqr)", "C", "Seed", "Time(sec)"]
print(tabulate(results, headers, tablefmt="double_outline"))
