In [21]:
"""
    Exploring different combinations of features and applying various kernal Support Vector Machine Regression (SVR) on each average fitted ellipse 
    parameter subset.
    Here data is standardised before fitting SVMR. 
    Z - Normalisation is used. 
"""

'\n    Exploring different combinations of features and applying various kernal Support Vector Machine Regression (SVR) on each average fitted ellipse \n    parameter subset.\n'

In [22]:
import warnings
warnings.filterwarnings("ignore")

In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, RationalQuadratic, Matern, RBF

In [24]:
df = pd.read_csv('../Data/avg_fitted_ellipse_parameter_and_true_volume.csv')  

In [25]:
columns = ['apple_label', 'semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity', 'volume']

column_dict = {
    0: 'semi_major_axis',
    1: 'semi_minor_axis',
    2: 'area',
    3: 'perimeter',
    4: 'eccentricity'
}

seeds = [0, 10, 20, 30, 40]

In [26]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [27]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Linear Kernal ---------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [28]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
        scaler = StandardScaler()

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='linear')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.947
Mean Squared Error (MSE): 287.21
Mean Percentage Error (MPE): 8.737



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.925
Mean Squared Error (MSE): 286.442
Mean Percentage Error (MPE): 8.7



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.973
Mean Squared Error (MSE): 288.068
Mean Percentage Error (MPE): 8.744



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.88
Mean Squared Error (MSE): 284.95
Mean Percentage Error (MPE): 8.684



*******************************

In [29]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Guassian Kernal -------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [30]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='rbf')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.167
Mean Squared Error (MSE): 633.39
Mean Percentage Error (MPE): 13.457



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.196
Mean Squared Error (MSE): 634.819
Mean Percentage Error (MPE): 13.461



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.294
Mean Squared Error (MSE): 639.81
Mean Percentage Error (MPE): 13.599



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.347
Mean Squared Error (MSE): 642.458
Mean Percentage Error (MPE): 13.633



************************

In [31]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Polynomial Kernal -----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [32]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='poly', degree=3)
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.294
Mean Squared Error (MSE): 453.44
Mean Percentage Error (MPE): 11.239



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.12
Mean Squared Error (MSE): 446.039
Mean Percentage Error (MPE): 11.023



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.241
Mean Squared Error (MSE): 451.19
Mean Percentage Error (MPE): 11.095



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.026
Mean Squared Error (MSE): 442.097
Mean Percentage Error (MPE): 10.859



*************************

In [33]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Exponential Kernal ----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [34]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        

        kernel = ExpSineSquared(length_scale=1.0, periodicity=1.0)
        
        alpha = 1e-6  
        while True:
            try:
                gpr = GaussianProcessRegressor(kernel=kernel, alpha=alpha, random_state=0)
                gpr.fit(X_train, y_train)
                break  
            except Exception as e:
                alpha *= 10 

        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



******************

In [35]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------- Rational Quadratic Kernal --------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [36]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = RationalQuadratic(length_scale=1.0, alpha=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.188
Mean Squared Error (MSE): 330.792
Mean Percentage Error (MPE): 9.479



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.97
Mean Squared Error (MSE): 322.906
Mean Percentage Error (MPE): 9.367



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.425
Mean Squared Error (MSE): 339.49
Mean Percentage Error (MPE): 9.622



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.181
Mean Squared Error (MSE): 330.538
Mean Percentage Error (MPE): 9.484



****************************

In [37]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# --------------------------------------------- Maxtern Kernal ------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [38]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = Matern(length_scale=1.0, nu=2.5)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************

In [39]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ---------------------- Gaussian Process Regression Squared exponential kernal -------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [40]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = RBF(length_scale=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************