In [1]:
"""
    Exploring different combinations of features and applying various kernal Support Vector Machine Regression (SVR) on each average fitted ellipse 
    parameter subset.
    Here data is standardised before fitting SVMR.
    Robust Scaling is used here.
"""

'\n    Exploring different combinations of features and applying various kernal Support Vector Machine Regression (SVR) on each average fitted ellipse \n    parameter subset.\n    Here data is standardised before fitting SVMR.\n'

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, RationalQuadratic, Matern, RBF

In [4]:
df = pd.read_csv('../Data/avg_fitted_ellipse_parameter_and_true_volume.csv')  

In [5]:
columns = ['apple_label', 'semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity', 'volume']

column_dict = {
    0: 'semi_major_axis',
    1: 'semi_minor_axis',
    2: 'area',
    3: 'perimeter',
    4: 'eccentricity'
}

seeds = [0, 10, 20, 30, 40]

In [6]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [7]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Linear Kernal ---------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [8]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
        
        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='linear')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.922
Mean Squared Error (MSE): 286.342
Mean Percentage Error (MPE): 8.72



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.795
Mean Squared Error (MSE): 282.059
Mean Percentage Error (MPE): 8.597



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.067
Mean Squared Error (MSE): 291.268
Mean Percentage Error (MPE): 8.875



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 16.933
Mean Squared Error (MSE): 286.736
Mean Percentage Error (MPE): 8.754



***************************

In [9]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Guassian Kernal -------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [10]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='rbf')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.142
Mean Squared Error (MSE): 632.114
Mean Percentage Error (MPE): 13.437



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.17
Mean Squared Error (MSE): 633.52
Mean Percentage Error (MPE): 13.425



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.242
Mean Squared Error (MSE): 637.152
Mean Percentage Error (MPE): 13.543



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25.307
Mean Squared Error (MSE): 640.445
Mean Percentage Error (MPE): 13.597



************************

In [11]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Polynomial Kernal -----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [12]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        svr_model = SVR(kernel='poly', degree=3)
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.814
Mean Squared Error (MSE): 475.855
Mean Percentage Error (MPE): 11.445



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.694
Mean Squared Error (MSE): 470.641
Mean Percentage Error (MPE): 11.31



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.863
Mean Squared Error (MSE): 477.984
Mean Percentage Error (MPE): 11.426



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 21.561
Mean Squared Error (MSE): 464.86
Mean Percentage Error (MPE): 11.057



************************

In [13]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Exponential Kernal ----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [14]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        

        kernel = ExpSineSquared(length_scale=1.0, periodicity=1.0)
        
        alpha = 1e-6  
        while True:
            try:
                gpr = GaussianProcessRegressor(kernel=kernel, alpha=alpha, random_state=0)
                gpr.fit(X_train, y_train)
                break  
            except Exception as e:
                alpha *= 10 

        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 70.739
Mean Squared Error (MSE): 5004.008
Mean Percentage Error (MPE): 37.764



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35.239
Mean Squared Error (MSE): 1241.784
Mean Percentage Error (MPE): 17.905



******************

In [15]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------- Rational Quadratic Kernal --------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [16]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = RationalQuadratic(length_scale=1.0, alpha=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.231
Mean Squared Error (MSE): 332.375
Mean Percentage Error (MPE): 9.497



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.994
Mean Squared Error (MSE): 323.785
Mean Percentage Error (MPE): 9.37



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.462
Mean Squared Error (MSE): 340.861
Mean Percentage Error (MPE): 9.632



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.196
Mean Squared Error (MSE): 331.103
Mean Percentage Error (MPE): 9.484



***************************

In [17]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# --------------------------------------------- Maxtern Kernal ------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [18]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = Matern(length_scale=1.0, nu=2.5)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************

In [19]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ---------------------- Gaussian Process Regression Squared exponential kernal -------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [20]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        scaler = RobustScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        kernel = RBF(length_scale=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************