In [None]:
# Exploring different combinations of features and applying linear Support Vector Machine Regression (SVR) on each subset.

In [130]:
import warnings
warnings.filterwarnings("ignore")

In [152]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, RationalQuadratic, Matern, RBF

In [132]:
df = pd.read_csv('../Data/avg_fitted_ellipse_parameter_and_true_volume.csv')  

In [136]:
columns = ['apple_label', 'semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity', 'volume']

column_dict = {
    0: 'semi_major_axis',
    1: 'semi_minor_axis',
    2: 'area',
    3: 'perimeter',
    4: 'eccentricity'
}

seeds = [0, 10, 20, 30, 40]

In [134]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Linear Kernal ---------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [135]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='linear')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 25826.161
Mean Squared Error (MSE): 666990599.563
Mean Percentage Error (MPE): 13190.896



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 32466.701
Mean Squared Error (MSE): 1054086645.882
Mean Percentage Error (MPE): 20735.843



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 14238.564
Mean Squared Error (MSE): 202736712.239
Mean Percentage Error (MPE): 8120.05



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 34548.451
Mean Squared Error (MSE): 1193595447.375
Mean Percentage 

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Guassian Kernal -------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [138]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='rbf')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 27.819
Mean Squared Error (MSE): 773.909
Mean Percentage Error (MPE): 15.026



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 27.566
Mean Squared Error (MSE): 759.907
Mean Percentage Error (MPE): 14.894



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 27.566
Mean Squared Error (MSE): 759.899
Mean Percentage Error (MPE): 14.894



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 27.087
Mean Squared Error (MSE): 733.71
Mean Percentage Error (MPE): 14.638



***********************

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Polynomial Kernal -----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [140]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='poly', degree=3)
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.994
Mean Squared Error (MSE): 323.791
Mean Percentage Error (MPE): 10.178



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.707
Mean Squared Error (MSE): 313.536
Mean Percentage Error (MPE): 9.863



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.707
Mean Squared Error (MSE): 313.521
Mean Percentage Error (MPE): 9.862



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 17.474
Mean Squared Error (MSE): 305.355
Mean Percentage Error (MPE): 9.518



*************************

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Exponential Kernal ----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [145]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = ExpSineSquared(length_scale=1.0, periodicity=1.0)
        
        alpha = 1e-6  
        while True:
            try:
                gpr = GaussianProcessRegressor(kernel=kernel, alpha=alpha, random_state=0)
                gpr.fit(X_train, y_train)
                break  
            except Exception as e:
                alpha *= 10 

        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 40.751
Mean Squared Error (MSE): 1660.66
Mean Percentage Error (MPE): 19.512



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 40.225
Mean Squared Error (MSE): 1618.043
Mean Percentage Error (MPE): 19.491



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 41.23
Mean Squared Error (MSE): 1699.876
Mean Percentage Error (MPE): 20.917



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 33.143
Mean Squared Error (MSE): 1098.465
Mean Percentage Error (MPE): 17.057



********************

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------- Rational Quadratic Kernal --------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [148]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = RationalQuadratic(length_scale=1.0, alpha=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.721
Mean Squared Error (MSE): 350.469
Mean Percentage Error (MPE): 9.623



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.736
Mean Squared Error (MSE): 351.031
Mean Percentage Error (MPE): 9.632



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.732
Mean Squared Error (MSE): 350.901
Mean Percentage Error (MPE): 9.63



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 18.758
Mean Squared Error (MSE): 351.849
Mean Percentage Error (MPE): 9.646



***************************

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# --------------------------------------------- Maxtern Kernal ------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [150]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = Matern(length_scale=1.0, nu=2.5)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ---------------------- Gaussian Process Regression Squared exponential kernal -------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [153]:
for i in range (0, 31):
    
    i_temp = i
    drop_columns = ['volume', 'apple_label']
    for j in range(5):
        if i_temp % 2 == 1:
            drop_columns.append(column_dict[j])
        i_temp //= 2

    print('\n')
    X = df.drop(columns = drop_columns).values.tolist() 
    y = df['volume'].values.tolist()  

    rmses = np.empty(0)
    mses = np.empty(0)
    mpes = np.empty(0)
    
    for i in range(len(seeds)):
    
        seed = seeds[i]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = RBF(length_scale=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        y_pred, sigma = gpr.predict(X_test, return_std=True)

        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mse = mean_squared_error(y_test, y_pred)
        mpe = mean_absolute_percentage_error(y_test, y_pred)

        np.append(rmses, rmse)
        np.append(mses, mse)
        np.append(mpes, mpe)

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmse), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mse), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpe), 3))
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 158.747
Mean Squared Error (MSE): 25200.625
Mean Percentage Error (MPE): 100.0



**************