In [1]:
'''
    Explores every combination of fitted-ellipse features and fits kernel Support Vector Regression (SVR) and
    Gaussian Process Regression (GPR) models on each feature subset.
'''

'\n    Exploring different combinations of features and applying various kernal Support Vector Machine Regression (SVR) on each fitted ellipse \n    parameter subset.\n'

In [2]:
import warnings
# Silence every warning for the rest of the kernel session so the long
# per-subset printouts below stay readable.
# NOTE(review): this is a blanket filter, so it presumably also hides
# convergence/numerical warnings from the model fits — confirm that is intended.
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, RationalQuadratic, Matern, RBF

In [4]:
# Load the table used by every experiment below. The "volumn" spelling is part
# of the on-disk file name and must stay as-is.
df = pd.read_csv(
    filepath_or_buffer='../Data/fitted_ellipse_parameter_and_true_volumn_mapper.csv'
)

In [5]:
# Column names used when reporting which features remain after dropping.
# (Presumably mirrors the CSV header order — verify against the data file.)
columns = [
    'apple_label',
    'semi_major_axis',
    'semi_minor_axis',
    'xc',
    'yc',
    'theta',
    'area',
    'perimeter',
    'eccentricity',
    'volume',
]

# Bit position -> candidate feature. The experiment loops treat their loop
# index as a bit mask over these positions to decide which features to drop.
column_dict = dict(enumerate([
    'semi_major_axis',
    'semi_minor_axis',
    'area',
    'perimeter',
    'eccentricity',
]))

# Random states used for the train/test splits; metrics are averaged over them.
seeds = [0]

In [6]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), expressed in percent.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Parameters
    ----------
    y_true : array-like
        Reference values. Plain Python lists are accepted.
    y_pred : array-like
        Predicted values, same length as ``y_true``.

    Returns
    -------
    float
        The MAPE in percent (e.g. ``10.0`` means 10 %).

    Notes
    -----
    A zero in ``y_true`` makes the corresponding term a division by zero, so
    the result becomes ``inf`` or ``nan`` rather than raising.
    """
    # Coerce up front so list/list input works; list - list is a TypeError.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [7]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Linear Kernel ---------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a linear-kernel SVR on every non-empty subset of the candidate
# features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded because it would leave no
# features to train on.
always_dropped = ['volume', 'apple_label', 'xc', 'yc', 'theta']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='linear')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")
    print("")



**********************************************************
With features : ['semi_major_axis', 'semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 27623.276
Mean Squared Error (MSE): 763045354.651
Mean Percentage Error (MPE): 15431.507



**********************************************************
With features : ['semi_minor_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 52047.411
Mean Squared Error (MSE): 2708932977.359
Mean Percentage Error (MPE): 23705.104



**********************************************************
With features : ['semi_major_axis', 'area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 35159.52
Mean Squared Error (MSE): 1236191860.87
Mean Percentage Error (MPE): 16644.913



**********************************************************
With features : ['area', 'perimeter', 'eccentricity']
Root Mean Squared Error (RMSE): 26999.775
Mean Squared Error (MSE): 728987838.672
Mean Percentage 

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Gaussian Kernel -------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score an RBF-kernel SVR on every non-empty subset of the candidate features
# listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded because it would leave no
# features to train on.
always_dropped = ['volume', 'apple_label', 'xc', 'yc', 'theta']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='rbf')
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Polynomial Kernel -----------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a degree-3 polynomial-kernel SVR on every non-empty subset of the
# candidate features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded because it would leave no
# features to train on.
always_dropped = ['volume', 'apple_label', 'xc', 'yc', 'theta']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        svr_model = SVR(kernel='poly', degree=3)
        svr_model.fit(X_train, y_train)
        y_pred = svr_model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Exp-Sine-Squared Kernel -----------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a Gaussian process regressor with an ExpSineSquared kernel on every
# non-empty subset of the candidate features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded so at least one candidate
# feature always remains.
#
# NOTE(review): unlike the SVR cells, 'xc', 'yc' and 'theta' are not dropped
# here, so they are always part of the feature matrix — confirm that is intended.
always_dropped = ['volume', 'apple_label']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

# Upper bound on the jitter retries below (alpha goes from 1e-6 up to 1e9).
max_alpha_attempts = 16

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = ExpSineSquared(length_scale=1.0, periodicity=1.0)

        # Increase the diagonal jitter `alpha` tenfold each time the fit fails
        # with a linear-algebra error. Only that error is retried, and only a
        # bounded number of times, so an unrelated failure surfaces instead of
        # looping forever.
        gpr = None
        alpha = 1e-6
        for _ in range(max_alpha_attempts):
            try:
                candidate = GaussianProcessRegressor(kernel=kernel, alpha=alpha, random_state=0)
                candidate.fit(X_train, y_train)
            except np.linalg.LinAlgError:
                alpha *= 10
            else:
                gpr = candidate
                break
        if gpr is None:
            raise RuntimeError(
                "GaussianProcessRegressor could not be fitted after "
                f"{max_alpha_attempts} attempts; last alpha tried was {alpha / 10:g}"
            )

        # Only the predictive mean is needed for the metrics below.
        y_pred = gpr.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------- Rational Quadratic Kernel --------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a Gaussian process regressor with a RationalQuadratic kernel on every
# non-empty subset of the candidate features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded so at least one candidate
# feature always remains.
#
# NOTE(review): unlike the SVR cells, 'xc', 'yc' and 'theta' are not dropped
# here, so they are always part of the feature matrix — confirm that is intended.
always_dropped = ['volume', 'apple_label']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = RationalQuadratic(length_scale=1.0, alpha=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        # Only the predictive mean is needed for the metrics below.
        y_pred = gpr.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# --------------------------------------------- Matern Kernel -------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a Gaussian process regressor with a Matern (nu=2.5) kernel on every
# non-empty subset of the candidate features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded so at least one candidate
# feature always remains.
#
# NOTE(review): unlike the SVR cells, 'xc', 'yc' and 'theta' are not dropped
# here, so they are always part of the feature matrix — confirm that is intended.
always_dropped = ['volume', 'apple_label']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = Matern(length_scale=1.0, nu=2.5)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        # Only the predictive mean is needed for the metrics below.
        y_pred = gpr.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ---------------------- Gaussian Process Regression Squared Exponential Kernel -------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [None]:
# Score a Gaussian process regressor with an RBF kernel on every non-empty
# subset of the candidate features listed in `column_dict`.
#
# `mask` is a bit mask over `column_dict`: bit j set means feature j is DROPPED
# for that run. The all-ones mask is excluded so at least one candidate
# feature always remains.
#
# NOTE(review): unlike the SVR cells, 'xc', 'yc' and 'theta' are not dropped
# here, so they are always part of the feature matrix — confirm that is intended.
always_dropped = ['volume', 'apple_label']
n_candidates = len(column_dict)
y = df['volume'].values.tolist()

for mask in range(2 ** n_candidates - 1):

    drop_columns = always_dropped + [
        column_dict[bit] for bit in range(n_candidates) if (mask >> bit) & 1
    ]

    print('\n')
    X = df.drop(columns=drop_columns).values.tolist()

    # One entry per seed. Plain lists are used because np.append returns a new
    # array instead of growing its argument in place.
    rmses, mses, mpes = [], [], []

    for seed in seeds:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

        kernel = RBF(length_scale=1.0)

        gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
        gpr.fit(X_train, y_train)
        # Only the predictive mean is needed for the metrics below.
        y_pred = gpr.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        rmses.append(np.sqrt(mse))
        mses.append(mse)
        mpes.append(mean_absolute_percentage_error(y_test, y_pred))

    drop_column_set = set(drop_columns)
    left_columns = [s for s in columns if s not in drop_column_set]

    # Report the average over all seeds. The "MPE" label is kept as-is, but the
    # value is the mean *absolute* percentage error.
    print('**********************************************************')
    print("With features :", left_columns)
    print("Root Mean Squared Error (RMSE):", round(np.mean(rmses), 3))
    print("Mean Squared Error (MSE):", round(np.mean(mses), 3))
    print("Mean Percentage Error (MPE):", round(np.mean(mpes), 3))
    print("")