In [63]:
"""
    Comparing SVMR performance across different kernel functions.
"""

'\n    Comparing SVMR performance across different kernel functions.\n'

In [64]:
import warnings
# Suppress every warning category for the rest of the session. This also hides
# any warnings raised while the models below are being fitted, so comment this
# line out when a result looks suspicious.
warnings.filterwarnings("ignore")

In [65]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, RationalQuadratic, Matern, RBF
import matplotlib.pyplot as plt

In [66]:
# Load the table that every experiment cell below reads from. The path is
# relative, so it resolves against the current working directory.
df = pd.read_csv(
    '../Data/avg_fitted_ellipse_parameter_and_true_volume.csv'
)

In [67]:
# Column names, from 'apple_label' through to 'volume'.
# NOTE(review): presumably mirrors the CSV header — confirm against the file.
columns = [
    'apple_label',
    'semi_major_axis',
    'semi_minor_axis',
    'area',
    'perimeter',
    'eccentricity',
    'volume',
]

# Positional index -> name for every entry of `columns` except the first and
# the last (0 -> 'semi_major_axis', ..., 4 -> 'eccentricity').
# NOTE(review): this mapping still contains 'area', which the experiment cells
# drop from X, so these indices do not line up with X's columns. Neither
# `columns` nor `column_dict` is used by the cells visible here.
column_dict = dict(enumerate(columns[1:-1]))

# random_state values for the repeated train/test splits: 0, 10, 20, 30, 40.
seeds = list(range(0, 50, 10))

In [68]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Parameters
    ----------
    y_true : array-like of numbers
        Reference values. Plain Python lists are accepted.
    y_pred : array-like of numbers
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    numpy.float64
        The MAPE as a percentage (10.0 means 10 %).

    Notes
    -----
    Zeros in ``y_true`` are not guarded against: numpy's division then
    yields ``inf`` or ``nan`` and the result is not meaningful.
    """
    # Coerce up front so the arithmetic is always element-wise. Without this,
    # passing two plain lists fails with TypeError on `list - list`.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [69]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ------------------------------------------- Linear Kernel ---------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

In [70]:
# Bar-chart labels, one per experiment cell below. Each cell appends its
# averages to the two result lists, so position i here is paired with the i-th
# experiment cell in execution order.
# NOTE(review): 'Guassian' and 'Maxtern' look like misspellings, and
# 'Exponential' is paired with the ExpSineSquared cell — confirm the intended
# labels before publishing the figures.
kernals = [
    'Linear',
    'Guassian',
    'Polynomial',
    'Exponential',
    'Rational Quadratic',
    'Maxtern',
    'Sq. Exponential',
]

# Across-seed averages, filled by appending. Re-running an experiment cell
# without re-running this cell first adds a duplicate entry.
rmses_per_kernal, mpes_per_kernal = [], []

In [71]:
# SVR with a linear kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

# One RMSE / MAPE value per random split, gathered in plain lists and turned
# into arrays once the loop has finished.
rmse_values, mpe_values = [], []

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    svr_model = SVR(kernel='linear').fit(X_train, y_train)
    y_pred = svr_model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)
    rmse_values.append(rmse)
    mpe_values.append(mpe)

rmses = np.array(rmse_values, dtype=float)
mpes = np.array(mpe_values, dtype=float)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [72]:
# SVR with an RBF kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

# One RMSE / MAPE value per random split, gathered in plain lists and turned
# into arrays once the loop has finished.
rmse_values, mpe_values = [], []

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    svr_model = SVR(kernel='rbf').fit(X_train, y_train)
    y_pred = svr_model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)
    rmse_values.append(rmse)
    mpe_values.append(mpe)

rmses = np.array(rmse_values, dtype=float)
mpes = np.array(mpe_values, dtype=float)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [73]:
# SVR with a degree-3 polynomial kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

# One RMSE / MAPE value per random split, gathered in plain lists and turned
# into arrays once the loop has finished.
rmse_values, mpe_values = [], []

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    svr_model = SVR(kernel='poly', degree=3).fit(X_train, y_train)
    y_pred = svr_model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)
    rmse_values.append(rmse)
    mpe_values.append(mpe)

rmses = np.array(rmse_values, dtype=float)
mpes = np.array(mpe_values, dtype=float)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [None]:
# Gaussian-process regression with an ExpSineSquared kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

rmses = np.empty(0)
mpes = np.empty(0)

# Diagonal jitter schedule: start small and multiply by 10 after each failed
# fit. The attempt cap replaces the previous unbounded `while True`, which
# could spin forever on an error that a larger alpha cannot cure.
INITIAL_ALPHA = 1e-6
MAX_ALPHA_ATTEMPTS = 20

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    kernel = ExpSineSquared(length_scale=1.0, periodicity=1.0)

    alpha = INITIAL_ALPHA
    last_error = None
    for _ in range(MAX_ALPHA_ATTEMPTS):
        try:
            gpr = GaussianProcessRegressor(kernel=kernel, alpha=alpha, random_state=0)
            gpr.fit(X_train, y_train)
            break
        except np.linalg.LinAlgError as err:
            # Raised when the kernel matrix is not positive definite; a larger
            # alpha on the diagonal is the documented remedy. Any other
            # exception (bad data, wrong shapes, ...) is left to propagate.
            last_error = err
            alpha *= 10
    else:
        raise RuntimeError(
            f"GaussianProcessRegressor fit failed for seed {seed} after "
            f"{MAX_ALPHA_ATTEMPTS} attempts (last alpha tried: {alpha / 10:g})"
        ) from last_error

    # Only the predictive mean is needed for the metrics below.
    y_pred = gpr.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)

    rmses = np.append(rmses, rmse)
    mpes = np.append(mpes, mpe)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [None]:
# Gaussian-process regression with a RationalQuadratic kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

rmses = np.empty(0)
mpes = np.empty(0)

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    kernel = RationalQuadratic(length_scale=1.0, alpha=1.0)

    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
    gpr.fit(X_train, y_train)
    # Only the predictive mean is needed for the metrics below.
    y_pred = gpr.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)

    rmses = np.append(rmses, rmse)
    mpes = np.append(mpes, mpe)

# Store the across-seed means (3 decimals) for the summary bar charts.
# The MAPE entry must average `mpes` (all seeds); averaging the scalar `mpe`
# would report only the last seed's value.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [None]:
# Gaussian-process regression with a Matern kernel (nu=2.5).
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

# One RMSE / MAPE value per random split, gathered in plain lists and turned
# into arrays once the loop has finished.
rmse_values, mpe_values = [], []

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    kernel = Matern(length_scale=1.0, nu=2.5)
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
    gpr.fit(X_train, y_train)

    # The predictive standard deviation is requested but not used below.
    y_pred, sigma = gpr.predict(X_test, return_std=True)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)
    rmse_values.append(rmse)
    mpe_values.append(mpe)

rmses = np.array(rmse_values, dtype=float)
mpes = np.array(mpe_values, dtype=float)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [None]:
# Gaussian-process regression with an RBF kernel.
# Features: every column of df except 'apple_label', 'area' and 'volume';
# target: 'volume'.
X = df.drop(columns=['apple_label', 'area', 'volume']).values.tolist()
y = df['volume'].values.tolist()

# One RMSE / MAPE value per random split, gathered in plain lists and turned
# into arrays once the loop has finished.
rmse_values, mpe_values = [], []

for seed in seeds:
    # 80/20 split, made reproducible through the seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    kernel = RBF(length_scale=1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
    gpr.fit(X_train, y_train)

    # The predictive standard deviation is requested but not used below.
    y_pred, sigma = gpr.predict(X_test, return_std=True)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mpe = mean_absolute_percentage_error(y_test, y_pred)
    rmse_values.append(rmse)
    mpe_values.append(mpe)

rmses = np.array(rmse_values, dtype=float)
mpes = np.array(mpe_values, dtype=float)

# Store the across-seed means (3 decimals) for the summary bar charts.
rmses_per_kernal.append(round(np.mean(rmses), 3))
mpes_per_kernal.append(round(np.mean(mpes), 3))

In [None]:
def plot_data(list1, list2, y_label, x_label, title):
    """Draw and show a green bar chart on the current pyplot axes.

    Parameters
    ----------
    list1 : sequence
        x positions / category labels of the bars.
    list2 : sequence
        Bar heights, one per entry of ``list1``.
    y_label : str
        Label for the y axis (note: it precedes ``x_label`` in the signature).
    x_label : str
        Label for the x axis.
    title : str
        Title of the chart.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.
    """
    axes = plt.gca()
    axes.bar(list1, list2, color='green', width=0.5)
    axes.set_xlabel(x_label)
    axes.set_ylabel(y_label)
    axes.set_title(title)
    # Tilt the tick labels by 45 degrees.
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Echo the raw per-kernel RMSE averages, then chart them against the labels.
print(rmses_per_kernal)
plot_data(
    list1=kernals,
    list2=rmses_per_kernal,
    y_label='RMSE',
    x_label='Kernal',
    title='RMSE vs Kernal',
)

In [None]:
# The plotted values come from mean_absolute_percentage_error(), i.e. the MAPE
# expressed in percent. Label it as such: "MPE" names the signed mean
# percentage error, which is a different metric.
plot_data(kernals, mpes_per_kernal, 'MAPE (%)', 'Kernal', 'MAPE vs Kernal')