In [14]:
import numpy as np
import pandas as pd
from time import process_time

# Read the body-fat table and discard the "Density" column in one expression,
# so `df` is always derived straight from the file.
df = pd.read_csv('../bodyfat.csv').drop(columns="Density")
df.head()

Unnamed: 0,BodyFat,Age,Weight,Height,Neck,Chest,Abdomen,Hip,Thigh,Knee,Ankle,Biceps,Forearm,Wrist
0,12.3,23,154.25,67.75,36.2,93.1,85.2,94.5,59.0,37.3,21.9,32.0,27.4,17.1
1,6.1,22,173.25,72.25,38.5,93.6,83.0,98.7,58.7,37.3,23.4,30.5,28.9,18.2
2,25.3,22,154.0,66.25,34.0,95.8,87.9,99.2,59.6,38.9,24.0,28.8,25.2,16.6
3,10.4,26,184.75,72.25,37.4,101.8,86.4,101.2,60.1,37.3,22.8,32.4,29.4,18.2
4,28.7,24,184.25,71.25,34.4,97.3,100.0,101.9,63.2,42.2,24.0,32.2,27.7,17.7


In [15]:
from sklearn.preprocessing import MaxAbsScaler, StandardScaler

# Module-level scaler used by `repeat`: it calls `scaler.fit_transform` on the
# feature matrix once per repetition, refitting this same instance each time.
scaler = MaxAbsScaler()

In [16]:
from src.method_1 import SGTM_GRNN
from src.GRNN_ import GRNN
from src.SGTM import GTM

In [17]:
def errors_to_dict(description, mae, rmse, time=None):
    """Return a one-row DataFrame of error metrics labelled by ``description``.

    The single row is indexed by ``description`` and has the columns
    ``"MAE"``, ``"RMSE"`` and ``"time(sec)"``; the last one holds ``time``,
    which defaults to ``None``.
    """
    metrics = {"MAE": mae, "RMSE": rmse, "time(sec)": time}
    as_column = pd.DataFrame({description: metrics})
    # Transpose so the metrics become columns and the description the row label.
    return as_column.transpose()

In [55]:
def print_scores(results):
    """Print the averaged test and train MAE / RMSE for a list of score dicts.

    Each entry of ``results`` is indexed by the keys
    ``"test_neg_mean_absolute_error"``, ``"test_neg_root_mean_squared_error"``,
    ``"train_neg_mean_absolute_error"`` and
    ``"train_neg_root_mean_squared_error"``. The values are sign-flipped,
    averaged within each entry and then averaged across entries.
    """
    def averaged(key):
        # Mean of the per-entry means of the sign-flipped scores.
        per_entry = [np.mean(-entry[key]) for entry in results]
        return np.mean(per_entry)

    mae_test = averaged("test_neg_mean_absolute_error")
    rmse_test = averaged("test_neg_root_mean_squared_error")
    mae_train = averaged("train_neg_mean_absolute_error")
    rmse_train = averaged("train_neg_root_mean_squared_error")

    report = (
        f"Test scores:\nMAE: {mae_test}\nRMSE: {rmse_test}\n\n"
        f"Train scores:\nMAE: {mae_train}\nRMSE: {rmse_train}\n"
    )
    print(report)

# Scorer names passed as `scoring=` to `cross_validate`; `print_scores` reads the
# matching "test_<name>" / "train_<name>" entries and flips their sign.
scoring = ["neg_mean_absolute_error", "neg_root_mean_squared_error"]

In [19]:
from sklearn.model_selection import cross_validate


def repeat(func):
    """Decorate ``func(X, y, k)`` so it runs on ``n`` reshuffled copies of a frame.

    The returned wrapper has the signature
    ``wrapper(df, n=20, k=5, random_state=None)``:

    * ``df`` -- DataFrame with a ``BodyFat`` column (the target); every other
      column is used as a feature.
    * ``n`` -- number of shuffle-and-evaluate repetitions.
    * ``k`` -- forwarded unchanged to ``func`` as its third argument.
    * ``random_state`` -- optional integer seed. When given, a single
      ``numpy.random.RandomState`` drives all ``n`` shuffles, so repetitions
      differ from each other but the whole run is reproducible. When ``None``
      the shuffle uses pandas' default random source, as before.

    Returns ``(results, cpu_seconds)`` where ``results`` is the list of values
    returned by ``func`` and ``cpu_seconds`` is the ``process_time`` elapsed
    over the whole loop (shuffling and scaling included).
    """
    # Imported locally so this cell does not depend on an import made elsewhere.
    from functools import wraps

    @wraps(func)  # keep the decorated function's __name__ / __doc__
    def wrapper(df, n=20, k=5, random_state=None):
        rng = None if random_state is None else np.random.RandomState(random_state)
        results = []

        start = process_time()
        for _ in range(n):
            # Shuffle the rows (each repetition reshuffles the previous order).
            df = df.sample(frac=1, random_state=rng)
            y = df.BodyFat
            X = df.drop(columns="BodyFat").to_numpy()

            # NOTE: the global `scaler` is fitted on every row before `func`
            # receives the data, so when `func` cross-validates, the held-out
            # folds have already contributed to the scaling statistics.
            X = scaler.fit_transform(X)

            results.append(func(X, y, k))
        return results, process_time() - start
    return wrapper

In [48]:
@repeat
def sgtm_grnn(X, y, cv):
    """Cross-validate ``SGTM_GRNN(sigma=0.08)`` on ``(X, y)``.

    ``cv`` is forwarded to ``cross_validate``; test and train scores are
    returned for every metric listed in ``scoring``.
    """
    model = SGTM_GRNN(sigma=0.08)
    return cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
    )


# Run the repeated evaluation with the defaults supplied by `repeat`.
results, calc_time = sgtm_grnn(df)

In [56]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the SGTM_GRNN run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 3.65625
Test scores:
MAE: 3.315925634690023
RMSE: 4.1469707586934295

Train scores:
MAE: 2.799861322409623
RMSE: 3.5132796037075407



In [22]:
@repeat
def sgtm(X, y, cv):
    """Cross-validate a ``GTM`` model on ``(X, y)``.

    ``numb_of_steps`` is set to the number of columns of ``X``. ``cv`` is
    forwarded to ``cross_validate``; test and train scores are returned for
    every metric listed in ``scoring``.
    """
    n_columns = X.shape[1]
    model = GTM(numb_of_steps=n_columns)
    return cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
    )


# Run the repeated evaluation with the defaults supplied by `repeat`.
results, calc_time = sgtm(df)

In [23]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the GTM run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 0.859375
Test scores:
MAE: 3.6667719787653814
RMSE: 4.485579518079564

Train scores:
MAE: 3.417824574838513
RMSE: 4.161816312890316



In [24]:
@repeat
def grnn(X, y, cv):
    """Cross-validate ``GRNN(sigma=0.079)`` on ``(X, y)``.

    ``cv`` is forwarded to ``cross_validate``; test and train scores are
    returned for every metric listed in ``scoring``.
    """
    model = GRNN(sigma=0.079)
    return cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
    )


# Run the repeated evaluation with the defaults supplied by `repeat`.
results, calc_time = grnn(df)

In [25]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the GRNN run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 2.359375
Test scores:
MAE: 4.516603338251232
RMSE: 5.445908509996508

Train scores:
MAE: 3.6413934813949638
RMSE: 4.466171048696301



## Repeat the experiments from the article to be sure

In [26]:
from sklearn.ensemble import RandomForestRegressor

@repeat
def rf(X, y, cv):
    """Cross-validate a random forest (1000 trees, ``max_depth=5``) on ``(X, y)``.

    ``cv`` is forwarded to ``cross_validate``; test and train scores are
    returned for every metric listed in ``scoring``.
    """
    model = RandomForestRegressor(n_estimators=1000, max_depth=5)
    return cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
    )


# Run the repeated evaluation with the defaults supplied by `repeat`.
results, calc_time = rf(df)

In [27]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the RandomForestRegressor run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 325.90625
Test scores:
MAE: 3.852658333419753
RMSE: 4.695583658361156

Train scores:
MAE: 2.2655436444119372
RMSE: 2.727779874014659



In [28]:
from sklearn.svm import SVR

# Named `svr` (not `rf`) so this cell no longer overwrites the random-forest
# experiment function with an unrelated model.
@repeat
def svr(X, y, cv):
    """Cross-validate ``SVR(C=10.0, epsilon=0.01)`` on ``(X, y)``.

    ``cv`` is forwarded to ``cross_validate``; test and train scores are
    returned for every metric listed in ``scoring``.
    """
    clf = SVR(C=10.0, epsilon=0.01)
    scores = cross_validate(
        clf, X, y, cv=cv,
        scoring=scoring,
        return_train_score=True
    )
    return scores

results, calc_time = svr(df)

In [29]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the SVR run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 1.21875
Test scores:
MAE: 3.785889211971687
RMSE: 4.629165961135543

Train scores:
MAE: 3.150746095881393
RMSE: 4.095125363362994



In [30]:
from sklearn.neural_network import MLPRegressor

# Named `mlp` (not `rf`) so this cell no longer overwrites the random-forest
# experiment function with an unrelated model.
@repeat
def mlp(X, y, cv):
    """Cross-validate an ``MLPRegressor`` (one hidden layer of 500 units,
    ``max_iter=500``) on ``(X, y)``.

    ``cv`` is forwarded to ``cross_validate``; test and train scores are
    returned for every metric listed in ``scoring``.
    """
    # Same model as the positional form MLPRegressor(500, ...), spelled out.
    clf = MLPRegressor(hidden_layer_sizes=500, max_iter=500)
    scores = cross_validate(
        clf, X, y, cv=cv,
        scoring=scoring,
        return_train_score=True
    )
    return scores


results, calc_time = mlp(df)

In [31]:
# `calc_time` is the process_time difference returned by the `repeat` wrapper
# for the MLPRegressor run preceding this cell in notebook order.
print(f"Calculation time: {calc_time}")
# Averaged test/train MAE and RMSE over the collected `results`.
print_scores(results)

Calculation time: 133.890625
Test scores:
MAE: 6.378877638909438
RMSE: 7.7320153248276755

Train scores:
MAE: 6.360134807758287
RMSE: 7.735201713248881

