In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, KFold

from graspy.simulations import sbm
from graspy.embed import MultipleASE, OmnibusEmbed
from graspy.plot import heatmap, pairplot
from graspy.utils import symmetrize

from rerf.rerfClassifier import rerfClassifier

from tqdm import tqdm_notebook as tqdm

%matplotlib inline

In [3]:
# Load the COBRE data and rebuild one symmetric adjacency matrix per subject.
N_VERTICES = 263  # side length of every reconstructed adjacency matrix

# The context manager closes the .npz archive once both arrays are read.
with np.load('../data/raw/COBRE.npz') as file:
    X = file['X']
    y = file['y'].astype(int)

# Recode the -1 class label as 0.
y[y == -1] = 0

# Row/column indices of the strict upper triangle (diagonal excluded).
idx = np.triu_indices(N_VERTICES, k=1)

# Every row of X must provide exactly one value per upper-triangle entry.
assert X.ndim == 2 and X.shape[1] == idx[0].size, (
    "X must have one column per strict-upper-triangle entry"
)

# Size the stack from the data instead of hard-coding the number of subjects.
X_graphs = np.zeros((X.shape[0], N_VERTICES, N_VERTICES))

for i, x in enumerate(X):
    # Fill the upper triangle, then mirror it to obtain a symmetric matrix.
    X_graphs[i][idx] = x
    X_graphs[i] = symmetrize(X_graphs[i], 'triu')

In [5]:
def run_grerf(XTRAIN, YTRAIN, XTEST, YTEST,
              n_estimators=1000, n_jobs=94, random_state=None):
    """Fit a Graph-RerF forest on the training graphs and return the test error.

    Parameters
    ----------
    XTRAIN, XTEST : ndarray
        Stacks of graphs whose first axis indexes samples. The size of axis 1
        of ``XTRAIN`` is used as both image height and image width, so the
        graphs are treated as square.
    YTRAIN, YTEST : array-like
        Class labels aligned with the first axis of ``XTRAIN`` / ``XTEST``.
    n_estimators : int, default=1000
        Forwarded to ``rerfClassifier``.
    n_jobs : int, default=94
        Forwarded to ``rerfClassifier``. The default keeps the notebook's
        original value; lower it on machines with fewer cores.
    random_state : default=None
        Forwarded to ``rerfClassifier``. Pass a fixed value to make repeated
        runs comparable.

    Returns
    -------
    float
        Fraction of test samples whose predicted label differs from ``YTEST``.
    """
    XTRAIN_samples = XTRAIN.shape[0]
    XTEST_samples = XTEST.shape[0]

    img_height = XTRAIN.shape[1]

    # The classifier is given one flat feature vector per sample; the copy
    # keeps the caller's arrays untouched.
    XTRAIN = XTRAIN.copy().reshape(XTRAIN_samples, -1)
    XTEST = XTEST.copy().reshape(XTEST_samples, -1)

    cls = rerfClassifier(
        projection_matrix="Graph-RerF",
        max_features=img_height**2,
        n_jobs=n_jobs,
        n_estimators=n_estimators,
        oob_score=False,
        random_state=random_state,
        image_height=img_height,
        image_width=img_height,
        patch_height_max=48,
        patch_height_min=16,
        patch_width_max=48,
        patch_width_min=16
    )

    cls.fit(XTRAIN, YTRAIN)

    preds = np.array(cls.predict(XTEST))
    # Misclassification rate on the held-out samples.
    error = np.mean(preds != YTEST)

    return error


def run_classification(train_idx, test_idx, X, y):
    """Evaluate three classifiers on a single train/test split.

    ``X`` and ``y`` are indexed with ``train_idx`` and ``test_idx``, and the
    resulting arrays are handed, in the order (train X, train y, test X,
    test y), to ``run_srerf``, ``run_rerf`` and ``run_mase``.

    Returns the three results as a tuple in that same order.

    NOTE(review): the three helpers are not defined in the visible cells;
    confirm they exist before calling this function.
    """
    fold = (X[train_idx], y[train_idx], X[test_idx], y[test_idx])

    # Calls are evaluated left to right: srerf, then rerf, then mase.
    return run_srerf(*fold), run_rerf(*fold), run_mase(*fold)

def run_classification2(train_idx, test_idx, X, y):
    """Run ``run_grerf`` on a single train/test split and return its error.

    ``X`` and ``y`` are indexed with ``train_idx`` for the training portion
    and with ``test_idx`` for the held-out portion.
    """
    return run_grerf(X[train_idx], y[train_idx], X[test_idx], y[test_idx])

In [6]:
# 10-fold cross-validation: collect one test error per fold.
kfold = KFold(n_splits=10)
fold_splits = kfold.split(X=X_graphs, y=y)

errors = []
for train_idx, test_idx in fold_splits:
    fold_error = run_classification2(train_idx, test_idx, X_graphs, y)
    errors.append(fold_error)

In [7]:
# Per-fold misclassification rates collected by the cross-validation loop.
errors

[0.38461538461538464,
 0.5384615384615384,
 0.3076923076923077,
 0.38461538461538464,
 0.16666666666666666,
 0.5833333333333334,
 0.5,
 0.4166666666666667,
 0.25,
 0.3333333333333333]

In [8]:
# Average misclassification rate across the folds.
np.mean(errors)

0.38653846153846155