In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, KFold

from graspy.simulations import sbm
from graspy.embed import MultipleASE, OmnibusEmbed
from graspy.plot import heatmap, pairplot
from graspy.utils import symmetrize, pass_to_ranks

from rerf.rerfClassifier import rerfClassifier

from tqdm import tqdm_notebook as tqdm

%matplotlib inline

In [2]:
#Load data and wrangle it
file = np.load('../data/raw/COBRE.npz')

X = file['X']
y = file['y'].astype(int)

y[y == -1] = 0

idx = np.triu_indices(263, k=1)

X_graphs = np.zeros((124, 263, 263))

for i, x in enumerate(X):
    X_graphs[i][idx] = x
    X_graphs[i] = symmetrize(X_graphs[i], 'triu')
    
X_graphs = X_graphs - X_graphs.min(axis=(1, 2)).reshape(-1, 1, 1)

for i, x in enumerate(X_graphs):
    X_graphs[i] = pass_to_ranks(X_graphs[i])

In [13]:
def run_srerf(XTRAIN, YTRAIN, XTEST, YTEST):
    XTRAIN_samples = XTRAIN.shape[0]
    XTEST_samples = XTEST.shape[0]
    
    img_height = XTRAIN.shape[1]
    
    XTRAIN = XTRAIN.copy().reshape(XTRAIN_samples, -1)
    XTEST = XTEST.copy().reshape(XTEST_samples, -1)

    cls = rerfClassifier(
        projection_matrix="S-RerF",
        max_features=img_height,
        n_jobs=90,
        n_estimators=1000000,
        oob_score=False,
        random_state=None,
        image_height=img_height,
        image_width=img_height,
        patch_height_max=16,
        patch_height_min=2,
        patch_width_max=16,
        patch_width_min=2
    )
    
    cls.fit(XTRAIN, YTRAIN)
    
    preds = np.array(cls.predict(XTEST))
    error = np.mean(preds != YTEST)
    
    return error

def run_srerf_permuted(XTRAIN, YTRAIN, XTEST, YTEST):
    idx = np.arange(XTRAIN.shape[1])
    np.random.shuffle(idx)
    
    img_height = XTRAIN.shape[1]
    XTRAIN_samples = XTRAIN.shape[0]
    XTEST_samples = XTEST.shape[0]
    
    
    XTRAIN = np.array([x[np.ix_(idx, idx)] for x in XTRAIN])
    XTEST = np.array([x[np.ix_(idx, idx)] for x in XTEST])

    XTRAIN = XTRAIN.copy().reshape(XTRAIN_samples, -1)
    XTEST = XTEST.copy().reshape(XTEST_samples, -1)

    cls = rerfClassifier(
        projection_matrix="S-RerF",
        max_features=img_height,
        n_jobs=90,
        n_estimators=1000000,
        oob_score=False,
        random_state=None,
        image_height=img_height,
        image_width=img_height,
        patch_height_max=16,
        patch_height_min=2,
        patch_width_max=16,
        patch_width_min=2
    )
    
    cls.fit(XTRAIN, YTRAIN)
    
    preds = np.array(cls.predict(XTEST))
    error = np.mean(preds != YTEST)
    
    return error

def run_rerf(XTRAIN, YTRAIN, XTEST, YTEST):
    n_features = XTRAIN.shape[1]
    idx = np.triu_indices(n_features, k=1)
    
    XTRAIN = np.array([x[idx] for x in XTRAIN])
    XTEST = np.array([x[idx] for x in XTEST])

    cls = rerfClassifier(
        projection_matrix="RerF",
        n_jobs=90,
        max_features=n_features,
        feature_combinations=2,
        n_estimators=1000000,
        oob_score=False,
        random_state=None,
    )
    
    cls.fit(XTRAIN, YTRAIN)
    
    preds = np.array(cls.predict(XTEST))
    error = np.mean(preds != YTEST)
    
    return error

def run_mase(XTRAIN, YTRAIN, XTEST, YTEST):
    train_samples = XTRAIN.shape[0]
    test_samples = XTEST.shape[0]
    n_samples = train_samples + test_samples

    
    X = np.vstack([XTRAIN, XTEST])
    
    mase = MultipleASE(n_components=2, scaled=True)
    mase.fit(X)
    
    rhats = mase.scores_.reshape(n_samples, -1)
    
    knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
    knn.fit(rhats[:train_samples], YTRAIN)
    
    preds = knn.predict(rhats[train_samples:])
    error = np.mean(preds != YTEST)
    
    return np.mean(error)


def run_grerf(XTRAIN, YTRAIN, XTEST, YTEST):
    XTRAIN_samples = XTRAIN.shape[0]
    XTEST_samples = XTEST.shape[0]
    
    img_height = XTRAIN.shape[1]
    
    XTRAIN = XTRAIN.copy().reshape(XTRAIN_samples, -1)
    XTEST = XTEST.copy().reshape(XTEST_samples, -1)

    cls = rerfClassifier(
        projection_matrix="Graph-Node-RerF",
        max_features=img_height,
        n_jobs=90,
        n_estimators=1000000,
        oob_score=False,
        random_state=None,
        image_height=img_height,
        image_width=img_height,
        patch_height_max=16,
        patch_height_min=2,
        patch_width_max=16,
        patch_width_min=2
    )
    
    cls.fit(XTRAIN, YTRAIN)
    
    preds = np.array(cls.predict(XTEST))
    error = np.mean(preds != YTEST)
    
    return error

def run_grerf2(XTRAIN, YTRAIN, XTEST, YTEST):
    XTRAIN_samples = XTRAIN.shape[0]
    XTEST_samples = XTEST.shape[0]
    
    img_height = XTRAIN.shape[1]
    
    XTRAIN = XTRAIN.copy().reshape(XTRAIN_samples, -1)
    XTEST = XTEST.copy().reshape(XTEST_samples, -1)

    cls = rerfClassifier(
        projection_matrix="Graph-Edge-RerF",
        max_features=img_height,
        n_jobs=90,
        n_estimators=1000000,
        oob_score=False,
        random_state=None,
        image_height=img_height,
        image_width=img_height,
        patch_height_max=16,
        patch_height_min=2,
        patch_width_max=16,
        patch_width_min=2
    )
    
    cls.fit(XTRAIN, YTRAIN)
    
    preds = np.array(cls.predict(XTEST))
    error = np.mean(preds != YTEST)
    
    return error


def run_classification(train_idx, test_idx, X, y):
    XTRAIN = X[train_idx]
    YTRAIN = y[train_idx]
    XTEST = X[test_idx]
    YTEST = y[test_idx]

    data = [XTRAIN, YTRAIN, XTEST, YTEST]

    srerf_error = run_srerf(*data)
    rerf_error = run_rerf(*data)
    mase_error = run_mase(*data)
    grerf_error = run_grerf(*data)
    grerf_error2 = run_grerf2(*data)
    srerf_permuted_error = run_srerf_permuted(*data)

    return srerf_error, rerf_error, mase_error, grerf_error, grerf_error2, srerf_permuted_error

def run_classification2(train_idx, test_idx, X, y):
    XTRAIN = X[train_idx]
    YTRAIN = y[train_idx]
    XTEST = X[test_idx]
    YTEST = y[test_idx]

    data = [XTRAIN, YTRAIN, XTEST, YTEST]

    grerf_error = run_grerf(*data)

    return grerf_error

In [14]:
kfold = KFold(n_splits=10, random_state=1)

errors = []
for train_idx, test_idx in kfold.split(X=X_graphs, y=y):
    errors.append(run_classification(train_idx, test_idx, X_graphs, y))

In [15]:
res = np.array(errors)

df = pd.DataFrame(res, columns = ['S-RerF', 'SPORF', 'MASE', 'Graph-Node-RerF', 'Graph-Edge-RerF', 'S-RerF-Permuted'])

In [25]:
df.to_csv("COBRE_res.csv", index=False)

In [4]:
df = pd.read_csv("COBRE_res.csv")

In [14]:
df

Unnamed: 0,S-RerF,SPORF,MASE,Graph-Node-RerF,Graph-Edge-RerF
0,0.384615,0.692308,0.615385,0.538462,0.615385
1,0.461538,0.384615,0.538462,0.384615,0.307692
2,0.307692,0.384615,0.538462,0.384615,0.230769
3,0.230769,0.0,0.461538,0.153846,0.076923
4,0.166667,0.166667,0.333333,0.166667,0.166667
5,0.416667,0.333333,0.833333,0.416667,0.333333
6,0.416667,0.583333,0.333333,0.416667,0.416667
7,0.333333,0.083333,0.416667,0.166667,0.166667
8,0.333333,0.166667,0.5,0.166667,0.166667
9,0.333333,0.25,0.75,0.25,0.166667


In [16]:
print(df.mean())

S-RerF             0.338462
SPORF              0.304487
MASE               0.532051
Graph-Node-RerF    0.304487
Graph-Edge-RerF    0.264744
dtype: float64


In [21]:
print(df.mean())

S-RerF             0.299359
SPORF              0.312821
MASE               0.532051
Graph-Node-RerF    0.321154
Graph-Edge-RerF    0.266026
dtype: float64


In [29]:
print(df.mean())

S-RerF             0.299359
SPORF              0.280128
MASE               0.532051
Graph-Node-RerF    0.289103
Graph-Edge-RerF    0.273077
S-RerF-Permuted    0.335897
dtype: float64


In [12]:
print(df.mean())

S-RerF             0.321154
SPORF              0.344231
MASE               0.426282
Graph-Node-RerF    0.296795
Graph-Edge-RerF    0.328846
S-RerF-Permuted    0.280769
dtype: float64
