In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from hyperframe import HyperFrame
from sklearn.model_selection import train_test_split
from demo.helpers import metrics, X, y

In [2]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [3]:
# Baseline model: k-nearest neighbours with the library's default settings.
clf = KNeighborsClassifier()
# fit() is the cell's last expression, so the estimator repr is displayed.
clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

# Initialisation

In [4]:
# Names of the three axes of the score cube.
dimension_labels = ["train_test", "species", "metric"]

# Labels available along each axis, keyed by the axis name.
index_labels = {
    "train_test": ["train", "test"],
    "species": ["setosa", "versicolor", "virginica"],
    "metric": ["precision", "recall", "f1"],
}

# Container that the following cells fill with per-split, per-species metrics.
scores = HyperFrame(dimension_labels, index_labels)

# Setting data

In [5]:
# Score the KNN model on the data it was trained on.
yhat = clf.predict(X_train)
# iset alternative 1: one positional label per dimension, in the order of
# dimension_labels. An empty string appears to leave that dimension
# unrestricted -- TODO confirm against the hyperframe documentation.
scores.iset(metrics(y_train, yhat), "train", "", "")

<hyperframe.HyperFrame at 0x7fc074a19eb8>

In [6]:
# Score the KNN model on the held-out data (rebinds yhat from the previous cell).
yhat = clf.predict(X_test)
# iset alternative 2: name only the dimension being fixed via a keyword
# argument; the remaining dimensions are simply omitted.
scores.iset(metrics(y_test, yhat), train_test="test")

<hyperframe.HyperFrame at 0x7fc074a19eb8>

# Getting data

In [7]:
# iget alternative 1: positional labels, one per dimension. Fixing only
# "train" yields the species x metric table shown below as a pandas object.
scores.iget("train", "", "", return_type="pandas")

Unnamed: 0,precision,recall,f1
setosa,0.935484,0.935484,0.935484
versicolor,0.722222,0.742857,0.732394
virginica,0.787879,0.764706,0.776119


In [8]:
# iget alternative 2: fix a single dimension by keyword. Here the result is
# the train/test x metric table for the "versicolor" species.
scores.iget(species="versicolor", return_type="pandas")

Unnamed: 0,precision,recall,f1
train,0.722222,0.742857,0.732394
test,0.846154,0.733333,0.785714


In [9]:
# iget alternative 3: pass the names of the dimensions to keep. Judging by
# the echoed {'metric': 'precision'}, every unlisted dimension is pinned to
# its first label -- TODO confirm against the hyperframe documentation.
scores.iget0("species", "train_test", return_type="pandas")

{'metric': 'precision'}


Unnamed: 0,setosa,versicolor,virginica
train,0.935484,0.722222,0.787879
test,0.95,0.846154,0.764706


# Another hyperframe with the same labels

In [10]:
# Second score cube, built from the same dimension and index labels as `scores`.
scores_lr = HyperFrame(dimension_labels, index_labels)
# Rebinds `clf` to an unregularised logistic regression.
# NOTE(review): penalty="none" is the string spelling used by older
# scikit-learn releases; newer releases expect penalty=None instead --
# confirm against the scikit-learn version this notebook is pinned to.
clf = LogisticRegression(penalty="none", max_iter=1000)
clf.fit(X_train, y_train)

# Metrics on the training split.
yhat = clf.predict(X_train)
scores_lr.iset(metrics(y_train, yhat), "train", "", "")

# Metrics on the held-out split; iset is the last expression, so the
# HyperFrame repr is displayed.
yhat = clf.predict(X_test)
scores_lr.iset(metrics(y_test, yhat), "test", "", "")

<hyperframe.HyperFrame at 0x7fc0749a5128>

# Merging hyperframes

In [11]:
# Both frames must report the same shape before they are merged; print one
# shape per line.
print(scores.shape, scores_lr.shape, sep="\n")

(2, 3, 3)
(2, 3, 3)


In [12]:
# Combine the two equally-labelled frames under a new "model" dimension whose
# labels are "knn" (for `scores`) and "logistic regression" (for `scores_lr`).
scores_models = scores.merge(scores_lr, "model", ["knn", "logistic regression"])

In [13]:
# Four positional labels now (train_test, species, metric, model): fix the
# test split and the f1 metric to compare models per species.
scores_models.iget("test", "", "f1", "", return_type="pandas")

Unnamed: 0,knn,logistic regression
setosa,0.974359,0.974359
versicolor,0.785714,0.714286
virginica,0.787879,0.727273


In [14]:
# Fix the f1 metric and the logistic regression model to compare train and
# test scores per species.
scores_models.iget("", "", "f1", "logistic regression", return_type="pandas")

Unnamed: 0,setosa,versicolor,virginica
train,0.952381,0.753623,0.794118
test,0.974359,0.714286,0.727273


# Yet another hyperframe with the same dimensions as the original hyperframe

In [15]:
# Third score cube, built from the same dimension and index labels as `scores`.
scores_rf = HyperFrame(dimension_labels, index_labels)
# Random forests are stochastic: without a seed the scores change on every
# run, so the recorded outputs could not be reproduced. Use the same seed as
# the train/test split.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Metrics on the training split.
yhat = clf.predict(X_train)
scores_rf.iset(metrics(y_train, yhat), "train", "", "")

# Metrics on the held-out split; iset is the last expression, so the
# HyperFrame repr is displayed.
yhat = clf.predict(X_test)
scores_rf.iset(metrics(y_test, yhat), "test", "", "")

<hyperframe.HyperFrame at 0x7fc0749a5940>

In [16]:
# Species x metric table for the random forest on the held-out split.
scores_rf.iget("test", "", "", return_type="pandas")

Unnamed: 0,precision,recall,f1
setosa,0.947368,0.947368,0.947368
versicolor,0.75,0.6,0.666667
virginica,0.684211,0.8125,0.742857


# Expanding a HyperFrame

In [17]:
# The merged frame has one more dimension ("model") than the random forest
# frame; print one shape per line.
print(scores_models.shape, scores_rf.shape, sep="\n")

(2, 3, 3, 2)
(2, 3, 3)


In [18]:
# Add the random forest frame to the existing "model" dimension under the
# label "random forest". Note this rebinds scores_models, so re-running the
# cell operates on the already-expanded frame.
scores_models = scores_models.expand(scores_rf, "model", "random forest")

In [19]:
# Same slice as before the expansion: test-split f1 per species, now with a
# "random forest" column alongside the other two models.
scores_models.iget("test", "", "f1", "", return_type="pandas")

Unnamed: 0,knn,logistic regression,random forest
setosa,0.974359,0.974359,0.947368
versicolor,0.785714,0.714286,0.666667
virginica,0.787879,0.727273,0.742857


# Writing to file

In [20]:
# Persist the combined frame; the path is relative to the kernel's working
# directory.
scores_models.write_file("./demo/scores_models")

# Reading from file

In [21]:
# Load the frame back from the path written in the previous cell.
# NOTE(review): read_file is invoked on the existing instance; if it is a
# class-level loader, HyperFrame.read_file(...) would avoid needing a live
# object first -- confirm against the hyperframe API.
scores_models = scores_models.read_file("./demo/scores_models")