In [1]:
import PF_wrapper as PF
import numpy as np
import pandas as pd

from aeon.datasets import load_japanese_vowels

In [2]:
# Fetch the TRAIN and TEST splits of the Japanese Vowels dataset; each call
# yields a (data, labels) pair.
(X3, y3), (X3t, y3t) = (
    load_japanese_vowels(split=part) for part in ("TRAIN", "TEST")
)

In [3]:
def aeonToFile(X3, filename, entry_separator = ',', array_separator = ':'):
    """Write a nested collection of series to a delimited text file.

    Each element of ``X3`` (a "plane") becomes one line of the file. Within a
    line, the rows of the plane are separated by ``array_separator`` and the
    entries of each row are separated by ``entry_separator``. Every entry is
    converted with ``str``.

    Parameters
    ----------
    X3 : iterable of iterables of iterables
        The data to export, e.g. a 3D array. Planes and rows may be any
        iterable (lists, arrays, generators).
    filename : str or path-like
        Destination file; it is overwritten if it exists. Missing parent
        directories are created.
    entry_separator : str, default ','
        Separator placed between the entries of a row.
    array_separator : str, default ':'
        Separator placed between the rows of a plane.
    """
    import os

    # Create the destination folder up front so a fresh checkout (without
    # e.g. a 'Data/' directory) does not fail with FileNotFoundError.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(filename, 'w') as f:
        for plane in X3:
            # Joining avoids the len()-based "last row" check, so planes that
            # have no len() (generators) are supported as well.
            line = array_separator.join(
                entry_separator.join(str(item) for item in row)
                for row in plane
            )
            f.write(line)
            # One plane per line.
            f.write('\n')

In [4]:
# Export the train and test series to the text files that PF reads later.
for series, destination in (
    (X3, 'Data/ThreeDdata.txt'),
    (X3t, 'Data/ThreeDdata_test.txt'),
):
    aeonToFile(series, destination)

In [5]:
# Save the train and test labels as CSV files with neither an index column
# nor a header row.
y3df, y3tdf = pd.DataFrame(y3), pd.DataFrame(y3t)

for label_frame, csv_path in (
    (y3df, "Data/ThreeDlabels.csv"),
    (y3tdf, "Data/ThreeDlabels_test.csv"),
):
    label_frame.to_csv(csv_path, index=False, header=None)

In [6]:
# Output folders: dir1 is passed to PF.train, dir2 to the first PF.predict call.
dir1, dir2 = "training_output", "training_predictions"

In [7]:
# Train a PF model on the exported files and give it the name 'Spartacus'.
# Everything the run produces is written to dir1.
PF.train(
    "Data/ThreeDdata.txt",
    test_file="Data/ThreeDdata_test.txt",
    distances=['dtw_i', 'dtw_d'],
    train_labels="Data/ThreeDlabels.csv",
    test_labels="Data/ThreeDlabels_test.csv",
    return_proximities=True,
    output_directory=dir1,
    array_separator=":",
    entry_separator=",",
    model_name="Spartacus",
    data_dimension=2,
    return_training_outlier_scores=True,
    num_trees=100,
    parallel_train=True,
    r=5,
    parallel_prox=True,
)


0:3mb
finished in 0:0:0.034

0:16mb
finished in 0:0:0.020

-----------------Repetition No: 1 (ThreeDdata.txt)   -----------------
Using: 3 MB, Free: 17 MB, Allocated Pool: 20 MB, Max Available: 1024 MB
core.ProximityForestResult@2f4d3709
5.1.6.3.7.4.0.2.9.10.12.11.13.8.15.14.16.17.18.19.21.20.22.23.24.26.25.28.27.30.29.32.31.33.34.35.36.38.37.41.40.39.42.43.44.46.45.48.47.49.51.50.52.55.53.54.56.57.59.58.60.62.61.64.63.65.66.67.68.69.71.70.72.74.73.75.76.77.81.78.80.82.83.79.84.85.86.88.87.91.89.90.92.94.93.95.99.97.96.98.
Using: 47 MB, Free: 113 MB, Allocated Pool: 160 MB, Max Available: 1024 MB
****
Training Time: 2775.425491ms (0:0:2.775)
Prediction Time: 1222.171999ms (0:0:1.222)
Correct(TP+TN): 361 vs Incorrect(FP+FN): 9
Accuracy: 0.9756756756756757
Error Rate: 0.024324324324324298
REPEAT:1 ,ThreeDdata.txt, 0.9756756756756757, 2775.425491, 1222.171999, 4.77
Computing Training Proximities...
Computing Training Outlier Scores...
Computing Test/Train Proximities...
Done Computing Te

In [8]:
# Here are the predictions on the provided test set.
# The context manager closes the file even if reading fails.
# NOTE(review): eval() executes whatever the file contains. That is tolerable
# only while this file is produced locally (presumably by the PF.train call
# above); do not point this cell at files from an untrusted source.
with open(dir1 + "/Validation_Predictions.txt") as f0:
    f1 = f0.read()
preds = eval("np.array(" + f1 + ")")

In [9]:
# A saved model can be read back by name to predict on another dataset.
# Each call creates Predictions_saved.txt, holding the predicted labels of the
# read-in model.
predict_options = dict(entry_separator=",", array_separator=":", data_dimension=2)
saved_model = dir1 + "/Spartacus"

# Training set first, to illustrate; the output is directed to dir2.
PF.predict(saved_model, "Data/ThreeDdata.txt", test_labels="Data/ThreeDlabels.csv",
           output_directory=dir2, **predict_options)
# Then the test set; no output_directory is passed for this call.
PF.predict(saved_model, "Data/ThreeDdata_test.txt", test_labels="Data/ThreeDlabels_test.csv",
           **predict_options)


0:3mb
finished in 0:0:0.030

***

0:3mb
finished in 0:0:0.043

****


In [10]:
# Here are the predictions (of the saved model) on the training set.
# The context manager closes the file even if reading fails.
# NOTE(review): eval() executes whatever the file contains. That is tolerable
# only while this file is produced locally (presumably by PF.predict); do not
# point this cell at files from an untrusted source.
with open(dir2 + "/Predictions_saved.txt") as f0:
    f1 = f0.read()
train_preds_saved = eval("np.array(" + f1 + ")")

In [11]:
# Here are the predictions (of the saved model) on the test set.
# The file is read from the current working directory.
# The context manager closes the file even if reading fails.
# NOTE(review): eval() executes whatever the file contains. That is tolerable
# only while this file is produced locally (presumably by PF.predict); do not
# point this cell at files from an untrusted source.
with open("Predictions_saved.txt") as f0:
    f1 = f0.read()
preds_saved = eval("np.array(" + f1 + ")")

In [12]:
# Number of predictions, one per line: saved model on the training set,
# saved model on the test set, original model on the test set.
for predictions in (train_preds_saved, preds_saved, preds):
    print(len(predictions))

270
370
370


In [13]:
# Just checking: are the outputs of the saved model equal to the original predictions?
# Fail loudly on a size mismatch instead of silently comparing only the first
# len(preds) entries, then subtract element-wise in one vectorised step.
# A result of array([0]) means every prediction agrees.
assert preds.shape == preds_saved.shape, "prediction arrays differ in shape"
np.unique(preds - preds_saved)

array([0])

In [14]:
# Read the training proximities from the training output directory.
p = PF.getArray(f"{dir1}/TrainingProximities.txt")

In [15]:
# Dimensions of the training proximity array.
p.shape

(270, 270)

In [16]:
# The test/train proximities are read from the same output directory.
pt = PF.getArray(f"{dir1}/TestTrainProximities.txt")

In [17]:
# Dimensions of the test/train proximity array.
pt.shape

(370, 270)

In [18]:
# The raw proximities are not symmetric. Where symmetry is wanted, replace the
# matrix by the average of itself and its transpose.
p = (p + p.T) / 2

In [19]:
# Outlier scores for the training set, read from the training output
# directory. Note that these are intra-class outlier scores.
outlier_scores = PF.getArray(f"{dir1}/outlier_scores.txt")
outlier_scores.shape

(270,)