# Experiments

> A _Pisces experiment_ is a combined specification of data sets, validation method(s), and model architectures to use.

Let's go through the example provided in `<pisces>/experiment_config/base_config.json`:
```json
{
  "data_config": {
    "data_directory": "./data_sets",
    "sets": ""
  },
  "validation": {
    "method": "LEAVE_ONE_OUT",
    "parameter": null
  },
  "models": [
    "LOGISTIC"
  ],
  "features": [
    "ACTIVITY"
  ]
}
```
Let's break this down, to give you a sense for the kinds of experiments Pisces can run and how to modify this to suit your own investigations.

* `"data_config"`: This is a dictionary that specifies the data sets to use. 
  * The `"data_directory": "./data_sets"` entry specifies that the data sets are located in the `data_sets` directory, which is a subdirectory of the current working directory.
  * The `"sets": ""` key specifies the names of the data sets to use. Since this is an empty string, all data sets in the directory will be used. Otherwise, this would be a comma-separated list of data set names, which are folders inside the `"data_directory"` that have subdirectories matching `cleaned_*`.
* `"validation"`: This is a dictionary that specifies the validation method to use.  
  * The `"method"` key specifies the validation method to use. Here, we use `"LEAVE_ONE_OUT"`, which means that each data set will be used to train a model, and then the model will be tested on the same data set.
  * The `"parameter"` key specifies any parameters that the validation method requires. When the method is `"LEAVE_ONE_OUT"`, the parameter is the name of the column to use for the leave-one-out validation.

In [1]:
#| default_exp evaluations

In [2]:
#| hide 
%load_ext autoreload
%autoreload 2

In [3]:
#| hide
from nbdev.showdoc import *


In [4]:
#| export

from enum import Enum
from typing import List

from enum import Enum, auto
from typing import Dict, List, Optional, Tuple, Union

from pathlib import Path

import numpy as np

In [5]:
#| export

class DataSetObject:
    """Lightweight record holding a data set's name and its path."""

    def __init__(self, name: str, path: Path):
        # Pure attribute storage: nothing is validated and the path is not touched.
        self.name, self.path = name, path


In [None]:

import random
import string


def random_string(length: int) -> str:
    """Return ``length`` random lowercase ASCII letters.

    Letters are drawn independently, so a letter may appear more than once
    and ``length`` may be larger than the 26-letter alphabet.  (Sampling
    without replacement would raise ``ValueError`` for ``length > 26`` and
    could never produce a repeated letter.)
    """
    return ''.join(random.choices(string.ascii_lowercase, k=length))

# Identifiers embedded in the example file names below.
ids = [
    "abx123",
    "bx887",
    "MN3f23"
]

# ID followed by a fixed suffix
acc_txts_1 = [
    f"{subject_id}_cleaned_acc.txt"
    for subject_id in ids
]

# fixed prefix, ID, then a random three-letter extension
acc_txts_2 = [
    f"exported-{subject_id}.{random_string(3)}"
    for subject_id in ids
]

# just ID.csv, no pre/suffix
# (needs the f-prefix; otherwise every entry is the literal text "{id}.csv")
acc_txts_3 = [
    f"{subject_id}.csv"
    for subject_id in ids
]


# just ID, no pre/suffix
acc_txts_4 = [
    f"{subject_id}"
    for subject_id in ids
]

acc_txts = [acc_txts_1, acc_txts_2, acc_txts_3, acc_txts_4]


In [63]:
class SimplifiablePrefixTree:
    """Prefix tree (trie) whose single-child chains can be collapsed.

    Words are broken into tokens by ``chars_from``: individual characters
    when ``delimiter`` is empty, otherwise the pieces of
    ``word.split(delimiter)``.  Each node stores its own token in ``key``
    and its children in a dict keyed by the child's token.

    ``simplify()`` is meant for display: it rewrites ``key`` values but
    leaves the ``children`` dict keys untouched, so ``insert``/``search``
    are only meaningful on a tree that has not been simplified.
    """

    def __init__(self, delimiter: str = "", key: str = ""):
        self.key = key
        self.children: Dict[str, 'SimplifiablePrefixTree'] = {}
        self.is_end_of_word = False
        self.delimiter = delimiter

    def chars_from(self, word: str):
        """Return the token sequence for ``word`` (characters or delimiter-split pieces)."""
        return word.split(self.delimiter) if self.delimiter else word

    def insert(self, word: str):
        """Add ``word`` to the tree, creating nodes for tokens not yet present."""
        node = self
        for char in self.chars_from(word):
            if char not in node.children:
                node.children[char] = SimplifiablePrefixTree(self.delimiter, key=char)
            node = node.children[char]
        node.is_end_of_word = True

    def search(self, word: str):
        """Return True only if exactly ``word`` was inserted (a mere prefix gives False)."""
        node = self
        for char in self.chars_from(word):
            if char not in node.children:
                return False
            node = node.children[char]
        return node.is_end_of_word

    def simplify(self):
        """Collapse every chain of single-child nodes in place and return ``self``.

        A node that has exactly one child and does not itself end a word
        absorbs that child: the child's key is appended to this node's key
        (tokens are concatenated directly, without the delimiter) and the
        child's children become this node's children.
        """
        # A loop rather than self-recursion: one stack frame per absorbed
        # token would overflow the recursion limit on long words.
        while len(self.children) == 1 and not self.is_end_of_word:
            (only_child,) = self.children.values()
            self.key += only_child.key
            # Carry the terminal flag over; dropping it would silently erase
            # a word that is a proper prefix of another (e.g. "ab" vs "abc")
            # because the merge would keep running straight through it.
            self.is_end_of_word = only_child.is_end_of_word
            self.children = only_child.children
        for child in self.children.values():
            child.simplify()
        return self

    def __str__(self):
        # prints .children recursively with indentation
        return self.print_tree(self)

    @staticmethod
    def print_tree(node: 'SimplifiablePrefixTree', indent=0) -> str:
        """Render the descendants of ``node`` one per line; ``node.key`` itself is not shown."""
        pieces = []
        for child in node.children.values():
            pieces.append("| " * indent + "( " + child.key + "\n")
            pieces.append(SimplifiablePrefixTree.print_tree(child, indent + 1))
        return "".join(pieces)



In [64]:
# Build a character-level tree (default empty delimiter) from the
# "<id>_cleaned_acc.txt" names and print it before any simplification.
p = SimplifiablePrefixTree()
for txt in acc_txts_1:
    p.insert(txt)
print(p)

( a
| ( b
| | ( x
| | | ( 1
| | | | ( 2
| | | | | ( 3
| | | | | | ( _
| | | | | | | ( c
| | | | | | | | ( l
| | | | | | | | | ( e
| | | | | | | | | | ( a
| | | | | | | | | | | ( n
| | | | | | | | | | | | ( e
| | | | | | | | | | | | | ( d
| | | | | | | | | | | | | | ( _
| | | | | | | | | | | | | | | ( a
| | | | | | | | | | | | | | | | ( c
| | | | | | | | | | | | | | | | | ( c
| | | | | | | | | | | | | | | | | | ( .
| | | | | | | | | | | | | | | | | | | ( t
| | | | | | | | | | | | | | | | | | | | ( x
| | | | | | | | | | | | | | | | | | | | | ( t
( b
| ( x
| | ( 8
| | | ( 8
| | | | ( 7
| | | | | ( _
| | | | | | ( c
| | | | | | | ( l
| | | | | | | | ( e
| | | | | | | | | ( a
| | | | | | | | | | ( n
| | | | | | | | | | | ( e
| | | | | | | | | | | | ( d
| | | | | | | | | | | | | ( _
| | | | | | | | | | | | | | ( a
| | | | | | | | | | | | | | | ( c
| | | | | | | | | | | | | | | | ( c
| | | | | | | | | | | | | | | | | ( .
| | | | | | | | | | | | | | | | | | ( t
| | | | | | | | | | | | | | | | 

In [65]:
# simplify() rewrites the tree in place and returns the same object,
# so `p` still refers to the tree built above.
p = p.simplify()

In [66]:
# Each top-level node now prints with its merged label.
print(p)

( abx123_cleaned_acc.txt
( bx887_cleaned_acc.txt
( MN3f23_cleaned_acc.txt



In [67]:
# Insert each file name reversed, so names that share an ending share a
# path from the root of the tree.
q = SimplifiablePrefixTree()

for txt in acc_txts_1:
    rev_txt = txt[::-1]
    print("inserting", rev_txt)
    q.insert(rev_txt)
# Show the tree before and after collapsing single-child chains.
print(q)
q.simplify()
print(f"SIMPLIFIED:\n{q}")


inserting txt.cca_denaelc_321xba
inserting txt.cca_denaelc_788xb
inserting txt.cca_denaelc_32f3NM
( t
| ( x
| | ( t
| | | ( .
| | | | ( c
| | | | | ( c
| | | | | | ( a
| | | | | | | ( _
| | | | | | | | ( d
| | | | | | | | | ( e
| | | | | | | | | | ( n
| | | | | | | | | | | ( a
| | | | | | | | | | | | ( e
| | | | | | | | | | | | | ( l
| | | | | | | | | | | | | | ( c
| | | | | | | | | | | | | | | ( _
| | | | | | | | | | | | | | | | ( 3
| | | | | | | | | | | | | | | | | ( 2
| | | | | | | | | | | | | | | | | | ( 1
| | | | | | | | | | | | | | | | | | | ( x
| | | | | | | | | | | | | | | | | | | | ( b
| | | | | | | | | | | | | | | | | | | | | ( a
| | | | | | | | | | | | | | | | | | ( f
| | | | | | | | | | | | | | | | | | | ( 3
| | | | | | | | | | | | | | | | | | | | ( N
| | | | | | | | | | | | | | | | | | | | | ( M
| | | | | | | | | | | | | | | | ( 7
| | | | | | | | | | | | | | | | | ( 8
| | | | | | | | | | | | | | | | | | ( 8
| | | | | | | | | | | | | | | | | | | ( x
| | | | | | | | | | | | 