## Purpose of This Notebook

This notebook is an exploratory tool for examining the log file data from the horse behavioural experiments conducted in October and November 2023. The data are first loaded into a local DuckDB database by `logfile-to-database-RPE.ipynb`.

It facilitates loading and querying the data from the database using some example SQL queries.



### Setup

In [None]:
# | echo: false

from pathlib import Path
from pprint import pprint

import duckdb
import pandas as pd
from IPython.display import Markdown, display
from loguru import logger

from horse_logic.utils import set_custom_logger_format

In [None]:
# | echo: false

# Display dataframes in a more friendly paginated manner
# NOTE(review): these imports sit outside the notebook's main import cell.

import itables.options as opt
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)  # per the itables docs: render every DataFrame as an interactive table
opt.pageLength = 20  # rows shown per page of each rendered table


In [None]:
# | echo: false

# Apply the project's logging format (helper imported from horse_logic.utils).
# NOTE(review): presumably reconfigures the loguru `logger` used below — confirm in horse_logic.utils.
set_custom_logger_format()

### Database information

In [None]:
# | echo: false

# Data location and experiment type used to build the database file name.
# DATA_DIR is a relative path, so it is resolved against the current working directory.
DATA_DIR = Path("../data")
EXPERIMENT_TYPE = "RPE"

# Fail early with a readable message instead of a bare AssertionError.
assert DATA_DIR.exists(), f"Data directory not found: {DATA_DIR.resolve()}"

DATA_DB = DATA_DIR / f"Experiments_{EXPERIMENT_TYPE}_2023-Q4.ddb"  # DuckDB database name
db_exists = DATA_DB.exists()  # snapshot of the file's existence at the time this cell runs

logger.info(f"Database file: {DATA_DB.resolve()}")

### Connect to database

In [None]:
# | echo: false

def connect_ddb(database=DATA_DB):
    """Open a connection to a local DuckDB database file.

    Args:
        database: Path (or path string) of the DuckDB file. Defaults to ``DATA_DB``.

    Returns:
        The open DuckDB connection.

    Raises:
        FileNotFoundError: If ``database`` does not exist.
        Exception: Whatever ``duckdb.connect`` raised (for example when the
            file is already open elsewhere); it is logged and re-raised.
    """
    database = Path(database)

    # Check the path that was actually passed in, at call time, rather than a
    # module-level flag computed earlier for the default database only.
    if not database.exists():
        logger.error(f"Database file: {database.resolve()} not found.")
        raise FileNotFoundError(f"Database file: {database.resolve()} not found.")

    try:
        con = duckdb.connect(database=str(database))
    except Exception as e:
        logger.error(
            f"Error with Database file: {database.resolve()} - Is database already open - check for .wal lock file?"
        )
        logger.error(f"{e}")
        # Re-raise so the caller never receives an unbound/invalid connection.
        raise

    # Only reached when the connection was really opened.
    logger.info(f"CONNECTED - Database file: {database.resolve()}")
    return con


# Open the default database (DATA_DB); `con` is reused by the query cells that follow.
con = connect_ddb()


### Perform some example database queries (local DuckDB) 

Show the tables in the database - should be `Events`, `Experiments` and `Trials`.

In [None]:
# Cross-check queries: list the tables present in the connected database.
# The result of con.sql is the cell's last expression, so it is displayed as the cell output.

con.sql("SHOW TABLES;")

### Show the distinct `EventType` values in alphabetical order

In [None]:
# DISTINCT collapses repeated values; ORDER BY sorts the remaining EventType values.
con.sql("SELECT DISTINCT EventType FROM Events ORDER BY EventType")

### List all of the Experiments

In [None]:
# Read every row and column of the Experiments table and convert the result with .df()
# (a pandas DataFrame, per the DuckDB Python API).
experiments_df = con.sql("SELECT * FROM Experiments").df()

In [None]:
# Display the experiments loaded in the previous cell (bare last expression = cell output).
experiments_df

### Get the information for Experiment with ID = 128 (the arbitrary id assigned when loaded)

In [None]:
# Descriptive columns of the single Experiments row whose ExperimentID is 128.
experiment_info_query = """
    SELECT
        ExperimentID, 
        Cohort,
        SubjectName, 
        SubjectNumber, 
        SessionNumber,
        ExperimentType, 
        Comment, 
        DateTime, 
        LogFileName
    FROM Experiments 
    WHERE ExperimentID = 128;
"""

# Run the query and show the result as a DataFrame (last expression of the cell).
con.sql(experiment_info_query).df()

### Reconstruct the event information for a specific experiment

In [None]:
# Experiment to reconstruct (the arbitrary ID assigned when the log file was loaded).
EXPERIMENT_ID = 128

# The ID is bound as a query parameter (`?`), so another experiment can be inspected
# by changing only EXPERIMENT_ID. ORDER BY makes the event sequence explicit: without
# it SQL guarantees no particular row order, which matters when reconstructing events.
# NOTE(review): assumes TrialNumber / EventNumber are the sequence counters written by
# the loader notebook — confirm.
con.execute(
    """
    SELECT
        -- Trials.TrialID,
        Trials.TrialNumber,
        -- TrialStartTime,
        -- TrialEndTime,
        -- EventID,
        EventNumber,
        EventTime,
        EventType,
        EventElapsedTime
    FROM Experiments
    INNER JOIN Trials ON Experiments.ExperimentID = Trials.ExperimentID
    INNER JOIN Events ON Trials.TrialID = Events.TrialID
    WHERE Experiments.ExperimentID = ?
    ORDER BY Trials.TrialNumber, EventNumber;
    """,
    [EXPERIMENT_ID],
).df()

### Reconstruct the event information for a specific subject (gio) and session number (2)

In [None]:
# Subject and session to reconstruct.
SUBJECT_NAME = "gio"
SESSION_NUMBER = 2

# Both filter values are bound as query parameters (`?`, in order) instead of being
# embedded in the SQL text. ORDER BY makes the event sequence explicit: without it SQL
# guarantees no particular row order. ExperimentID comes first in the ordering in case
# more than one experiment matches the subject/session pair.
# NOTE(review): assumes TrialNumber / EventNumber are the sequence counters written by
# the loader notebook — confirm.
con.execute(
    """
    SELECT
        -- Experiments.SubjectName,
        -- Experiments.SessionNumber,
        -- Trials.TrialID,
        Trials.TrialNumber,
        -- TrialStartTime,
        -- TrialEndTime,
        -- EventID,
        EventNumber,
        EventTime,
        EventType,
        EventElapsedTime
    FROM Experiments
    INNER JOIN Trials ON Experiments.ExperimentID = Trials.ExperimentID
    INNER JOIN Events ON Trials.TrialID = Events.TrialID
    WHERE Experiments.SubjectName = ? AND Experiments.SessionNumber = ?
    ORDER BY Experiments.ExperimentID, Trials.TrialNumber, EventNumber;
    """,
    [SUBJECT_NAME, SESSION_NUMBER],
).df()

### Close the connection to the database

Close the connection when you are finished to avoid file lock errors.

In [None]:
# Close the connection opened by connect_ddb(); queries on `con` after this point will fail.
con.close()