# Assigning Paths and Imports

In [3]:
# -----------------------------------------------------------------------------
# Copyright 2020 by ShabaniPy Authors, see AUTHORS for more details.
#
# Distributed under the terms of the MIT license.
#
# The full license is in the file LICENCE, distributed with this software.
# -----------------------------------------------------------------------------

"""Template for general aggregation and processing of data for analysis.

"""

import os

import numpy as np
import h5py
import matplotlib

from shabanipy.bulk.data_classifying import (
    DataClassifier,
    FilenamePattern,
    LogPattern,
    MeasurementPattern,
    NamePattern,
    RampPattern,
    StepPattern,
    ValuePattern,
)

from shabanipy.bulk.data_processing import (
    PreProcessingStep,
    ProcessCoordinator,
    ProcessingStep,
    SummarizingStep,
)

# --- User settings --------------------------------------------------------------------
# Fill in the empty strings and the "{...}" placeholders below before running.

#: Date components selecting the Labber backup folder to scan.
YEAR = "2021"
MONTH = ""
MONTHDAY = ""

#: Folders inspected when looking for Labber hdf5 data. The single entry points
#: into the Google Drive folder where the Labber data backups are stored.
FOLDERS = [
    "/".join(
        (
            "/Volumes/GoogleDrive/.shortcut-targets-by-id/1p4A2foj_vBr4k6wrGEIpURdVkG-qAFgb",
            "NYU Quantum Engineering Lab",
            "Labber Data Backups",
            "vector9",
            YEAR,
            MONTH,
            "Data_" + MONTHDAY,
        )
    )
]

#: Sample name, of the form "{Wafer}-{Piece}_{Design}-{Iteration}". Used for the
#: top-level output directory.
SAMPLE_NAME = "{Wafer}-{Piece}_{Design}-{Iteration}"

#: Full sample identifier that must appear in the measurement file name, usually
#: of the form "{Wafer}-{Piece}_{Design}-{Iteration}_{Junction}_{Cooldown}".
SAMPLE_ID = "{Wafer}-{Piece}_{Design}-{Iteration}_{Junction}_{Cooldown}"

#: hdf5 file number; appended to SAMPLE_ID when matching file names and used as
#: a suffix of the generated file names.
FILE_NUM = ""

#: Directory in which generated files are stored.
PATH = "/".join(
    ("/Users/bh/Desktop/Code/Topological JJ/Samples", SAMPLE_NAME, SAMPLE_ID)
)

#: Name of the generated raw (aggregated) data file.
RAW_DATA_NAME = f"{PATH}/Data/{SAMPLE_ID}_aggregated-data-{FILE_NUM}.hdf5"

#: Name of the generated processed data file.
PROCESSED_DATA_NAME = f"{PATH}/Data/{SAMPLE_ID}_processed-data-{FILE_NUM}.hdf5"

# Data Aggregation

In [None]:
# --- Configuration --------------------------------------------------------------------

# Describe which Labber files belong to the "Data" measurement and which of
# their step/log channels to keep. Commented-out patterns are optional channels
# that can be re-enabled for measurements that sweep them.
clf = DataClassifier(
    patterns=[
        MeasurementPattern(
            name="Data",
            # Files are selected by matching "<SAMPLE_ID>-<FILE_NUM>" against
            # their name.
            filename_pattern=FilenamePattern(
                regex=f"{SAMPLE_ID}-{FILE_NUM}",
            ),
            patterns=[
                # StepPattern(
                #     name="Temperature",
                #     name_pattern=NamePattern(names=["Fridge - Temperature closed loop - target temperature"]),
                #     use_in_classification=True,
                #     classifier_level=2,
                # ),
                StepPattern(
                    name="In-plane Field - Y",
                    name_pattern=NamePattern(names=["Vector Magnet - Field Y"]),
                    use_in_classification=True,
                    classifier_level=1,
                ),
                StepPattern(
                    name="In-plane Field - Z",
                    name_pattern=NamePattern(names=["Vector Magnet - Field Z"]),
                    use_in_classification=True,
                    classifier_level=1,
                ),
                # StepPattern(
                #     name="Microwave: Frequency",
                #     name_pattern=NamePattern(names=["Microwave - Frequency"]),
                #     use_in_classification=True,
                #     classifier_level=2,
                # ),
                StepPattern(
                    name="Vg",
                    name_pattern=NamePattern(names=["SM3 - Source voltage"]),
                    use_in_classification=True,
                    classifier_level=3,
                ),
                # LogPattern(name="Voltage Drop", pattern=NamePattern(names=["DMM - SingleValue"])),
                # LogPattern(name="dR", pattern=NamePattern(names=["SRS3 - Value"])),
                # NOTE: `names` is given as a list, like the step patterns above.
                # A bare parenthesised string such as ("abc") is a plain str, not
                # a one-element tuple.
                LogPattern(
                    name="Voltage Drop",
                    pattern=NamePattern(names=["VICurveTracer - VI curve"]),
                    x_name="Bias",
                ),
                LogPattern(
                    name="dR",
                    pattern=NamePattern(names=["VICurveTracer - dR vs I curve"]),
                    x_name="Bias",
                ),
            ],
        ),
    ]
)

# --- Run ------------------------------------------------------------------------------

# Step 1: find the files matching each measurement pattern and list them.
clf.identify_datasets(FOLDERS)

for n, datasets in clf._datasets.items():
    print(f"Measurement: {n}")
    for d in datasets:
        print(f"  - {os.path.basename(d)}")

# Step 2: classify the identified files and list the classifier values per file.
clf.classify_datasets()
for n, ds in clf._classified_datasets.items():
    print(f"Measurement: {n}")
    for path, classifiers in ds.items():
        # Report the file currently being iterated (`path`), not the stale `d`
        # variable left over from the identification loop above.
        print(f"  - {os.path.basename(path)}")
        for level, values in classifiers.items():
            print(f"    - level {level}: {values}")

# Step 3: write the aggregated data to a single hdf5 file.
clf.consolidate_dataset(RAW_DATA_NAME)

# Data Processing

In [None]:
# --- Configuration --------------------------------------------------------------------


def _scale_voltage(volt, scaling):
    """Return the measured voltage multiplied by ``scaling``."""
    return volt * scaling


# Pre-processing reads the aggregated file (RAW_DATA_NAME); PROCESSED_DATA_NAME
# is passed as both the duplicate and the processing path, and PATH as the
# summary directory.
coor = ProcessCoordinator(
    archive_path=RAW_DATA_NAME,
    duplicate_path=PROCESSED_DATA_NAME,
    processing_path=PROCESSED_DATA_NAME,
    summary_directory=PATH,
    preprocessing_steps=[
        PreProcessingStep(
            name="voltage_scaling",
            measurements=["Data"],
            input_quantities=["Voltage Drop"],
            # Correct for amplifier gain (usually 100)
            parameters={"scaling": 1e-2},
            routine=_scale_voltage,
            output_quantities=["ScaledVoltage"],
        ),
    ],
    # No processing or summary steps are configured in this template.
    processing_steps=[],
    summary_steps=[],
)

# --- Run ------------------------------------------------------------------------------

coor.run_preprocess()