In [None]:
import importlib
import os
from collections import Counter
from utils import (
    structure_noah,
    generic_helper
)
from utils.definitions import ROOT_DIR
# Reload the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
importlib.reload(structure_noah)
importlib.reload(generic_helper)

In [None]:
# List the contents of noah_raw_data (the downloaded, refined data).
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the printed output, and everything derived from it, reproducible
# across runs and machines.
absolute_path = f"{ROOT_DIR}/noah_raw_data"
extracted_contents = sorted(os.listdir(absolute_path))
print(extracted_contents)

In [None]:
# Count the entries found in the noah_raw_data folder (the refined download).
len(extracted_contents)

In [None]:
# Build the full path of every entry found in the raw-data folder
absolute_path_to_files = [
    f"{absolute_path}/{entry}"
    for entry in extracted_contents
]

# Hand the paths to structure_noah.get_structured_data to obtain the
# structured data used by the rest of the notebook
structured_data = structure_noah.get_structured_data(path_to_files=absolute_path_to_files)

In [None]:
# Tally how many cells belong to each cathode group in the original data
check_cell_per_group = [
    cell_record["summary"]["cathode_group"]
    for cell_record in structured_data.values()
]
Counter(check_cell_per_group)



In [None]:
# Inspect the top-level keys of the structured data
# (e.g. 'batch_B10A_cell_3', which is used as the example cell below).
structured_data.keys()

In [None]:
# Inspect which fields are stored for one example cell; the following cells
# look into its "pulses" and 'summary' entries.
structured_data['batch_B10A_cell_3'].keys()

In [None]:
# Inspect the keys of the example cell's "pulses" entry
# (presumably the cycle numbers at which pulse tests were run — TODO confirm).
structured_data['batch_B10A_cell_3']["pulses"].keys()

In [None]:
# Inspect the fields recorded for a single pulse entry (key 16) of the
# example cell.
structured_data['batch_B10A_cell_3']["pulses"][16].keys()

In [None]:
# Inspect the fields available in the example cell's summary data
# ("cathode_group" is one of the fields read from it earlier in the notebook).
structured_data['batch_B10A_cell_3']['summary'].keys()

In [None]:
# Map every cell to the cycles at which pulse testing was carried out
cell_cyc = {}

for cell, cell_record in structured_data.items():
    pulse_cycles = [*cell_record['pulses'].keys()]
    if len(pulse_cycles) == 0:
        # report the cells that have no pulse testing at all
        print(cell)
    cell_cyc[cell] = pulse_cycles

print(cell_cyc)

In [None]:
# Report the cells whose first pulse test was NOT carried out within the
# first 100 cycles.
# The earliest cycle is taken with min() rather than by position, so the
# check does not depend on the order in which the pulse keys were stored.

for cell, cycles in cell_cyc.items():

    # cells without any pulse testing have nothing to check
    if not cycles:
        continue

    if min(cycles) > 100:
        print(cell)

In [None]:
# Some cells have no pulse testing at all; collect their names here so they
# can be left out of the data set further down
cells_without_pulse = [
    name
    for name, record in structured_data.items()
    if len(record['pulses'].keys()) == 0
]
cells_without_pulse

In [None]:
# Cells with irregular voltage. This cell only records their names; they are
# excluded when the filtered dictionary is built.
irregular_cells = [
    'batch_B1A_cell_4', 
    'batch_B35H_cell_1',
    'batch_B26K_cell_2'
]


In [None]:
# Keep only the cells that have pulse-test data and are not listed as
# irregular. The names to drop are gathered into a set once, so each
# membership test is a constant-time lookup instead of a scan over a list
# that is re-concatenated for every key.
excluded_cells = set(cells_without_pulse) | set(irregular_cells)
structured_data_with_pulse = {
    name: record
    for name, record in structured_data.items()
    if name not in excluded_cells
}
len(structured_data_with_pulse)

In [None]:
# Persist the filtered data (cells with pulse tests, irregular cells
# excluded) via generic_helper.dump_data, targeting
# "noah_structured_data.pkl" under {ROOT_DIR}/data.
# NOTE(review): the .pkl extension suggests pickle serialization — confirm
# in generic_helper.dump_data.
generic_helper.dump_data(
    data=structured_data_with_pulse,
    path=f"{ROOT_DIR}/data",
    fname="noah_structured_data.pkl"
)
