# Data visualization of the battery time-series data using Datashader. 
### This notebook allows the visualization of millions of data points.

### Remark: HoloViews and Geckodriver might require additional setup steps.

In [None]:
import gc
import os
import warnings

import matplotlib.pyplot as plt

from src.batt_data import batt_data, data_utils, plotting
from src.path_setup import setup_paths

# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project's modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Use the seaborn "white" Matplotlib style for all figures
plt.style.use("seaborn-v0_8-white")

# NOTE(review): presumably configures the project's data/output paths — confirm in src.path_setup
setup_paths()
# NOTE(review): presumably prepares cached data used by the cells below — confirm in data_utils
data_utils.build_data_cache()

In [2]:
# Switch for the plotting cells below: they only call plot_all_batteries when this is True.
generate_static_plots_for_all_batteries = True
# Cell characteristics, read once and passed to every BattData constructed in this notebook.
cell_characteristics = data_utils.read_cell_characteristics()

def plot_all_batteries(id_list):
    """Generate and save static Datashader diagnostic plots for several batteries.

    Each battery is loaded with segment selection and gap removal disabled,
    plotted via ``plotting.diagnostic_plot_datashader`` (static, saved to
    disk), and then released before the next battery is loaded.

    Args:
        id_list: Iterable of battery identifiers. Each one is converted with
            ``str()`` before being passed to ``batt_data.BattData``.
    """
    # Silence FutureWarnings only for the duration of the plotting run. The
    # context manager restores the previous warning filters on exit, so the
    # rest of the notebook is not affected by this call.
    with warnings.catch_warnings():
        warnings.simplefilter(action="ignore", category=FutureWarning)
        for batt_id_int in id_list:
            batt_id = str(batt_id_int)
            print(f"Generating plots for battery {batt_id}")
            battdata = batt_data.BattData(
                batt_id,
                cell_characteristics,
                segment_selection=False,
                gap_removal=False,
            )

            plotting.diagnostic_plot_datashader(battdata, save=True, dynamic=False, scale=0.3)
            # Delete battdata to free up memory before loading the next battery
            del battdata
            gc.collect()

In [None]:
# First batch: batteries 1 through 9.
id_list = list(range(1, 10))
if generate_static_plots_for_all_batteries:
    plot_all_batteries(id_list)

In [None]:
# Second batch: batteries 10 through 19.
id_list = list(range(10, 20))
if generate_static_plots_for_all_batteries:
    plot_all_batteries(id_list)

In [None]:
# Third batch: batteries 20 through 28.
id_list = list(range(20, 29))
if generate_static_plots_for_all_batteries:
    plot_all_batteries(id_list)

## Remark: Some of the data files have duplicated indices.

It is not entirely clear why this happens.\
The measurements that share a timestamp are themselves different, suggesting that the BMS timestamp resolution might not be high enough.

In [None]:
# Load every battery (IDs 1-28) in turn, add up the total number of rows, and
# report per battery how many rows carry an index label that occurs more than once.
nb_of_data_point = 0
for i in range(1, 29):
    batt_id = str(i)
    print(f"Loading data for battery {batt_id}")
    battdata = batt_data.BattData(
        batt_id, cell_characteristics, segment_selection=False, gap_removal=False
    )
    nb_of_data_point = nb_of_data_point + len(battdata.df)
    # keep=False flags every occurrence of a repeated index label, not only the later ones
    is_duplicated = battdata.df.index.duplicated(keep=False)
    duplicated_indices = battdata.df.index[is_duplicated]
    print(f"Number of duplicated indices: {len(duplicated_indices)}")
    # Drop the reference and collect garbage before the next battery is loaded
    del battdata
    gc.collect()
print(f"Total number of data points: {nb_of_data_point}")