In [None]:
import numpy as np
import pandas as pd

from utils_preprocessing import eis_dataframe_from_csv
from utils_preprocessing import unwrap_z, sort_circuits
from utils import visualize_raw_spectra

# autoreload modules when code is run
%load_ext autoreload
%autoreload 2

In [None]:
# Notebook switches: save_figs is forwarded to visualize_raw_spectra, while
# plot_labelled / plot_unlabelled gate the corresponding plotting cells.
save_figs = 1
plot_labelled = 0
plot_unlabelled = 0

# Read each CSV into a data frame and pass it through unwrap_z.
df = unwrap_z(eis_dataframe_from_csv("data/train_data_filtered.csv"))
df_test = unwrap_z(eis_dataframe_from_csv("data/test_data_filtered.csv"))
df_unlabelled = unwrap_z(eis_dataframe_from_csv("data/unlabeled_data.csv"))

df_sorted = sort_circuits(df).copy()
df_test_sorted = sort_circuits(df_test).copy()

# Merge the train and test frames under a fresh 0..n-1 index, then order the
# rows by circuit. The index is not renumbered after sorting, so it keeps the
# labels assigned by the concatenation.
df_labelled = pd.concat(
    [df_sorted, df_test_sorted],
    ignore_index=True,
).sort_values(by=['Circuit'])

# Unfiltered versions of the train and test sets.
df_unfiltered = unwrap_z(eis_dataframe_from_csv("data/train_data.csv"))
df_unfiltred_test = unwrap_z(eis_dataframe_from_csv("data/test_data.csv"))

In [None]:
# Spectra removed by filtering: rows of the unfiltered frames whose index label
# does not occur in the corresponding filtered frame.
# NOTE(review): this relies on the filtered and unfiltered frames sharing index
# labels for identical spectra - confirm against eis_dataframe_from_csv.
train_kept_mask = df_unfiltered.index.isin(df.index)
test_kept_mask = df_unfiltred_test.index.isin(df_test.index)
df_filtered_out_train = df_unfiltered[~train_kept_mask].copy()
df_filtered_out_test = df_unfiltred_test[~test_kept_mask].copy()

# Merge the filtered-out train and test rows and pass them through sort_circuits.
df_filtered_out = sort_circuits(
    pd.concat(
        [df_filtered_out_train, df_filtered_out_test],
        ignore_index=True,
    )
)

In [None]:
# Split the filtered-out spectra into the 'L-R-RCPE' circuit and everything else.
is_lrrcpe = df_filtered_out['Circuit'] == 'L-R-RCPE'
df_filtered_out_LRRCPE = df_filtered_out[is_lrrcpe].copy()
df_filtered_out_other = df_filtered_out[~is_lrrcpe].copy()

# Print the number of filtered-out spectra for every circuit, in order of
# first appearance.
for circuit in df_filtered_out['Circuit'].unique():
    n_spectra = (df_filtered_out['Circuit'] == circuit).sum()
    print(f"{circuit}: {n_spectra}")

In [None]:
# Plot a reproducible random sample (seed 42) of at most 150 filtered-out
# 'L-R-RCPE' spectra. The sample size is capped at the number of available
# rows because DataFrame.sample raises a ValueError when asked for more rows
# than the frame holds (sampling without replacement).
n_lrrcpe_shown = min(150, len(df_filtered_out_LRRCPE))
fig = visualize_raw_spectra(
    df_filtered_out_LRRCPE.sample(n_lrrcpe_shown, random_state=42).copy(),
    show=1,
    save_figs=save_figs,
    row_col_ratio=1.8,
    pdf=True,
    fig_name="eis_spectra_filtered_out_LRRCPE",
    sup_title="",
    axis_off=False,
    label_fontsize=20,
    )

In [None]:
# Plot every filtered-out spectrum in df_filtered_out_other (no sub-sampling).
# A copy is handed over so the plotting helper never touches the original frame.
other_spectra = df_filtered_out_other.copy()
fig = visualize_raw_spectra(
    other_spectra,
    fig_name="eis_spectra_filtered_out_other",
    sup_title="",
    pdf=True,
    show=1,
    save_figs=save_figs,
    row_col_ratio=1.8,
    axis_off=False,
    label_fontsize=18,
)

In [None]:
# Sanity check: do any spectra appear in both the labelled and the unlabelled
# dataset? Each spectrum is reduced to the mean of its real and of its
# imaginary part, and those means are compared for exact equality.
# Note: equal means are only an indication of identical spectra, not a proof.
for frame in (df_labelled, df_unlabelled):
    frame['z_real_mean'] = frame['zreal'].apply(lambda z: np.mean(z))
    frame['z_imag_mean'] = frame['zimag'].apply(lambda z: np.mean(z))

# Number of labelled spectra whose zreal mean also occurs in the unlabelled set.
test_zreal_mean = df_labelled['z_real_mean'].isin(df_unlabelled['z_real_mean'])
print(np.sum(test_zreal_mean))

# Same check for the zimag mean. The count is taken from the element-wise
# mask: summing the already collapsed `.any()` result could only ever print
# 0 or 1 instead of the number of matches.
zimag_mean_matches = df_labelled['z_imag_mean'].isin(df_unlabelled['z_imag_mean'])
# Kept as a single boolean ("is there any match at all"), as before.
test_zimag_mean = zimag_mean_matches.any()
print(np.sum(zimag_mean_matches))

In [None]:
if plot_labelled:
    # One figure per circuit label, in the order the labels first appear
    # in df_labelled.
    circuit_column = df_labelled['Circuit']
    for circuit in circuit_column.unique():
        # Only the spectra belonging to the current circuit.
        spectra_for_circuit = df_labelled[circuit_column == circuit].copy()
        fig = visualize_raw_spectra(
            spectra_for_circuit,
            fig_name=f"eis_art_circuit_{circuit}",
            sup_title=f"EIS Spectra: {circuit}",
            pdf=True,
            show=1,
            save_figs=save_figs,
            row_col_ratio=0.6,
            axis_off=False,
        )

In [None]:
if plot_unlabelled:
    # Visualize all the unlabelled data in consecutive batches of 2000 rows,
    # one figure per batch; the last batch may be shorter.
    batch_size = 2000
    rows_unlabelled = df_unlabelled.shape[0]

    for ind_low in range(0, rows_unlabelled, batch_size):
        ind_high = min(ind_low + batch_size, rows_unlabelled)
        batch = df_unlabelled[ind_low:ind_high].copy()
        fig = visualize_raw_spectra(
            batch,
            fig_name=f"eis_art_unlabelled_{ind_low}_{ind_high}",
            sup_title=f"Unlabelled EIS Spectra: {ind_low} to {ind_high}",
            pdf=True,
            show=1,
            save_figs=save_figs,
            row_col_ratio=0.6,
        )

In [None]:
def extract_freq_ranges_measurements_points(df):
    """Summarise the frequency grids stored in the ``freq`` column of ``df``.

    Two columns are added to ``df`` in place:

    * ``freq_min_max`` - tuple ``(first, last)`` of each ``freq`` sequence,
    * ``meas_count`` - number of entries of each ``freq`` sequence.

    The unique ranges, the unique measurement counts and the largest frequency
    range shared by all rows are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``freq`` column holding one indexable sequence per row.

    Returns
    -------
    freq_ranges : numpy.ndarray
        Unique ``(first, last)`` tuples, in order of first appearance.
    meas_counts : numpy.ndarray
        Sorted unique sequence lengths.
    """
    # NOTE(review): the first and last entries are used as lower and upper
    # bound, which presumes each freq sequence is sorted ascending - confirm.
    df['freq_min_max'] = df['freq'].apply(lambda freqs: (freqs[0], freqs[-1]))
    df['meas_count'] = df['freq'].apply(len)

    freq_ranges = df['freq_min_max'].unique()
    meas_counts = np.sort(df['meas_count'].unique())
    print(f"freq_range {freq_ranges}")
    print(f"Number of freq ranges {len(freq_ranges)}")
    print(f" Meas count {meas_counts}")
    print(f"Number of meas counts {len(meas_counts)}")

    # Largest shared frequency range: highest lower bound up to the lowest
    # upper bound. Without any rows there is no range, and np.max / np.min
    # would raise a ValueError on the empty input, so report None instead.
    if len(freq_ranges) == 0:
        freq_range = None
    else:
        freq_range = (
            np.max([bounds[0] for bounds in freq_ranges]),
            np.min([bounds[1] for bounds in freq_ranges]),
        )
    print(f"Largest shared frequency range {freq_range}")
    return freq_ranges, meas_counts

In [None]:
# Report the frequency ranges and measurement counts of both datasets; each
# report is printed under its heading and followed by blank lines.
freq_summaries = {}
for heading, frame in (
    ("Labelled data:", df_labelled),
    ("Unlabelled data:", df_unlabelled),
):
    print(heading)
    freq_summaries[heading] = extract_freq_ranges_measurements_points(frame)
    print("\n")

freq_ranges_l, meas_counts_l = freq_summaries["Labelled data:"]
freq_ranges_ul, meas_counts_ul = freq_summaries["Unlabelled data:"]
