In [None]:
# This notebook loads the tsfresh + XGBoost model and plots the raw spectra of all misclassified samples.
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb

from utils import plot_cm, visualize_raw_spectra, confusion_nyquist_plot
from utils_preprocessing import eis_dataframe_from_csv, interpolate_to_freq_range
from clf_tsfresh_xgb import load_features_le

%load_ext autoreload
%autoreload 2

In [None]:
# Input files: tsfresh feature tables for train/test and the label-encoder mapping.
train_data_f = "data/tsfresh/train_tsfresh_filtered.csv"
test_data_f = "data/tsfresh/test_tsfresh_filtered.csv"
le_f = "data/le_name_mapping.json"

# Prefix used when building the confusion-matrix figure names below.
output_dir = "results/clf/xgb/2023-01-26_11-19-01"

X_train, y_train, X_test, y_test, le = load_features_le(train_data_f, test_data_f, le_f)


def _min_max_scale(values):
    """Shift and scale `values` so that its minimum maps to 0 and its maximum to 1."""
    return (values - values.min()) / (values.max() - values.min())


def _scale_by_max(values):
    """Divide `values` by its own maximum."""
    return (values) / (values.max())


def _scale_by_negated_min(values):
    """Divide `values` by the negative of its own minimum."""
    return (values) / (- values.min())


# 30 geometrically spaced points between 10 and 1e5, handed to
# interpolate_to_freq_range together with the test spectra.
df_test = eis_dataframe_from_csv("data/test_data_filtered.csv")
interpolated_basis = np.geomspace(10, 1e5, num=30)
df_test = interpolate_to_freq_range(df_test, interpolated_basis)

# Per-row normalisation: each entry of 'zreal'/'zimag' must expose .min()/.max()
# (presumably one array per spectrum -- TODO confirm against eis_dataframe_from_csv).
df_test['zreal_norm'] = df_test['zreal'].apply(_min_max_scale)
df_test['zimag_norm'] = df_test['zimag'].apply(_min_max_scale)

df_test['zreal_norm2'] = df_test['zreal'].apply(_scale_by_max)
df_test['zimag_norm2'] = df_test['zimag'].apply(_scale_by_negated_min)

In [None]:
# Use the test-set predictions stored on disk rather than predicting again.
pred_test_f = "results/clf_filtered/xgb/manuscript/pred_test.txt"
y_test_pred = np.loadtxt(pred_test_f)

# Confusion matrix of the stored predictions; the figure is closed right after
# the call so nothing is left open in the notebook.
plot_cm(
    y_test,
    y_test_pred,
    le,
    save=0,
    figname=f"{output_dir}/test_confusion",
)
plt.close()

In [None]:
# Keyword options for the plot, gathered in one place so they are easy to tweak.
# The normalised columns selected here are the ones added to df_test above.
nyquist_plot_options = {
    "columns": ["zreal_norm", "zimag_norm"],
    "lw": 0.2,
    "alpha": 0.4,
    "save": True,
    "figname": "confusion_nyquist",
    "drop_below_zero": False,
}

confusion_nyquist_plot(df_test, le, y_test, y_test_pred, **nyquist_plot_options)

In [None]:
# Positional indices of every test sample whose predicted label differs from
# the true label. These are row positions (used with .iloc below), not labels.
misclassified = list(np.where(y_test != y_test_pred)[0])

# Plot the raw spectra of the misclassified samples only. A copy is passed so
# the plotting helper cannot modify df_test.
fig = visualize_raw_spectra(
    df_test.iloc[misclassified].copy(),
    show=1,
    save_figs=True,
    row_col_ratio=0.6,
    pdf=True,
    # File name kept exactly as before so existing outputs are still overwritten.
    fig_name="missclassified_tsfreshXGB",
    # Title spelling corrected ("Misclassififed" -> "Misclassified").
    sup_title="Misclassified EIS Spectra: tsfresh-XGBoost",
    axis_off=False,
)

In [None]:

# Fit a fresh XGBoost classifier on the tsfresh training features.
# The raw training spectra are not loaded here: nothing reads them before
# df_train is rebuilt with preprocess_data in the feature re-extraction cell,
# and the helper that used to wrap the load (unwrap_z) is not imported in this
# notebook, so the call failed on a fresh kernel.
# random_state is fixed so that repeated runs train the same model.
model = xgb.XGBClassifier(random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Predictions on the training set and their confusion matrix.
y_train_pred = model.predict(X_train)
plot_cm(y_train, y_train_pred, le, save=0, figname=f"{output_dir}/train_confusion")
plt.close()

# Predictions on the test set and their confusion matrix.
# Note: this rebinds y_test_pred, which previously held the predictions loaded
# from disk. The model is predicted once only; a second identical
# model.predict(X_test) call would return the same array.
y_test_pred = model.predict(X_test)
plot_cm(y_test, y_test_pred, le, save=0, figname=f"{output_dir}/test_confusion")
plt.close()

In [None]:
# Let's calculate the tsfresh features for the misclassified spectra and make new predictions to check that we didn't make a mistake
from preprocess import extract_tsfresh
from utils_preprocessing import preprocess_data

# Rebuild the train/test frames from the CSV files with preprocess_data.
# This rebinds df_test and df_train for the rest of the notebook.
df_test = preprocess_data("data/test_data.csv")
df_train = preprocess_data("data/train_data.csv")

# NOTE(review): `misclassified` holds row positions derived from the
# "*_filtered" feature files, while this frame is read from data/test_data.csv.
# Confirm that both have the same rows in the same order.
df_test_miss = df_test.iloc[misclassified].copy().reset_index(drop=True)

# Recompute the tsfresh features for the full training set and for the
# misclassified test spectra only.
extract_tsfresh(
    df_train,
    df_test_miss,
    output_path="data/tsfresh/mis_train_tsfresh.csv",
    le=le,
    if_exclude_outlier=False,
)

In [None]:
# Feature tables for the re-extracted run. The train file is the output_path
# given to extract_tsfresh; the test file is presumably written next to it by
# the same call -- TODO confirm.
train_data_f = "data/tsfresh/mis_train_tsfresh.csv"
test_data_f = "data/tsfresh/mis_test_tsfresh.csv"

# This rebinds X_train, y_train and le in addition to creating the *_miss arrays.
X_train, y_train, X_test_miss, y_test_miss, le = load_features_le(
    train_data_f,
    test_data_f,
    le_f,
)

# Predict the re-extracted misclassified samples with the already fitted model
# and show their confusion matrix.
y_test_mid_pred = model.predict(X_test_miss)
plot_cm(
    y_test_miss,
    y_test_mid_pred,
    le,
    save=0,
    figname=f"{output_dir}/test_miss_confusion",
)