In [None]:
from pathlib import Path

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import ADASYN
from matplotlib import rcParams
from sklearn.impute import SimpleImputer

In [None]:
# Load the training spreadsheet into `df`; later cells impute and resample it.
train_path = './code/data/hybrid_machine_learning/train_without.xlsx'
df = pd.read_excel(train_path)

In [None]:
# Mean-impute missing feature values, then oversample the minority class with ADASYN.
#
# The 'Misdiag' label is deliberately kept out of the imputer: mean-filling a
# missing label yields a fractional value that is not a valid class, and routing
# the label through the imputer also casts every label to float.
if df['Misdiag'].isna().any():
    raise ValueError(
        "'Misdiag' contains missing labels; drop or fix those rows before resampling"
    )

feature_cols = df.columns.drop('Misdiag')

# Replace NaNs in each feature column with that column's mean.
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(
    imputer.fit_transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index,  # keep rows aligned with the untouched label series
)
y = df['Misdiag']

# Rebuild `df` as imputed features + original labels, in the original column
# order, because the plotting cell compares `df` against the resampled frame.
df = X.assign(Misdiag=y)[df.columns]

# Oversample only the minority class; the fixed seed keeps the result reproducible.
ada = ADASYN(sampling_strategy='minority', random_state=42)
X_res, y_res = ada.fit_resample(X, y)

# Merge the resampled features and labels back into one DataFrame
# ('Misdiag' ends up as the last column).
df_res = pd.DataFrame(X_res, columns=X.columns)
df_res['Misdiag'] = y_res

In [None]:
# Draw one split violin plot per feature comparing the training data (`df`)
# with the ADASYN-resampled data (`df_res`), and save each figure as a PNG.

# Set the font to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12

# Define the list of columns to plot
columns_to_plot = ['WBC', 'HGB', 'Pulse', 'Respiration', 'BMI', 'BodySurfaceArea']
# Define the colors for the two halves of each violin (one per 'Misdiag' level)
colors = ["#64699B", "#C74647"]

# Make sure the output directory exists so savefig does not fail on a fresh checkout.
FIGURE_DIR = Path('./new_figure')
FIGURE_DIR.mkdir(parents=True, exist_ok=True)

# Build the combined frame once: it is identical for every column, so rebuilding
# it inside the loop was wasted work. ignore_index avoids duplicate row labels
# in the stacked frame.
violin_data = pd.concat(
    [df.assign(sampling='Train'), df_res.assign(sampling='Resampled')],
    ignore_index=True,
)

# Legend handles are also the same for every figure.
# NOTE(review): assumes the first hue level of 'Misdiag' is the correctly
# diagnosed class and the second is the misdiagnosed class — confirm.
class0_patch = mpatches.Patch(color=colors[0], label='Correct diagnosis')
class1_patch = mpatches.Patch(color=colors[1], label='Misdiagnosis')

# Create violin plots
for col in columns_to_plot:
    fig, ax = plt.subplots(figsize=(4, 4))
    sns.violinplot(x='sampling', y=col, hue='Misdiag', split=True,
                   data=violin_data, palette=colors, ax=ax)
    ax.set_xlabel('')  # remove the abscissa label
    # Replace seaborn's automatic hue legend with descriptive class names.
    ax.legend(handles=[class0_patch, class1_patch])
    fig.savefig(FIGURE_DIR / f'fig3_a_violin_plot_{col}.png', dpi=1200)
    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)

In [None]:
# Write the resampled training set to disk, leaving out the DataFrame index.
resampled_path = './code/data/hybrid_machine_learning/train_with.xlsx'
df_res.to_excel(resampled_path, index=False)