In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Matplotlib rcParams overrides for the small figures in this notebook.
# The dict has no effect on its own; it must be passed to plt.rcParams.update().
tex_fonts = {
    # LaTeX text rendering stays disabled; add "text.usetex": True here to enable it.
    "font.family": "sans-serif",
    # Axis labels are drawn at 8pt.
    "axes.labelsize": 8,
    # Base font size of 10pt, intended to match a 10pt document font.
    "font.size": 10,
    # Legend entries and tick labels are a little smaller than the base size.
    **{option: 8 for option in ("legend.fontsize", "xtick.labelsize", "ytick.labelsize")},
}

# Classes

In [None]:
# Label/split summary table; the first CSV column becomes the index.
# Columns read further down: labels_train, N_train, labels_val, N_val.
label_split_csv = 'tables/LabelSplitInfoAll.csv'
df = pd.read_csv(label_split_csv, index_col=0)

In [None]:
# Names of the label categories and how many of them there are.
# (`class_labeles` keeps its original spelling so existing references keep working.)
class_labeles = ['positive', 'planktonic', 'clumped', 'rods', 'filaments']
classes = 5

In [None]:

# Both tables have one row per split: row 0 = training, row 1 = validation.
class_prob = np.zeros((2, classes))      # label count divided by the split's N column
pos_label_prob = np.zeros((2, classes))  # label count divided by the sum of all label counts

split_columns = [('labels_train', 'N_train'), ('labels_val', 'N_val')]
for row, (label_col, total_col) in enumerate(split_columns):
    label_counts = df[label_col].values
    class_prob[row, :] = label_counts / df[total_col].values
    pos_label_prob[row, :] = label_counts / np.sum(label_counts)

In [None]:
# Show both tables as the cell output for a quick sanity check.
(class_prob, pos_label_prob)

In [None]:
# Grouped bar chart of class_prob: one group per class,
# gray = row 0 (training split), magenta = row 1 (validation split).
x = np.arange(classes)  # position of the first bar in each group
width = 0.25            # bar width in data units
colors = ['gray', 'magenta']

fig = plt.figure(figsize=(2, 2), dpi=200)
ax = plt.gca()

# Each successive series is shifted one bar-width to the right so the bars sit side by side.
for c, bar_color in enumerate(colors):
    offset = width * c
    ax.bar(x + offset, class_prob[c, :], width, color=bar_color)

ax.set_ylabel('')

# One unlabeled x tick per class, centred between the two bars of the group.
ax.set_xticks(x + width / 2)
ax.set_xticklabels([''] * len(x))

# y ticks every 0.1; only 0 and every second tick (0.2, 0.4, ...) carry a label.
yticks = np.arange(0, 1.4, 0.1)
ax.set_yticks(yticks)
ytick_labels = [0]
for i, tick in enumerate(yticks[1:]):
    ytick_labels.append(str(np.round(tick, 1)) if i % 2 else '')
ax.set_yticklabels(ytick_labels)

ax.set_ylim(0, 1)

plt.show()

In [None]:
# Pie chart of the validation label counts, drawn without wedge labels.
# (Pass labels=class_labeles, autopct='%1.1f%%' to plt.pie to annotate the wedges.)
fig = plt.figure(figsize=(2, 2), dpi=200)
val_label_counts = df.labels_val.values
plt.pie(val_label_counts)

In [None]:
# Pie chart of the training label counts, drawn without wedge labels.
# (Pass labels=class_labeles, autopct='%1.1f%%' to plt.pie to annotate the wedges.)
fig = plt.figure(figsize=(2, 2), dpi=200)
train_label_counts = df.labels_train.values
plt.pie(train_label_counts)

# Performance Metric

In [None]:
# Score table (first CSV column is the index), also kept as a plain NumPy array for plotting.
scores_csv = 'tables/Scores_All.csv'
df = pd.read_csv(scores_csv, index_col=0)
performance = df.to_numpy()

In [None]:
# Grouped bar chart of the score table: one group per row of `performance`,
# one bar per score column.
# NOTE(review): assumes `performance` has `classes` rows and at least 3 columns —
# confirm against Scores_All.csv.
n_series = 3            # number of score columns drawn per group
x = np.arange(classes)  # position of the first bar in each group
width = 0.25            # bar width in data units
colors = sns.color_palette()

fig = plt.figure(figsize=(2, 2), dpi=200)
ax = plt.gca()

for c in range(n_series):
    # Successive series are shifted one bar-width to the right.
    ax.bar(x + width * c, performance[:, c], width, color=colors[c])

ax.set_ylabel('')

# Centre the (unlabeled) x ticks under each group. Bar centres run from x to
# x + (n_series - 1) * width, so the midpoint is x + width * (n_series - 1) / 2.
# The previous x + width / 2 is only the midpoint for two bars per group.
ax.set_xticks(x + width * (n_series - 1) / 2)
ax.set_xticklabels([''] * len(x))

# y ticks every 0.1; only 0 and every second tick (0.2, 0.4, ...) carry a label.
yticks = np.arange(0, 1.4, 0.1)
ax.set_yticks(yticks)
ax.set_yticklabels([0] + [str(np.round(tick, 1)) if i % 2 else '' for i, tick in enumerate(yticks[1:])])
ax.set_ylim(0, 1)

plt.show()

# Logits

In [None]:
# Validation predictions; the first CSV column becomes the index.
# Columns read further down: pred_logit_0 .. pred_logit_4 and one label column per class.
predictions_csv = 'tables/ValidationPredictions_All.csv'
df = pd.read_csv(predictions_csv, index_col=0)

In [None]:
def plot_logit_cumprob(data, xlim, ylim, lw_big, lw_small, jitter=0.1, ax=None):
    """Plot the logits of one class against a sigmoid reference curve.

    Draws, on a single axes:
      * a cumulative KDE of the logits for each class value, each normalised
        on its own (``common_norm=False``),
      * a horizontal strip plot of the individual logits, placed at
        y = class value,
      * gray reference lines at y=0, y=1 and x=0 (dashed), and the sigmoid
        1 / (1 + exp(-x)).
    Tick labels, axis labels and the legend are blanked.

    Parameters
    ----------
    data : numpy array with two columns: column 0 holds the logit, column 1
        the class value of each sample.
    xlim, ylim : axis limits, passed to ``ax.set_xlim`` / ``ax.set_ylim``.
    lw_big : line width of the KDE curves and the sigmoid.
    lw_small : line width of the straight reference lines.
    jitter : jitter passed to ``sns.stripplot``.
    ax : axes to draw on; the current axes are used when None.

    Returns
    -------
    None. The axes are modified in place.
    """
    if ax is None:
        ax = plt.gca()

    logits, labels = data[:, 0], data[:, 1]
    # The class value is stored twice: once as the hue and once as the strip plot's y position.
    points = pd.DataFrame({'logit': logits, 'class': labels, 'y': labels})

    sns.kdeplot(data=points, x='logit', hue='class', ax=ax, lw=lw_big,
                common_norm=False, cumulative=True)
    sns.stripplot(data=points, x='logit', y='y', hue='class', native_scale=True,
                  orient="h", ax=ax, jitter=jitter, zorder=0)
    # Replace the automatically generated legend with an empty one.
    ax.legend([], [], frameon=False)

    # Reference lines: y = 0, y = 1, x = 0 (dashed) and the sigmoid.
    ax.plot([-15, 15], [0, 0], 'gray', lw=lw_small)
    ax.plot([-15, 15], [1, 1], 'gray', lw=lw_small)
    ax.plot([0, 0], [-2, 2], 'gray', lw=lw_small, linestyle='--')
    grid = np.arange(-15, 15, 0.1)
    ax.plot(grid, 1 / (1 + np.exp(-grid)), color='gray', lw=lw_big)

    # Axis frame and tick appearance.
    for spine in ax.spines.values():
        spine.set_linewidth(1)
    ax.tick_params(axis='both', which='major', labelsize=8)

    # Keep the tick marks but blank every tick label.
    xticks = np.arange(-10, 11, 2)
    yticks = np.arange(0, 2, 0.1)
    ax.set_xticks(xticks)
    ax.set_yticks(yticks)
    ax.set_yticklabels([''] * len(yticks))
    ax.set_xticklabels([''] * len(xticks))

    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)


In [None]:
# Figure with one panel per class, each drawn by plot_logit_cumprob.

# Apply the shared font settings BEFORE the figure is created. rcParams are consulted
# when a figure and its artists are built, so updating them after plotting (as was done
# previously) only affected later figures and re-runs of earlier cells.
plt.rcParams.update(tex_fonts)

lw_big = 2     # line width of the KDE and sigmoid curves
lw_small = 1   # line width of the straight reference lines
jit = 0.1      # strip-plot jitter
xlim = [-10.5, 10.5]
ylim = [-0.15, 1.15]

# (logit column, label column) for each panel, top to bottom.
# NOTE(review): this order (positive, rods, planktonic, filaments, clumped) differs from
# `class_labeles` (positive, planktonic, clumped, rods, filaments) — confirm which logit
# index belongs to which class.
panels = [
    ('pred_logit_0', 'positive'),
    ('pred_logit_1', 'rods'),
    ('pred_logit_2', 'planktonic'),
    ('pred_logit_3', 'filaments'),
    ('pred_logit_4', 'clumped'),
]
classes = len(panels)

fig, axs = plt.subplots(classes, 1, figsize=[4, 10], dpi=200)
for panel_ax, (logit_col, label_col) in zip(axs, panels):
    plot_logit_cumprob(df[[logit_col, label_col]].to_numpy(), xlim, ylim, lw_big, lw_small, jit, panel_ax)

fig.tight_layout()