In [None]:
from Scripts.essentials import *

from sklearn.metrics import balanced_accuracy_score

# Directory holding the pre-split .npy arrays.
p = "Data/"

# Feature matrices for the three splits.
train_x = np.load(p + "train_x_MANUAL.npy")
test_x = np.load(p + "test_x_MANUAL.npy")
val_x = np.load(p + "val_x_MANUAL.npy")

# Sample-id labels; decoded with argmax over axis 1 further down, so they are
# presumably one-hot encoded (46 classes, judging by the file name) - TODO confirm.
train_y = np.load(p + "train_y_46.npy")
test_y = np.load(p + "test_y_46.npy")
val_y = np.load(p + "val_y_46.npy")

# 2x2 identity matrix; indexing its rows by a 0/1 label yields a one-hot vector.
eye = np.eye(2)


def _lgm_to_binary_one_hot(lgm):
    """Collapse multi-class lgm scores into a one-hot binary label array.

    Binary encoding from lgm to mutant vs. wildtype: the arg-max class index
    of every row is taken, indices greater than 2 become label 0 and all other
    indices become label 1 (which of the two is "mutant" is not visible here).
    The result has one row per sample and two columns.
    """
    class_index = np.argmax(lgm, axis = 1)
    binary_label = np.where(class_index > 2, 0, 1)
    return eye[binary_label]


train_lgm = _lgm_to_binary_one_hot(np.load(p + "train_lgm.npy"))
test_lgm = _lgm_to_binary_one_hot(np.load(p + "test_lgm.npy"))
val_lgm = _lgm_to_binary_one_hot(np.load(p + "val_lgm.npy"))

def _sqrt_inverse_frequency_weights(one_hot_labels):
    """Compute and print square-root inverse-frequency class weights.

    Classes are decoded with argmax over axis 1 and counted with np.bincount.
    Each weight is sqrt(1 / (count / max_count)), so the most frequent class
    gets weight 1 and rarer classes get larger weights.

    Returns a tuple (counts, class_weights, weights_by_class) where
    weights_by_class maps the integer class index to its weight.
    """
    counts = np.bincount(np.argmax(one_hot_labels, axis = 1))
    class_weights = np.sqrt((1/(counts/np.max(counts))))

    weights_by_class = {}
    for label, weight in enumerate(class_weights):
        weights_by_class[label] = weight
        print(label,":", weights_by_class[label], "(", counts[label], " spectra in training set)")

    return counts, class_weights, weights_by_class


# Class weights for the sample ids
counts, class_weights, cw_id = _sqrt_inverse_frequency_weights(train_y)

# Class weights for the lgm classes
counts, class_weights, cw_lgm = _sqrt_inverse_frequency_weights(train_lgm)


In [None]:
from sklearn.tree import DecisionTreeClassifier
# Single-feature baseline for the sample-id task: fit one decision tree per
# feature column and record its balanced accuracy on the test split.
accuracies = []

# The one-hot label arrays do not change inside the loop, so decode them once.
train_labels_id = np.argmax(train_y, axis = 1)
test_labels_id = np.argmax(test_y, axis = 1)

# Iterate over every column actually present instead of a hard-coded count, so
# the result always has exactly one entry per feature.
for i in range(train_x.shape[1]):
    d = train_x[:, i]

    # Training-split min and max; reused below to scale the test column too.
    min_ = np.min(d)
    max_ = np.max(d)

    # Skip all-zero features (original rule) and any other constant feature:
    # max_ == min_ would make the min-max scaling below divide by zero.
    if np.sum(d) == 0 or max_ == min_:
        accuracies.append(0)
        continue # Do not learn 0-features

    # Normalize the feature to [0, 1] on the training split
    d = (d - min_)/(max_ - min_)

    model = DecisionTreeClassifier(random_state=0, class_weight=cw_id)
    model.fit(d.reshape((-1, 1)), train_labels_id)

    # Evaluate on the test data, scaled with the training min and max
    preds = model.predict(((test_x[:, i] - min_) / (max_ - min_)).reshape((-1, 1)))

    acc = balanced_accuracy_score(test_labels_id, preds)
    print(i, ":", acc)
    accuracies.append(acc)
    

In [None]:
# Balanced test accuracy of each single-feature tree on the sample-id task,
# in feature order; skipped features were recorded as 0.
plt.plot(accuracies)
plt.xlabel("Feature index")
plt.ylabel("Balanced accuracy")
plt.show()

In [None]:
# Can we use one feature to predict the lgm classes?
# Fit one decision tree per feature column against the binary lgm labels and
# record its balanced accuracy on the test split.

accuracies_lgm = []

# The one-hot label arrays do not change inside the loop, so decode them once.
train_labels_lgm = np.argmax(train_lgm, axis = 1)
test_labels_lgm = np.argmax(test_lgm, axis = 1)

# Iterate over every column actually present instead of a hard-coded count, so
# the result always has exactly one entry per feature.
for i in range(train_x.shape[1]):
    d = train_x[:, i]

    # Training-split min and max; reused below to scale the test column too.
    min_ = np.min(d)
    max_ = np.max(d)

    # Skip all-zero features (original rule) and any other constant feature:
    # max_ == min_ would make the min-max scaling below divide by zero.
    if np.sum(d) == 0 or max_ == min_:
        accuracies_lgm.append(0)
        continue # Do not learn 0-features

    # Normalize the feature to [0, 1] on the training split
    d = (d - min_)/(max_ - min_)

    model = DecisionTreeClassifier(random_state=0, class_weight=cw_lgm)
    model.fit(d.reshape((-1, 1)), train_labels_lgm)

    # Evaluate on the test data, scaled with the training min and max
    preds = model.predict(((test_x[:, i] - min_) / (max_ - min_)).reshape((-1, 1)))

    acc = balanced_accuracy_score(test_labels_lgm, preds)
    print(i, ":", acc)
    accuracies_lgm.append(acc)

In [None]:
# Balanced test accuracy of each single-feature tree on the binary lgm task,
# in feature order; skipped features were recorded as 0.
plt.plot(accuracies_lgm)
plt.xlabel("Feature index")
plt.ylabel("Balanced accuracy")
plt.show()

In [None]:
import os

# np.save does not create missing parent directories; make sure the target
# folder exists so the results of the long fitting loops are not lost to a
# FileNotFoundError.
os.makedirs("Results/(MANUAL)SingleFeatureMetrics", exist_ok=True)

# Persist the per-feature balanced accuracies for both tasks.
np.save("Results/(MANUAL)SingleFeatureMetrics/single_id.npy", accuracies)
np.save("Results/(MANUAL)SingleFeatureMetrics/single_lgm.npy", accuracies_lgm)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Reload the training features and the saved per-feature accuracies from disk.
p = "Data/"
train_x = np.load(p + "train_x_MANUAL.npy")

# Saved per-feature balanced accuracies: sample-id task first, then lgm task.
accuracies_id, accuracies_lgm = map(
    np.load,
    (
        "Results/(MANUAL)SingleFeatureMetrics/single_id.npy",
        "Results/(MANUAL)SingleFeatureMetrics/single_lgm.npy",
    ),
)

In [None]:
# Global typography for the exported figure.
plt.rcParams.update({'font.size': 30, "font.family": "Times New Roman"})

fig, ax = plt.subplots(figsize = (15, 5))

# Reload from disk so re-running this cell always starts from unfiltered arrays
# (the accuracy arrays are overwritten by their filtered versions below).
train_x = np.load(p + "train_x_MANUAL.npy")
accuracies_id = np.load("Results/(MANUAL)SingleFeatureMetrics/single_id.npy")
accuracies_lgm = np.load("Results/(MANUAL)SingleFeatureMetrics/single_lgm.npy")

# Per-feature mean over the training samples; features whose mean is exactly
# zero are dropped from every array that is plotted.
mean = np.mean(train_x, axis = 0)
nonzero_mean = mean != 0.0

x = np.arange(len(train_x[0]))[nonzero_mean]
accuracies_id = accuracies_id[nonzero_mean]
accuracies_lgm = accuracies_lgm[nonzero_mean]
mean = mean[nonzero_mean]

# Horizontal positions of the retained features (0 .. number kept - 1).
positions = np.arange(len(x))

# Dashed black outline: mean value of every retained feature.
ax.plot(mean, color = "black", linestyle = "--")

# Shade the area under the mean curve wherever a single feature beats the
# threshold; presumably chance level (1/46 resp. 1/2) plus a margin - TODO confirm.
id_highlight = np.where(accuracies_id > 1/46 + 0.02, mean, 0)
lgm_highlight = np.where(accuracies_lgm > 1/2 + 0.1, mean, 0)
ax.fill_between(positions, id_highlight, 0, color = "red", alpha = 0.5)
ax.fill_between(positions, lgm_highlight, 0, color = "green", alpha = 0.5)

ax.set_ylim([-0.05, 1.05])
ax.set_xticks([])
fig.savefig(
    "Images/(MANUAL)OneFeatureLearning.png",
    format="png",
    transparent = True,
    dpi = 300,
    bbox_inches='tight',
    pad_inches=0.5,
)
plt.show()