In [2]:
import numpy as np
import pickle as pkl
import torch
from sklearn.model_selection import train_test_split
import torch.nn as nn   
import time
import torch.optim as optim
import torch
from torch.utils.data import TensorDataset, DataLoader

# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

def load_data(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in binary mode and is closed again before returning.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(path, 'rb') as pickle_file:
        return pkl.load(pickle_file)

# Read the two pickled datasets from the working directory.
# NOTE: unpickling can run arbitrary code - only load files you trust.
Interaction_matrices = load_data('interaction_matrices_10binned.pkl')
spectral_data = load_data('spectra_dataset_10binned.pkl')

# Report both shapes, one per line: interaction matrices first, then spectra.
print(Interaction_matrices.shape, spectral_data.shape, sep='\n')

cpu
torch.Size([10000, 6, 6])
torch.Size([10000, 2000])


In [3]:
import numpy as np
import torch

# Turn every interaction matrix into its own 1-D vector and keep them in a list
# (the list itself is reused later to size the column labels).
flattened_matrices = [torch.flatten(single_matrix) for single_matrix in Interaction_matrices]

# Stack the vectors row-wise into one 2-D tensor: one row per matrix (N x 36).
flattened_matrix = torch.stack(flattened_matrices, dim=0)
print(flattened_matrix.shape)

# Sanity check: show the first ten flattened rows.
print(flattened_matrix[:10])

torch.Size([10000, 36])
tensor([[0., 4., 2., 0., 0., 0., 2., 5., 0., 0., 0., 3., 2., 2., 0., 1., 6., 0.,
         4., 1., 4., 0., 6., 0., 0., 1., 1., 0., 0., 0., 5., 5., 0., 0., 0., 3.],
        [0., 0., 0., 0., 6., 2., 4., 6., 4., 3., 0., 1., 0., 3., 2., 5., 6., 3.,
         6., 1., 1., 0., 0., 0., 0., 0., 4., 6., 3., 0., 0., 0., 2., 6., 2., 3.],
        [0., 0., 5., 5., 0., 0., 0., 0., 6., 6., 3., 2., 4., 0., 0., 4., 6., 0.,
         6., 1., 0., 0., 1., 4., 0., 0., 2., 0., 0., 6., 2., 0., 2., 0., 1., 6.],
        [0., 0., 0., 1., 6., 0., 0., 0., 3., 0., 0., 0., 2., 0., 3., 0., 0., 3.,
         3., 0., 3., 0., 5., 6., 2., 3., 0., 0., 0., 4., 0., 0., 1., 0., 5., 0.],
        [0., 3., 0., 5., 0., 3., 5., 0., 0., 2., 3., 0., 2., 6., 2., 4., 0., 0.,
         3., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 2., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 5., 0., 2., 0., 0., 0., 1., 0., 3., 4., 0., 6., 0., 0.,
         2., 3., 0., 5., 0., 0., 5., 0., 0., 0., 0., 1., 0., 3., 0., 0., 6., 0.]

In [4]:
import torch 
import numpy

# Entries at or above this value are treated as "interaction present".
threshold = 0.5

# Binarise the flattened interaction matrices: 1.0 where the entry is >= threshold,
# 0.0 otherwise. The abundance (magnitude of each entry) is deliberately dropped
# for now; modelling it is not important yet and would require more work.
# The comparison is vectorised over the whole tensor, so no per-element loop is needed.
binary_flat_matrices = (flattened_matrix >= threshold).float()



In [5]:
# Show the same sample (row index 1) before and after binarisation, one per line.
print(flattened_matrix[1], binary_flat_matrices[1], sep='\n')

tensor([0., 0., 0., 0., 6., 2., 4., 6., 4., 3., 0., 1., 0., 3., 2., 5., 6., 3.,
        6., 1., 1., 0., 0., 0., 0., 0., 4., 6., 3., 0., 0., 0., 2., 6., 2., 3.])
tensor([0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1.,
        1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1.])


In [6]:
import pandas as pd
import torch
import numpy as np


# Build one column label per flattened entry. Flat index k corresponds to
# row k // 6 and column k % 6 of the original matrix, which gives labels of
# the form 'PA<row>PB<col>'.
matrix_columns = [
    'PA{}PB{}'.format(*divmod(flat_index, 6))
    for flat_index in range(len(flattened_matrices[0]))
]

# Wrap the binary labels and the spectra in DataFrames so they can be joined
# column-wise afterwards.
bnry_int_mat_df = pd.DataFrame(data=binary_flat_matrices, columns=matrix_columns)
spec_df = pd.DataFrame(data=spectral_data)

# Label-frame shape, the label frame itself, then the spectra-frame shape.
print(bnry_int_mat_df.shape, bnry_int_mat_df, spec_df.shape, sep='\n')


(10000, 36)
      PA0PB0  PA0PB1  PA0PB2  PA0PB3  PA0PB4  PA0PB5  PA1PB0  PA1PB1  PA1PB2  \
0        0.0     1.0     1.0     0.0     0.0     0.0     1.0     1.0     0.0   
1        0.0     0.0     0.0     0.0     1.0     1.0     1.0     1.0     1.0   
2        0.0     0.0     1.0     1.0     0.0     0.0     0.0     0.0     1.0   
3        0.0     0.0     0.0     1.0     1.0     0.0     0.0     0.0     1.0   
4        0.0     1.0     0.0     1.0     0.0     1.0     1.0     0.0     0.0   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
9995     0.0     1.0     0.0     1.0     0.0     1.0     0.0     0.0     0.0   
9996     0.0     0.0     1.0     0.0     1.0     1.0     1.0     1.0     1.0   
9997     0.0     1.0     0.0     0.0     1.0     0.0     1.0     0.0     0.0   
9998     0.0     1.0     1.0     1.0     1.0     1.0     0.0     1.0     0.0   
9999     0.0     0.0     0.0     1.0     0.0     1.0     1.0     1.0     0.0   

      PA1PB3  ...  PA4PB2  

In [7]:
# next  concat the two together 
import pandas as pd



# Join spectra and binary interaction labels side by side: each row keeps its
# spectrum columns first, followed by the PA*PB* label columns.
concat_df = pd.concat([spec_df, bnry_int_mat_df], axis='columns')

In [8]:
# Confirm the combined shape, then peek at ten rows (positions 9000-9009).
print(concat_df.shape)
print(concat_df.iloc[9000:9010])

(10000, 2036)
        0    1    2    3    4    5    6    7    8    9  ...  PA4PB2  PA4PB3  \
9000  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     1.0   
9001  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     1.0   
9002  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     1.0     0.0   
9003  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     0.0   
9004  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     1.0   
9005  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     1.0     0.0   
9006  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     1.0   
9007  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     1.0     0.0   
9008  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     0.0     1.0   
9009  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...     1.0     1.0   

      PA4PB4  PA4PB5  PA5PB0  PA5PB1  PA5PB2  PA5PB3  PA5PB4  PA5PB5  
9000     0.0     1.0     0.0     1.0     1.0 

In [9]:
# concat_df.to_csv("binned_by_10_labelled_ANN.csv") # save the data to a csv file.  but its too big lol

In [10]:
# import libraries
import torch
import torch.nn as nn



# Select the compute device: CUDA when available, CPU otherwise.
# (Same selection as in the first cell of the notebook.)
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
print(device)


cpu


  display.set_matplotlib_formats('svg')


In [11]:
# Split the combined frame back into model inputs and targets by column position.
X_spec = concat_df.iloc[:, :2000].to_numpy()  # first 2000 columns: spectra
Y_matr = concat_df.iloc[:, 2000:].to_numpy()  # remaining columns: binary interaction labels

In [12]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

# Two-stage split with a fixed seed so the partition is reproducible:
#   1. hold out 20% of the samples, leaving 80% for training;
#   2. divide the held-out part again: 80% of it becomes the test set and
#      20% of it becomes the validation set.
# Overall this is roughly 80% train / 16% test / 4% validation - not a plain
# 80/20 split. The intermediate hold-out set gets its own name so that X_test
# and y_test only ever refer to the final test split.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X_spec, Y_matr, test_size=0.2, random_state=42
)
X_test, X_val, y_test, y_val = train_test_split(
    X_holdout, y_holdout, test_size=0.2, random_state=42
)


In [13]:
import torch 
from torch.utils.data import DataLoader, TensorDataset

# Convert every split to a torch.Tensor and move it to the selected device.
X_train, y_train = (torch.Tensor(split).to(device) for split in (X_train, y_train))
X_test, y_test = (torch.Tensor(split).to(device) for split in (X_test, y_test))
X_val, y_val = (torch.Tensor(split).to(device) for split in (X_val, y_val))

# Mini-batch size for training (512 was also considered; 256 worked well).
batch_size = 256

# Only the training split is batched and reshuffled on each pass.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [14]:
# Shape check on the training inputs; expected torch.Size([8000, 2000]).
print(X_train.size())

torch.Size([8000, 2000])


In [60]:
class SpectralNet(nn.Module):
    """Fully connected network mapping a 2000-value input to 36 sigmoid outputs.

    Architecture: Linear layers of width 2000 -> 1024 -> 512 -> 256 -> 128, each
    followed by ReLU, then a final Linear 128 -> 36 followed by Sigmoid, so every
    output lies in [0, 1]. All layers live in ``self.model`` (an ``nn.Sequential``),
    so state-dict keys have the form ``model.<index>.weight`` / ``model.<index>.bias``.
    """

    def __init__(self):
        super().__init__()
        hidden_widths = (2000, 1024, 512, 256, 128)

        # Hidden stack: one Linear + ReLU pair per consecutive pair of widths.
        stack = []
        for fan_in, fan_out in zip(hidden_widths[:-1], hidden_widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())

        # Output head: one sigmoid unit per label.
        stack.append(nn.Linear(hidden_widths[-1], 36))
        stack.append(nn.Sigmoid())

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the sigmoid activations."""
        return self.model(x)

In [61]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Instantiate the network and place its parameters on the chosen device.
model = SpectralNet().to(device)

# Binary cross-entropy on probabilities; this matches the Sigmoid that ends
# the SpectralNet layer stack.
criterion = nn.BCELoss()

# Adam with a fixed learning rate (the plateau scheduler below is disabled).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
#scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5, min_lr=1e-6)

In [62]:
# Show the layer-by-layer structure of the network as a quick architecture check.
print(model)

SpectralNet(
  (model): Sequential(
    (0): Linear(in_features=2000, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=36, bias=True)
    (9): Sigmoid()
  )
)


In [63]:
# Per-epoch training history. The training loop appends one mean loss and one
# mean accuracy value per epoch to these lists.
epoch_losses, epoch_accuracies = [], []


In [65]:
num_epochs = 3000

# Mini-batch training. Each epoch visits every batch from train_loader once and
# appends that epoch's mean loss and mean exact-match accuracy to epoch_losses /
# epoch_accuracies. Those lists are created in an earlier cell, so re-running this
# cell extends the existing history instead of starting a new one.
model.train()  # make sure train-mode layer behaviour is active
for epoch in range(num_epochs):
    epoch_loss = 0.0
    epoch_accuracy = 0.0

    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        epoch_loss += loss.item()

        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Exact-match accuracy: a sample only counts as correct when every one of
        # its thresholded outputs equals the corresponding target value.
        predictions = (outputs.detach() >= 0.5).float()
        accuracy = torch.mean(torch.all(predictions == batch_y, dim=1).float())
        epoch_accuracy += accuracy.item()

    # Average the per-batch values; epoch_accuracy is a fraction in [0, 1].
    epoch_loss /= len(train_loader)
    epoch_losses.append(epoch_loss)
    epoch_accuracy /= len(train_loader)
    epoch_accuracies.append(epoch_accuracy)

    # Print the loss and accuracy every 10 epochs. The stored accuracy is a
    # fraction, so scale it by 100 before printing it with a percent sign.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy * 100:.2f}%")

    # Checkpoint the weights every 100 epochs (the same file is overwritten each time).
    if (epoch + 1) % 100 == 0:
        torch.save(model.state_dict(), "spectral_net_model.pth")
        print("Saved model to:", "spectral_net_model.pth")

Epoch [10/3000], Loss: 0.6134, Accuracy: 0.00%
Epoch [20/3000], Loss: 0.6049, Accuracy: 0.00%
Epoch [30/3000], Loss: 0.5979, Accuracy: 0.00%
Epoch [40/3000], Loss: 0.5900, Accuracy: 0.00%
Epoch [50/3000], Loss: 0.5834, Accuracy: 0.00%
Epoch [60/3000], Loss: 0.5764, Accuracy: 0.00%
Epoch [70/3000], Loss: 0.5692, Accuracy: 0.00%
Epoch [80/3000], Loss: 0.5629, Accuracy: 0.00%
Epoch [90/3000], Loss: 0.5565, Accuracy: 0.00%
Epoch [100/3000], Loss: 0.5492, Accuracy: 0.00%
Saved model to: spectral_net_model.pth
Epoch [110/3000], Loss: 0.5434, Accuracy: 0.00%
Epoch [120/3000], Loss: 0.5368, Accuracy: 0.00%
Epoch [130/3000], Loss: 0.5311, Accuracy: 0.00%
Epoch [140/3000], Loss: 0.5240, Accuracy: 0.00%
Epoch [150/3000], Loss: 0.5179, Accuracy: 0.00%
Epoch [160/3000], Loss: 0.5124, Accuracy: 0.00%
Epoch [170/3000], Loss: 0.5058, Accuracy: 0.00%
Epoch [180/3000], Loss: 0.4998, Accuracy: 0.00%
Epoch [190/3000], Loss: 0.4955, Accuracy: 0.00%
Epoch [200/3000], Loss: 0.4882, Accuracy: 0.00%
Saved mode

In [None]:
# Let it train, then check the outputs of the model.
# Next: increase the complexity of the model by 2 layers, keep the hyperparameters unchanged, and run again.

In [None]:
# Prints a single empty line; this cell has no other effect.
print()

In [66]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Inputs expected from earlier cells:
# - model: the trained PyTorch model
# - X_val: validation inputs
# - y_val: true multi-label targets for the validation inputs

# Score the validation split with gradient tracking disabled. eval() puts the
# model in inference mode so train-only layer behaviour is switched off.
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    # Threshold the sigmoid outputs at 0.5 and use 0/1 integers for both arrays
    # so scikit-learn sees two label-indicator matrices of the same type.
    predictions = (outputs.cpu().numpy() >= 0.5).astype(int)
    y_true = y_val.cpu().numpy().astype(int)

# For multi-label input accuracy_score is the subset (exact-match) accuracy:
# a sample counts only when every label is predicted correctly, so it is a very
# strict metric here. The per-label scores below are more informative.
accuracy = accuracy_score(y_true, predictions)
print(f"Accuracy: {accuracy:.4f}")

# Per-label scores (one value per output column). zero_division=0 reports 0.0
# for a label with no positive targets/predictions - the same value as the
# default behaviour, but without emitting an undefined-metric warning each time.
precision = precision_score(y_true, predictions, average=None, zero_division=0)
print(f"Precision for each class: {precision}")

recall = recall_score(y_true, predictions, average=None, zero_division=0)
print(f"Recall for each class: {recall}")

f1 = f1_score(y_true, predictions, average=None, zero_division=0)
print(f"F1-score for each class: {f1}")

# Micro average: pool the decisions of all labels, then compute each score once.
micro_precision = precision_score(y_true, predictions, average='micro', zero_division=0)
micro_recall = recall_score(y_true, predictions, average='micro', zero_division=0)
micro_f1 = f1_score(y_true, predictions, average='micro', zero_division=0)
print(f"Micro-averaged Precision: {micro_precision:.4f}")
print(f"Micro-averaged Recall: {micro_recall:.4f}")
print(f"Micro-averaged F1-score: {micro_f1:.4f}")

# Macro average: compute each score per label, then take the unweighted mean.
macro_precision = precision_score(y_true, predictions, average='macro', zero_division=0)
macro_recall = recall_score(y_true, predictions, average='macro', zero_division=0)
macro_f1 = f1_score(y_true, predictions, average='macro', zero_division=0)
print(f"Macro-averaged Precision: {macro_precision:.4f}")
print(f"Macro-averaged Recall: {macro_recall:.4f}")
print(f"Macro-averaged F1-score: {macro_f1:.4f}")


## For each spectrum, compute the F1 score, then vary the threshold, see how it changes, and average.
## Micro AUPR and macro AUPR: use the trapezoidal rule on each column's (class's) precision-recall curve built from all the data,
## average, and then plot the AUPR curve for each class.
## Find the blogs.
## K-fold cross-validation has to be done for each model, and then the average of the results has to be taken.

Accuracy: 0.0000
Precision for each class: [0.         0.50243902 0.5        0.45673077 0.56521739 0.52840909
 0.45283019 0.49537037 0.49769585 0.52763819 0.51219512 0.51851852
 0.48858447 0.5212766  0.44239631 0.50925926 0.51295337 0.5255102
 0.48958333 0.44230769 0.47715736 0.53809524 0.49468085 0.54589372
 0.52791878 0.48058252 0.53333333 0.43718593 0.48039216 0.50273224
 0.47058824 0.5        0.52197802 0.49509804 0.46268657 0.50485437]
Recall for each class: [0.         0.5255102  0.44607843 0.4973822  0.52702703 0.45812808
 0.5106383  0.53768844 0.5625     0.49065421 0.5        0.49246231
 0.54871795 0.49246231 0.47761194 0.56122449 0.48529412 0.5255102
 0.47715736 0.54761905 0.47474747 0.53554502 0.5        0.56218905
 0.51485149 0.52659574 0.46846847 0.47540984 0.49       0.43809524
 0.47761194 0.50505051 0.47979798 0.52061856 0.48691099 0.51231527]
F1-score for each class: [0.         0.51371571 0.47150259 0.47619048 0.54545455 0.49076517
 0.48       0.51566265 0.52811736 0.50

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
