<a href="https://colab.research.google.com/github/Ignas12345/Magistro_projektas/blob/main/bandymas_klasifikuoti.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score


In [2]:
# uzkraunam neapdorotus duomenis
url = 'https://raw.githubusercontent.com/Ignas12345/Magistro_projektas/refs/heads/main/counts_combined.csv'
df = pd.read_csv(url)
gene_names = df["miRNA_ID"].to_numpy()
df_trimmed = df.drop(df.columns[0], axis=1)
full_data = (df_trimmed.to_numpy(dtype=np.int32)).T

In [105]:
#arba uzkraunam isfiltruotus duomenis is praeitos knygutes
url = 'https://raw.githubusercontent.com/Ignas12345/Magistro_projektas/refs/heads/main/filtruoti_duomenys/filtruoti_1_su_310_miRNR/'
filtered_data = (pd.read_csv(url + 'further_filtered_data.csv').values).T
gene_names = pd.read_csv(url + 'further_filtered_gene_names.csv', header = None).values.flatten()
print(np.shape(filtered_data))
print(np.shape(gene_names))

(139, 310)
(310,)


In [85]:
X = full_data

In [18]:
# Create labels: first 6 samples -> label 0, rest -> label 1
num_samples = X.shape[0]
labels = torch.zeros(num_samples, dtype=torch.float32)
labels[6:] = 1  # Set the rest of the samples to label 1

# Convert X to a PyTorch tensor
features = torch.tensor(X, dtype=torch.float32)

#netikri papildomi treniravimo duomenys
benign_features = features[:6]  # First 6 samples are benign
benign_labels = labels[:6]

# Duplicate benign samples with slight noise
augmented_features = benign_features + 0.01 * torch.randn_like(benign_features)
features = torch.cat([features, augmented_features])
labels = torch.cat([labels, benign_labels])

mean = features.mean(dim=0)
std = features.std(dim=0)
features = (features - mean) / (std + 1e-8)

# Combine features and labels into a dataset
dataset = TensorDataset(features, labels)

In [19]:
# Create a DataLoader for batching
batch_size = 1  # You can adjust this as needed
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Example: Print the shape of a batch
for batch_features, batch_labels in data_loader:
    print(f"Batch features shape: {batch_features.shape}")
    print(f"Batch labels shape: {batch_labels.shape}")
    break

Batch features shape: torch.Size([1, 1881])
Batch labels shape: torch.Size([1])


In [None]:
for batch_features, batch_labels in data_loader:
    print(f"Batch features shape: {batch_features}")
    print(f"Batch labels shape: {batch_labels}")
    break

Batch features shape: tensor([[ 47204.,  46983.,  47453.,  ...,    408.,   2981., 137966.]])
Batch labels shape: tensor([1.])


In [23]:
class SparseNN(nn.Module):
    def __init__(self, input_dim, temperature = 1):
        super(SparseNN, self).__init__()
        self.input_dim = input_dim
        self.temperature = temperature

        self.first_layer_weights = nn.Parameter(torch.randn(input_dim))  # 1D tensor for scaling
        self.first_layer_biases = nn.Parameter(torch.randn(input_dim))  # 1D tensor for bias


        # Second layer: Sparse weighting (learnable parameters)
        self.output_weights_raw = nn.Parameter(torch.randn(input_dim))  # Raw weights

    def forward(self, x):
        # First layer: Linear transformation
        first_layer_output = x * self.first_layer_weights + self.first_layer_biases

        # Second layer: Apply sparse weights
        output_weights = torch.relu(self.output_weights_raw)  # Enforce non-negativity
        weighted_output = first_layer_output * output_weights  # Element-wise multiplication
  # Element-wise multiplication

        # Pooling: Sum the weighted outputs
        pooled_output = weighted_output.sum(dim=1)  # Summing across features

        # Final activation: sigmoid
        output = torch.sigmoid(pooled_output / self.temperature)

        return output

def l1_regularization(weight, lambda_l1=0.001):
    return lambda_l1 * weight.abs().sum()



In [37]:
#initialize the model
input_dim = full_data.shape[1]
model = SparseNN(input_dim, temperature = 5)

# Define a loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # L1 sparsity via weight_decay

In [38]:
# Training Loop
num_epochs = 20  # Adjust based on dataset size and convergence
lambda_l1 = 0.001  # Regularization strength

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch_features, batch_labels in data_loader:
        # Forward pass
        outputs = model(batch_features)  # Ensure shape compatibility for BCELoss
        # Compute binary cross-entropy loss
        loss = criterion(outputs, batch_labels)

        # Add L1 regularization for sparsity
        loss += l1_regularization(model.output_weights_raw, lambda_l1)

        #make probabilities less extreme
        penalty = (outputs * torch.log(outputs + 1e-8) + (1 - outputs) * torch.log(1 - outputs + 1e-8)).mean()
        loss += 0.1 * penalty

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Logging epoch statistics
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(data_loader):.4f}")

# Evaluation (after training)
model.eval()
with torch.no_grad():
    # Forward pass on the entire dataset
    predictions = model(features)
    predictions = (predictions > 0.5).float()  # Convert probabilities to binary predictions

    # Compute accuracy
    accuracy = accuracy_score(labels.numpy(), predictions.numpy())
    print(f"Final Accuracy: {accuracy:.4f}")

Epoch [1/20], Loss: 3.0513
Epoch [2/20], Loss: 3.8932
Epoch [3/20], Loss: 3.7277
Epoch [4/20], Loss: 3.5556
Epoch [5/20], Loss: 2.2663
Epoch [6/20], Loss: 2.1118
Epoch [7/20], Loss: 1.3898
Epoch [8/20], Loss: 1.2337
Epoch [9/20], Loss: 1.1286
Epoch [10/20], Loss: 1.0202
Epoch [11/20], Loss: 0.9719
Epoch [12/20], Loss: 0.9290
Epoch [13/20], Loss: 0.9069
Epoch [14/20], Loss: 0.8715
Epoch [15/20], Loss: 0.8378
Epoch [16/20], Loss: 0.8243
Epoch [17/20], Loss: 0.8065
Epoch [18/20], Loss: 0.7872
Epoch [19/20], Loss: 0.7841
Epoch [20/20], Loss: 0.7707
Final Accuracy: 0.9931


In [62]:

probs = 100 * model(features).detach().numpy()
formatted_probs = np.array([f"{p:.2f}%" for p in probs])
print(formatted_probs)


['0.28%' '2.63%' '98.76%' '13.91%' '6.85%' '2.21%' '100.00%' '99.84%'
 '99.97%' '99.61%' '99.25%' '99.98%' '100.00%' '100.00%' '99.84%' '96.65%'
 '99.59%' '99.98%' '100.00%' '99.96%' '99.95%' '100.00%' '99.56%' '99.91%'
 '99.99%' '100.00%' '99.87%' '98.75%' '99.90%' '91.62%' '99.97%' '99.85%'
 '99.57%' '100.00%' '93.07%' '99.83%' '99.97%' '100.00%' '100.00%'
 '100.00%' '78.69%' '85.09%' '99.97%' '99.86%' '99.94%' '99.91%' '98.92%'
 '96.46%' '100.00%' '99.58%' '100.00%' '98.58%' '99.46%' '99.59%'
 '100.00%' '98.60%' '97.74%' '100.00%' '100.00%' '99.84%' '100.00%'
 '100.00%' '93.83%' '84.41%' '99.85%' '99.96%' '100.00%' '99.79%' '99.79%'
 '99.93%' '99.92%' '92.36%' '100.00%' '99.76%' '99.98%' '99.78%' '99.87%'
 '99.77%' '100.00%' '97.00%' '91.85%' '79.52%' '99.78%' '84.40%' '99.07%'
 '100.00%' '99.98%' '99.56%' '99.98%' '100.00%' '100.00%' '95.92%'
 '99.38%' '92.33%' '99.79%' '100.00%' '99.97%' '99.04%' '100.00%'
 '100.00%' '99.99%' '99.54%' '99.99%' '100.00%' '99.99%' '98.96%' '99.99%'


In [80]:
#jeigu procentai sutampa su tikrove, issaugom svorius (ir jeigu jie naudingi...)

torch.save(model.first_layer_weights.data, "first_layer_weights.pt")

# Save the first layer's biases (if present)
torch.save(model.first_layer_biases.data, "first_layer_biases.pt")

# Save the second layer (sparse) weights
torch.save(model.output_weights_raw.data, "output_weights_raw.pt")

In [69]:
# Svoriu analize is idejos turetu parodyti, kurie genai yra svarbiausi priimant sprendima
print("First Layer Weights:")
print(model.first_layer_weights.data)

# Print the biases of the first layer (if present)
print("First Layer Biases:")
print(model.first_layer_biases.data)

# Print the weights of the second layer (output weights)
print("Second Layer (Sparse) Weights:")
sparse_weights = torch.relu(model.output_weights_raw.data)
print(sparse_weights)

First Layer Weights:
tensor([-0.0822, -1.0442,  0.2279,  ...,  0.5639,  1.3265,  0.9782])
First Layer Biases:
tensor([-1.3368,  0.3855,  0.4678,  ...,  0.3809, -1.2594, -1.2489])
Second Layer (Sparse) Weights:
tensor([0.0000, 0.6077, 2.0900,  ..., 2.4219, 0.0115, 0.5861])


In [79]:
sorted_indices = torch.argsort(sparse_weights, descending=True)
values = sparse_weights[sorted_indices]

print(sorted_indices[:50])
print(values[:50])

tensor([1547, 1051, 1484, 1314,  995,  806,  311,  525, 1710, 1588, 1394,  589,
        1878,  890, 1769,  887, 1033, 1625,  235, 1832,  365, 1065, 1287,   10,
         354,  420,  331,  345,    2,  617,  746, 1411, 1786,  156, 1401, 1831,
         482,  602, 1447, 1108,  509,  581, 1791, 1833, 1192,  691,  315,  249,
        1640,  497])
tensor([3.6505, 3.3982, 3.0856, 2.9888, 2.9321, 2.7831, 2.7825, 2.7048, 2.6162,
        2.4972, 2.4582, 2.4468, 2.4219, 2.4009, 2.3941, 2.3351, 2.3335, 2.3317,
        2.3224, 2.2581, 2.2448, 2.2206, 2.2016, 2.1306, 2.1150, 2.1146, 2.0958,
        2.0937, 2.0900, 2.0758, 2.0651, 2.0588, 2.0584, 2.0507, 1.9739, 1.9727,
        1.9717, 1.9564, 1.9521, 1.9478, 1.9326, 1.9235, 1.9224, 1.9220, 1.9107,
        1.8866, 1.8859, 1.8831, 1.8786, 1.8785])


In [84]:
model = SparseNN(input_dim, temperature = 5)

loaded_first_layer_weights = torch.load("first_layer_weights.pt")
loaded_first_layer_biases = torch.load("first_layer_biases.pt")
loaded_otput_weights_raw = torch.load("output_weights_raw.pt")

# Assign the loaded values to the model
model.first_layer_weights.data = loaded_first_layer_weights
model.first_layer_biases.data = loaded_first_layer_biases
model.output_weights_raw.data = loaded_otput_weights_raw

probs = 100 * model(features).detach().numpy()
formatted_probs = np.array([f"{p:.2f}%" for p in probs])
print(formatted_probs)

['0.28%' '2.63%' '98.76%' '13.91%' '6.85%' '2.21%' '100.00%' '99.84%'
 '99.97%' '99.61%' '99.25%' '99.98%' '100.00%' '100.00%' '99.84%' '96.65%'
 '99.59%' '99.98%' '100.00%' '99.96%' '99.95%' '100.00%' '99.56%' '99.91%'
 '99.99%' '100.00%' '99.87%' '98.75%' '99.90%' '91.62%' '99.97%' '99.85%'
 '99.57%' '100.00%' '93.07%' '99.83%' '99.97%' '100.00%' '100.00%'
 '100.00%' '78.69%' '85.09%' '99.97%' '99.86%' '99.94%' '99.91%' '98.92%'
 '96.46%' '100.00%' '99.58%' '100.00%' '98.58%' '99.46%' '99.59%'
 '100.00%' '98.60%' '97.74%' '100.00%' '100.00%' '99.84%' '100.00%'
 '100.00%' '93.83%' '84.41%' '99.85%' '99.96%' '100.00%' '99.79%' '99.79%'
 '99.93%' '99.92%' '92.36%' '100.00%' '99.76%' '99.98%' '99.78%' '99.87%'
 '99.77%' '100.00%' '97.00%' '91.85%' '79.52%' '99.78%' '84.40%' '99.07%'
 '100.00%' '99.98%' '99.56%' '99.98%' '100.00%' '100.00%' '95.92%'
 '99.38%' '92.33%' '99.79%' '100.00%' '99.97%' '99.04%' '100.00%'
 '100.00%' '99.99%' '99.54%' '99.99%' '100.00%' '99.99%' '98.96%' '99.99%'


  loaded_first_layer_weights = torch.load("first_layer_weights.pt")
  loaded_first_layer_biases = torch.load("first_layer_biases.pt")
  loaded_otput_weights_raw = torch.load("output_weights_raw.pt")
