In [100]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer


In [101]:
# Load Excel dataset
df = pd.read_excel("merged_blood_dataset.xlsx")

X = df.drop(columns=["label"])   # features
y = df["label"]                  # 0 = healthy, 1 = thalassemia


In [102]:
# Handle missing values
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)


In [103]:
# Train / test split 
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)


In [104]:

import mindspore as ms
from mindspore import Tensor

X_train = Tensor(X_train, ms.float32)
X_test  = Tensor(X_test,  ms.float32)

y_train = Tensor(y_train.values, ms.int32)
y_test  = Tensor(y_test.values,  ms.int32)


In [105]:
# LAAYER 2: Define neural network model

In [142]:
#NN model
from mindspore import nn

class ThalassemiaMLP(nn.Cell):
    def __init__(self, input_dim):
        super().__init__()
        self.network = nn.SequentialCell(
            nn.Dense(input_dim, 128),
            nn.ReLU(),
            nn.Dense(128, 64),
            nn.ReLU(),
            nn.Dense(64, 2)   # binary classification
        )

    def construct(self, x):
        return self.network(x)


In [143]:
model = ThalassemiaMLP(input_dim=X_train.shape[1])

In [144]:
#define loss function
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
optimizer = nn.Adam(model.trainable_params(), learning_rate=1e-3)

net_with_loss = nn.WithLossCell(model, loss_fn)
train_net = nn.TrainOneStepCell(net_with_loss, optimizer)
train_net.set_train()


TrainOneStepCell(
  (network): WithLossCell(
    (_backbone): ThalassemiaMLP(
      (network): SequentialCell(
        (0): Dense(input_channels=19, output_channels=128, has_bias=True)
        (1): ReLU()
        (2): Dense(input_channels=128, output_channels=64, has_bias=True)
        (3): ReLU()
        (4): Dense(input_channels=64, output_channels=2, has_bias=True)
      )
    )
    (_loss_fn): SoftmaxCrossEntropyWithLogits()
  )
  (optimizer): Adam()
  (grad_reducer): Identity()
)

In [121]:
epochs = 200
batch_size = 32

for epoch in range(epochs):
    epoch_loss = 0
    for i in range(0, len(X_train), batch_size):
        xb = X_train[i:i+batch_size]
        yb = y_train[i:i+batch_size]
        loss = train_net(xb, yb)
        epoch_loss += loss.asnumpy().mean()

    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Loss: {epoch_loss:.9f}")


Epoch 0, Loss: 0.000815744
Epoch 5, Loss: 0.000768273
Epoch 10, Loss: 0.000724096
Epoch 15, Loss: 0.000682948
Epoch 20, Loss: 0.000645432
Epoch 25, Loss: 0.000609937
Epoch 30, Loss: 0.000576955
Epoch 35, Loss: 0.000546717
Epoch 40, Loss: 0.000518176
Epoch 45, Loss: 0.000491707
Epoch 50, Loss: 0.000466739
Epoch 55, Loss: 0.000443458
Epoch 60, Loss: 0.000421511
Epoch 65, Loss: 0.000401122
Epoch 70, Loss: 0.000381708
Epoch 75, Loss: 0.000363566
Epoch 80, Loss: 0.000346423
Epoch 85, Loss: 0.000330351
Epoch 90, Loss: 0.000315199
Epoch 95, Loss: 0.000300841
Epoch 100, Loss: 0.000287197
Epoch 105, Loss: 0.000274570
Epoch 110, Loss: 0.000262501
Epoch 115, Loss: 0.000251027
Epoch 120, Loss: 0.000240131
Epoch 125, Loss: 0.000229815
Epoch 130, Loss: 0.000220063
Epoch 135, Loss: 0.000210790
Epoch 140, Loss: 0.000201967
Epoch 145, Loss: 0.000193537
Epoch 150, Loss: 0.000185554
Epoch 155, Loss: 0.000177928
Epoch 160, Loss: 0.000170667
Epoch 165, Loss: 0.000163810
Epoch 170, Loss: 0.000157202
Epoch 1

In [151]:
batch_size = 32
epochs = 150
train_losses = []
test_losses = []

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)

for epoch in range(epochs):
    epoch_loss = 0.0
    num_batches = 0

    for i in range(0, len(X_train), batch_size):
        xb = X_train[i:i+batch_size]
        yb = y_train[i:i+batch_size]

        loss = train_net(xb, yb)
        epoch_loss += loss.asnumpy().mean()
        num_batches += 1

    train_losses.append(epoch_loss / num_batches)

    # test loss
    model.set_train(False)
    logits = model(X_test)
    test_loss = loss_fn(logits, y_test).asnumpy().mean()
    test_losses.append(test_loss)
    model.set_train()

    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Train Loss: {train_losses[-1]:.7f}, Test Loss: {test_losses[-1]:.10f}")


Epoch 0, Train Loss: 0.0000174, Test Loss: 0.2913218141
Epoch 5, Train Loss: 0.0000165, Test Loss: 0.2922946215
Epoch 10, Train Loss: 0.0000158, Test Loss: 0.2933674455
Epoch 15, Train Loss: 0.0000150, Test Loss: 0.2943835855
Epoch 20, Train Loss: 0.0000143, Test Loss: 0.2953539491
Epoch 25, Train Loss: 0.0000137, Test Loss: 0.2963970602
Epoch 30, Train Loss: 0.0000130, Test Loss: 0.2974238992
Epoch 35, Train Loss: 0.0000125, Test Loss: 0.2983917594
Epoch 40, Train Loss: 0.0000119, Test Loss: 0.2993939519
Epoch 45, Train Loss: 0.0000114, Test Loss: 0.3003194034
Epoch 50, Train Loss: 0.0000109, Test Loss: 0.3012902439
Epoch 55, Train Loss: 0.0000104, Test Loss: 0.3022607565
Epoch 60, Train Loss: 0.0000099, Test Loss: 0.3031731844
Epoch 65, Train Loss: 0.0000095, Test Loss: 0.3040858805
Epoch 70, Train Loss: 0.0000091, Test Loss: 0.3049520850
Epoch 75, Train Loss: 0.0000087, Test Loss: 0.3058664203
Epoch 80, Train Loss: 0.0000084, Test Loss: 0.3067392707
Epoch 85, Train Loss: 0.0000080, 

In [111]:
#layer 3

In [112]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [152]:
model.set_train(False)

logits = model(X_test)
preds = logits.argmax(axis=1).asnumpy()
y_true = y_test.asnumpy()

print("Accuracy:", accuracy_score(y_true, preds))
print("Confusion Matrix:\n", confusion_matrix(y_true, preds))
print("\nClassification Report:\n",
      classification_report(y_true, preds, target_names=["Healthy", "Thalassemia"]))


Accuracy: 0.9642857142857143
Confusion Matrix:
 [[36  2]
 [ 1 45]]

Classification Report:
               precision    recall  f1-score   support

     Healthy       0.97      0.95      0.96        38
 Thalassemia       0.96      0.98      0.97        46

    accuracy                           0.96        84
   macro avg       0.97      0.96      0.96        84
weighted avg       0.96      0.96      0.96        84



In [153]:
# find index of a healthy sample in test set
healthy_idx = (y_test.asnumpy() == 1).nonzero()[0][1]

# convert to python int for indexing MindSpore tensors
idx = int(healthy_idx)

# inference on that healthy sample
sample = X_test[idx:idx + 1]
logit = model(sample)

pred_class = logit.argmax(axis=1).asnumpy()[0]
true_class = int(y_test[idx].asnumpy())

print("Predicted:", "Thalassemia" if pred_class == 1 else "Healthy")
print("True:", "Thalassemia" if true_class == 1 else "Healthy")


Predicted: Thalassemia
True: Thalassemia


In [154]:
# find index of a healthy sample in test set
healthy_idx = (y_test.asnumpy() == 0).nonzero()[0][1]

# convert to python int for indexing MindSpore tensors
idx = int(healthy_idx)

# inference on that healthy sample
sample = X_test[idx:idx + 1]
logit = model(sample)

pred_class = logit.argmax(axis=1).asnumpy()[0]
true_class = int(y_test[idx].asnumpy())

print("Predicted:", "Thalassemia" if pred_class == 1 else "Healthy")
print("True:", "Thalassemia" if true_class == 1 else "Healthy")


Predicted: Healthy
True: Healthy
