In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer


In [5]:
# Load Excel dataset
df = pd.read_excel("merged_blood_dataset.xlsx")

# ⚠️ Update column name if needed
X = df.drop(columns=["label"])   # features
y = df["label"]                  # 0 = healthy, 1 = thalassemia


In [6]:
# Handle missing values
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)


In [7]:
# Train / test split (same logic as before)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)


In [8]:

import mindspore as ms
from mindspore import Tensor

X_train = Tensor(X_train, ms.float32)
X_test  = Tensor(X_test,  ms.float32)

y_train = Tensor(y_train.values, ms.int32)
y_test  = Tensor(y_test.values,  ms.int32)


In [9]:
#NN model

from mindspore import nn

class ThalassemiaMLP(nn.Cell):
    def __init__(self, input_dim):
        super().__init__()
        self.network = nn.SequentialCell(
            nn.Dense(input_dim, 128),
            nn.ReLU(),
            nn.Dense(128, 64),
            nn.ReLU(),
            nn.Dense(64, 2)   # binary classification
        )

    def construct(self, x):
        return self.network(x)


In [10]:
model = ThalassemiaMLP(input_dim=X_train.shape[1])

In [11]:
#define loss function
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
optimizer = nn.Adam(model.trainable_params(), learning_rate=1e-3)

net_with_loss = nn.WithLossCell(model, loss_fn)
train_net = nn.TrainOneStepCell(net_with_loss, optimizer)
train_net.set_train()


TrainOneStepCell(
  (network): WithLossCell(
    (_backbone): ThalassemiaMLP(
      (network): SequentialCell(
        (0): Dense(input_channels=19, output_channels=128, has_bias=True)
        (1): ReLU()
        (2): Dense(input_channels=128, output_channels=64, has_bias=True)
        (3): ReLU()
        (4): Dense(input_channels=64, output_channels=2, has_bias=True)
      )
    )
    (_loss_fn): SoftmaxCrossEntropyWithLogits()
  )
  (optimizer): Adam()
  (grad_reducer): Identity()
)

In [20]:
#epoch training

epochs = 200

for epoch in range(epochs):
    loss = train_net(X_train, y_train)
    if epoch % 5 == 0:
        print(f"Epoch {epoch}, Loss: {loss.asnumpy().mean():.4f}")


Epoch 0, Loss: 0.0107
Epoch 5, Loss: 0.0097
Epoch 10, Loss: 0.0089
Epoch 15, Loss: 0.0081
Epoch 20, Loss: 0.0075
Epoch 25, Loss: 0.0069
Epoch 30, Loss: 0.0064
Epoch 35, Loss: 0.0059
Epoch 40, Loss: 0.0055
Epoch 45, Loss: 0.0052
Epoch 50, Loss: 0.0049
Epoch 55, Loss: 0.0046
Epoch 60, Loss: 0.0043
Epoch 65, Loss: 0.0040
Epoch 70, Loss: 0.0038
Epoch 75, Loss: 0.0036
Epoch 80, Loss: 0.0034
Epoch 85, Loss: 0.0033
Epoch 90, Loss: 0.0031
Epoch 95, Loss: 0.0030
Epoch 100, Loss: 0.0028
Epoch 105, Loss: 0.0027
Epoch 110, Loss: 0.0026
Epoch 115, Loss: 0.0025
Epoch 120, Loss: 0.0024
Epoch 125, Loss: 0.0023
Epoch 130, Loss: 0.0022
Epoch 135, Loss: 0.0021
Epoch 140, Loss: 0.0020
Epoch 145, Loss: 0.0019
Epoch 150, Loss: 0.0018
Epoch 155, Loss: 0.0018
Epoch 160, Loss: 0.0017
Epoch 165, Loss: 0.0017
Epoch 170, Loss: 0.0016
Epoch 175, Loss: 0.0015
Epoch 180, Loss: 0.0015
Epoch 185, Loss: 0.0014
Epoch 190, Loss: 0.0014
Epoch 195, Loss: 0.0013


In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [21]:
model.set_train(False)

logits = model(X_test)
preds = logits.argmax(axis=1).asnumpy()
y_true = y_test.asnumpy()

print("Accuracy:", accuracy_score(y_true, preds))
print("Confusion Matrix:\n", confusion_matrix(y_true, preds))
print("\nClassification Report:\n",
      classification_report(y_true, preds, target_names=["Healthy", "Thalassemia"]))


Accuracy: 0.9523809523809523
Confusion Matrix:
 [[35  3]
 [ 1 45]]

Classification Report:
               precision    recall  f1-score   support

     Healthy       0.97      0.92      0.95        38
 Thalassemia       0.94      0.98      0.96        46

    accuracy                           0.95        84
   macro avg       0.95      0.95      0.95        84
weighted avg       0.95      0.95      0.95        84

