In [8]:
import mindspore as ms
from mindspore import nn, Tensor
import mindspore.ops as ops
import numpy as np

# Select MindSpore's PyNative execution mode for the rest of the session.
ms.set_context(mode=ms.PYNATIVE_MODE)

In [10]:
import mindspore as ms
# Print the installed version so the recorded outputs can be tied to a release.
print("MindSpore version:", ms.__version__)
# Select PyNative execution mode (the first cell makes the same call).
ms.set_context(mode=ms.PYNATIVE_MODE)
print("Mode set to PYNATIVE for step-by-step teaching.")
# Installation references:
# https://www.mindspore.cn/docs/en/master/faq/installation.html
# https://www.mindspore.cn/install/en/

MindSpore version: 2.7.1.post1
Mode set to PYNATIVE for step-by-step teaching.


In [12]:
import numpy as np
from mindspore import nn, Tensor

# 1) Define a simple model
class SimpleNet(nn.Cell):
    """Smallest possible network: one dense layer mapping 2 inputs to 1 output."""

    def __init__(self):
        super(SimpleNet, self).__init__()
        # The only layer: 2 input features -> 1 output value.
        self.fc = nn.Dense(2, 1)

    def construct(self, x):
        """Run ``x`` through the dense layer and return its output."""
        prediction = self.fc(x)
        return prediction

net = SimpleNet()

# 2) Create a sample input tensor
# One row with two float32 values; `ms` is not imported in this cell and
# relies on the import made by an earlier cell.
x = Tensor(np.array([[1.0, 2.0]]), ms.float32)

# 3) Forward pass
# No seed is set in this cell, so the printed number presumably changes with
# the layer's initial weights from run to run.
y = net(x)
print("Output:", y)


Output: [[-0.101678]]


In [11]:
import os
import random
import mindspore as ms
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta

# Initialize faker for generating synthetic data
fake = Faker()
# Set random seeds for reproducibility
# Seeding is currently disabled, so each run produces different data.
# NOTE(review): the generators used by this cell are `random` and Faker, so
# repeatable output would need random.seed(42) and Faker.seed(42); the two
# calls below would not affect them (and `np` is not imported in this cell).
# ms.set_seed(42)
# np.random.seed(42)



# Define dataset properties
# Pools of synthetic identifiers that generated records are drawn from.
medication_names = [f"Medication_{i}" for i in range(1, 31)]  # Medication_1 .. Medication_30
atc_codes = [f"ATC_{i}" for i in range(1, 31)]  # ATC_1 .. ATC_30
diagnosis_codes = [f"D_{i}" for i in range(1, 21)]  # D_1 .. D_20
prescribers = [f"Prescriber_{i}" for i in range(1, 11)]  # Prescriber_1 .. Prescriber_10

# Get a date range within the past month
# Captured once when this cell runs; record dates are offsets from one_month_ago.
today = datetime.now()
one_month_ago = today - timedelta(days=30)

def generate_pharmacy_data(pharmacy_id, num_records):
    """Build synthetic pharmacy transactions as a flat list of rows.

    Each transaction gets 1-5 distinct medications and is expanded into one
    row per medication; all rows of a transaction share its id, age,
    diagnosis code, prescriber and date.

    Args:
        pharmacy_id: Accepted for the caller's convenience; not used when
            building the rows.
        num_records: Number of transactions to generate (the returned list
            holds between ``num_records`` and ``5 * num_records`` rows).

    Returns:
        list[list]: Rows ordered as transaction id, age, medication, ATC code,
        dosage, quantity, cost per medication, diagnosis code, prescriber id,
        date (``YYYY-MM-DD``).
    """
    rows = []
    for _record in range(num_records):
        txn_id = fake.uuid4()
        patient_age = random.randint(1, 100)
        n_items = random.randint(1, 5)
        # Medications are unique within a transaction (sample without replacement).
        picked_meds = random.sample(medication_names, n_items)
        # ATC codes are drawn independently of the medication names, so the
        # same medication can appear with different codes on different rows.
        picked_atc = random.choices(atc_codes, k=n_items)
        item_dosages = [random.randint(1, 500) for _ in range(n_items)]
        item_quantities = [random.randint(1, 10) for _ in range(n_items)]
        item_costs = [round(random.uniform(5, 50), 2) for _ in range(n_items)]
        diagnosis = random.choice(diagnosis_codes)
        prescriber = random.choice(prescribers)
        # Date falls between one_month_ago and one_month_ago + 30 days inclusive.
        day_offset = random.randint(0, 30)
        sale_date = (one_month_ago + timedelta(days=day_offset)).strftime("%Y-%m-%d")

        per_item = zip(picked_meds, picked_atc, item_dosages, item_quantities, item_costs)
        rows.extend(
            [txn_id, patient_age, med, atc, dose, qty, cost, diagnosis, prescriber, sale_date]
            for med, atc, dose, qty, cost in per_item
        )
    return rows

# Define dataset structure
# Column order of every generated CSV; it mirrors the order in which
# generate_pharmacy_data fills each row.
fields = [
    "Transaction_ID", "Age", "Medication_Name", "ATC_Code", "Dosage", 
    "Quantity", "Cost_Per_Medication", "Diagnosis_Code", "Prescriber_ID", "Date"
]

# Generate datasets
def generate_datasets(save_test=False, train_records=500, test_records=200):
    """Write one synthetic training CSV per pharmacy into the working directory.

    Covers 3 cities x 3 zones x 4 pharmacies; each file is named
    ``Ph{pharmacy}_Z{zone}_C{city}_train.csv`` with two-digit indices.

    Args:
        save_test: When True, also generate and write a matching
            ``*_test.csv`` per pharmacy. Defaults to False, which matches the
            previous behaviour of never writing test files, but no longer
            builds test data only to throw it away.
        train_records: Transactions per training file.
        test_records: Transactions per test file (only used with ``save_test``).
    """
    for city in range(1, 4):  # 3 cities
        for zone in range(1, 4):  # 3 zones per city
            for pharmacy in range(1, 5):  # 4 pharmacies per zone
                dataset_name = f"Ph{pharmacy:02d}_Z{zone:02d}_C{city:02d}"

                # Generate training dataset
                training_data = generate_pharmacy_data(dataset_name, train_records)
                train_df = pd.DataFrame(training_data, columns=fields)
                train_df.to_csv(f"{dataset_name}_train.csv", index=False)

                # Generate testing dataset only when it will actually be saved.
                if save_test:
                    testing_data = generate_pharmacy_data(dataset_name, test_records)
                    test_df = pd.DataFrame(testing_data, columns=fields)
                    test_df.to_csv(f"{dataset_name}_test.csv", index=False)

# Writes one *_train.csv per pharmacy into the current working directory.
generate_datasets()
print("Datasets generated with cost per medication and saved in the same directory as the script.")


Datasets generated with cost per medication and saved in the same directory as the script.


In [13]:
def generate_single_dataset(dataset_name, num_records):
    """Generate ``num_records`` transactions and save them as ``<dataset_name>.csv``.

    The file is written relative to the current working directory, with the
    columns listed in ``fields`` and no index column.
    """
    csv_path = f"{dataset_name}.csv"
    records = generate_pharmacy_data(dataset_name, num_records)
    pd.DataFrame(records, columns=fields).to_csv(csv_path, index=False)

# Generate the TEST dataset
# Writes TEST.csv: 200 transactions, each expanded to one row per medication.
generate_single_dataset("TEST", 200)

print("The TEST dataset with 200 transactions has been generated and saved as TEST.csv.")


The TEST dataset with 200 transactions has been generated and saved as TEST.csv.


In [21]:
import os
import pandas as pd
import numpy as np
import mindspore as ms
from mindspore import nn, Tensor, ops
from mindspore.dataset import GeneratorDataset
from sklearn.preprocessing import MinMaxScaler

# -----------------------------
# Reproducibility
# -----------------------------
# Seed MindSpore and NumPy's global RNG with the same fixed value.
ms.set_seed(42)
np.random.seed(42)

# Select PyNative execution mode (earlier cells make the same call).
ms.set_context(mode=ms.PYNATIVE_MODE)

# -----------------------------
# LSTM Model (MindSpore)
# -----------------------------
class LSTMModel(nn.Cell):
    """Stacked LSTM followed by a dense head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state (and of the dense input).
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced by the dense head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        # batch_first=True: the batch axis comes first in the input.
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        self.fc = nn.Dense(hidden_size, output_size)

    def construct(self, x):
        """Return the dense head's output for the final time step of ``x``."""
        sequence_out, _state = self.lstm(x)
        # Keep only the last position along axis 1 before the dense layer.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


# -----------------------------
# Dataset Generator
# -----------------------------
class PharmacyDataset:
    """Random-access source exposing DataFrame rows as (features, target) pairs.

    Args:
        data: DataFrame holding both the feature columns and the target column.
        features: List of feature column names.
        target: Name of the target column.
    """

    def __init__(self, data, features, target):
        feature_values = data[features].values
        target_values = data[target].values
        # Both arrays are stored as float32 copies.
        self.X = feature_values.astype(np.float32)
        self.y = target_values.astype(np.float32)

    def __getitem__(self, index):
        # Add leading batch and sequence axes: a single row becomes
        # an array of shape (1, 1, n_features).
        sample = np.reshape(self.X[index], (1, 1, -1))
        label = self.y[index]
        return sample, label

    def __len__(self):
        return self.y.shape[0]


# -----------------------------
# Load & preprocess data
# -----------------------------
def load_data(file_path, scaler=None):
    """Read a pharmacy CSV, derive the cost column and scale the features.

    Args:
        file_path: CSV to read; it must contain ``Age``, ``Dosage``,
            ``Quantity`` and ``Cost_Per_Medication``.
        scaler: Already-fitted scaler whose ``transform`` is applied. When
            None, a new ``MinMaxScaler`` is fitted on this file.

    Returns:
        tuple: ``(data, features, target, scaler)`` - the DataFrame with the
        feature columns replaced by their scaled values, the feature column
        names, the target column name, and the scaler that was used.
    """
    features = ['Age', 'Dosage', 'Quantity', 'Medication_Cost']
    target = 'Medication_Cost'
    # NOTE(review): the target column is also one of the features, so a model
    # trained on this output receives the value it is asked to predict.
    # Confirm whether that is intended.

    frame = pd.read_csv(file_path)
    frame['Medication_Cost'] = frame['Cost_Per_Medication'] * frame['Quantity']

    if scaler is not None:
        scaled = scaler.transform(frame[features])
    else:
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(frame[features])
    # The target is scaled too, because it is part of `features`.
    frame[features] = scaled

    return frame, features, target, scaler


# -----------------------------
# Training loop
# -----------------------------
def train_model(model, dataset, optimizer, loss_fn, epochs):
    """Train ``model`` for ``epochs`` passes over ``dataset``, one sample per step.

    Args:
        model: Network to train; it is switched to training mode.
        dataset: Iterable yielding ``(x, y)`` pairs.
        optimizer: MindSpore optimizer; its ``parameters`` are differentiated
            and it is called with the gradients after every step.
        loss_fn: Callable ``loss_fn(predictions, targets)``.
        epochs: Number of passes over ``dataset``.

    Prints the mean loss after each epoch (``nan`` if the dataset yielded
    nothing, instead of raising ZeroDivisionError).
    """
    def forward_fn(x, y):
        preds = model(x)
        # squeeze() drops size-1 axes so the prediction lines up with y.
        return loss_fn(preds.squeeze(), y)

    # Built once: neither the function being differentiated nor the parameter
    # list changes between steps, so there is no need to recreate it per sample.
    grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)

    model.set_train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_steps = 0
        for x, y in dataset:
            loss, grads = grad_fn(Tensor(x), Tensor(y))
            optimizer(grads)

            total_loss += float(loss.asnumpy())
            num_steps += 1

        # Average over the steps actually run rather than len(dataset).
        mean_loss = total_loss / num_steps if num_steps else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")


# -----------------------------
# Prediction
# -----------------------------
def predict_model(model, dataset):
    """Run ``model`` over ``dataset`` and return one Python scalar per sample.

    The model is switched out of training mode first. Targets yielded by the
    dataset are ignored. Each model output must hold exactly one element,
    because it is converted with ``.item()``.
    """
    model.set_train(False)
    return [model(Tensor(x)).asnumpy().item() for x, _ in dataset]


# -----------------------------
# Save / Load model
# -----------------------------
def save_model(model, path):
    """Save a checkpoint of ``model`` at ``path`` and print where it was written."""
    ms.save_checkpoint(model, path)
    message = f"Model saved to {path}"
    print(message)

def load_model(model, path):
    """Load the checkpoint at ``path`` into ``model`` and leave it in inference mode."""
    ms.load_checkpoint(path, net=model)
    # Turn training mode off once the weights are in place.
    model.set_train(mode=False)


# -----------------------------
# Train pharmacy models
# -----------------------------
def train_pharmacy_models(data_paths, output_path, scaler=None):
    """Train and checkpoint one LSTM model per pharmacy CSV.

    Args:
        data_paths: CSV files to train on, one model per file.
        output_path: Directory that receives ``<file stem>_model.ckpt``.
        scaler: Optional pre-fitted scaler applied to every file. When None,
            each file is scaled with its own freshly fitted scaler.

    Returns:
        list: Trained models, in the same order as ``data_paths``.
    """
    pharmacy_models = []

    for file_path in data_paths:
        # Do not rebind `scaler` here. Doing so would silently reuse the scaler
        # fitted on the first file for all later files, whereas
        # aggregate_pharmacy_models calls load_data(file_path) with no scaler
        # and therefore scales each file on its own at prediction time.
        data, features, target, _ = load_data(file_path, scaler)
        dataset = GeneratorDataset(
            PharmacyDataset(data, features, target),
            column_names=["x", "y"],
            shuffle=True
        )

        # 64 hidden units, 2 stacked LSTM layers, 1 output value.
        model = LSTMModel(len(features), 64, 2, 1)
        loss_fn = nn.MSELoss()
        optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)

        print(f"Training model for {file_path}")
        train_model(model, dataset, optimizer, loss_fn, epochs=2)

        # Checkpoint name is the file name up to its first dot.
        model_path = os.path.join(
            output_path, f"{os.path.basename(file_path).split('.')[0]}_model.ckpt"
        )
        save_model(model, model_path)

        pharmacy_models.append(model)

    return pharmacy_models


# -----------------------------
# Aggregation logic (UNCHANGED)
# -----------------------------
def aggregate_pharmacy_models(pharmacy_models, data_paths, output_path):
    """Predict with each pharmacy model and average the results per city.

    Args:
        pharmacy_models: Trained models, aligned by position with ``data_paths``.
        data_paths: CSV files named like ``Ph01_Z01_C01_train.csv``; the two
            characters after ``_C`` in the file name give the city number.
        output_path: Directory that receives ``City<N>_predictions.csv``.

    Returns:
        dict: ``"City<N>"`` -> DataFrame with one row per medication holding
        the mean ``Predicted_Cost`` and ``Predicted_Trend_Change``. Cities with
        no input files are omitted instead of making ``pd.concat`` raise.
    """
    city_blocks = {f"City{i}": [] for i in range(1, 4)}

    for idx, file_path in enumerate(data_paths):
        # Called without a scaler, so each file is scaled on its own.
        data, features, target, _ = load_data(file_path)
        dataset = GeneratorDataset(
            PharmacyDataset(data, features, target),
            column_names=["x", "y"],
            shuffle=False  # keep row order so predictions align with `data`
        )

        preds = predict_model(pharmacy_models[idx], dataset)
        data['Predicted_Cost'] = preds
        # NOTE(review): this column is random noise, not a model output.
        # Confirm whether it is a placeholder before relying on it.
        data['Predicted_Trend_Change'] = np.random.randn(len(preds))

        # Parse the city from the file name only, so a "_C" somewhere in the
        # directory part of the path cannot be mistaken for the city marker.
        city_id = os.path.basename(file_path).split("_C")[1][:2]
        # setdefault accepts city numbers beyond the three pre-created keys.
        city_blocks.setdefault(f"City{int(city_id)}", []).append(
            data[['Medication_Name', 'Predicted_Cost', 'Predicted_Trend_Change']]
        )

    city_predictions = {}
    for city, blocks in city_blocks.items():
        if not blocks:
            # pd.concat raises on an empty list; skip cities without data.
            continue
        df = pd.concat(blocks)
        df = df.groupby('Medication_Name').mean().reset_index()
        df.to_csv(os.path.join(output_path, f"{city}_predictions.csv"), index=False)
        city_predictions[city] = df

    return city_predictions


def aggregate_city_models(city_predictions, output_path):
    """Combine per-city predictions into one national table and save it.

    Args:
        city_predictions: Mapping of city name to a DataFrame that has a
            ``Medication_Name`` column plus numeric prediction columns.
        output_path: Directory that receives
            ``national_aggregated_predictions.csv``.

    Returns:
        DataFrame: One row per medication with each column averaged over the
        city rows (every city row counts equally).
    """
    city_frames = list(city_predictions.values())
    stacked = pd.concat(city_frames)
    national = stacked.groupby('Medication_Name').mean().reset_index()

    csv_path = os.path.join(output_path, "national_aggregated_predictions.csv")
    national.to_csv(csv_path, index=False)
    return national


# -----------------------------
# Main
# -----------------------------
if __name__ == "__main__":
    # Training CSVs are read from the working directory; checkpoints and
    # aggregated CSVs are written under ./output/.
    data_path = "./"
    output_path = "./output/"
    os.makedirs(output_path, exist_ok=True)

    # 4 pharmacies x 3 zones x 3 cities = 36 files, matching the names written
    # by generate_datasets. The city index varies fastest, then zone, then pharmacy.
    pharmacy_data_paths = [
        os.path.join(data_path, f"Ph{p:02d}_Z{z:02d}_C{c:02d}_train.csv")
        for p in range(1, 5) for z in range(1, 4) for c in range(1, 4)
    ]

    # Pipeline: train one model per pharmacy -> average per city -> average nationally.
    pharmacy_models = train_pharmacy_models(pharmacy_data_paths, output_path)
    city_predictions = aggregate_pharmacy_models(pharmacy_models, pharmacy_data_paths, output_path)
    national_predictions = aggregate_city_models(city_predictions, output_path)

    print("City and national aggregation complete.")


Training model for ./Ph01_Z01_C01_train.csv
Epoch 1/2, Loss: 0.0090
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z01_C01_train_model.ckpt
Training model for ./Ph01_Z01_C02_train.csv
Epoch 1/2, Loss: 0.0108
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z01_C02_train_model.ckpt
Training model for ./Ph01_Z01_C03_train.csv
Epoch 1/2, Loss: 0.0086
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z01_C03_train_model.ckpt
Training model for ./Ph01_Z02_C01_train.csv
Epoch 1/2, Loss: 0.0088
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z02_C01_train_model.ckpt
Training model for ./Ph01_Z02_C02_train.csv
Epoch 1/2, Loss: 0.0085
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z02_C02_train_model.ckpt
Training model for ./Ph01_Z02_C03_train.csv
Epoch 1/2, Loss: 0.0100
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z02_C03_train_model.ckpt
Training model for ./Ph01_Z03_C01_train.csv
Epoch 1/2, Loss: 0.0096
Epoch 2/2, Loss: 0.0001
Model saved to ./output/Ph01_Z03