In [2]:
# ---- src/generate_data.py ----
import os
import random
import pandas as pd

# Dictionary for the Five Element model in Traditional Chinese Medicine (TCM)
five_elements = {
    "Wood": {
        "symptoms": ["anger", "red eyes", "tendon stiffness", "headache", "irritability"],
        "tongue_color": "red",
        "pulse_type": "wiry",
        "acupoint": "LV3",
        "spiritual_goals": ["gain clarity", "release anger", "embrace change"]
    },
    "Fire": {
        "symptoms": ["insomnia", "palpitations", "restlessness", "tongue ulcers", "anxiety"],
        "tongue_color": "red tip",
        "pulse_type": "rapid",
        "acupoint": "HT7",
        "spiritual_goals": ["feel joy", "open heart", "connect to purpose"]
    },
    "Earth": {
        "symptoms": ["fatigue", "poor appetite", "bloating", "heavy limbs", "worry"],
        "tongue_color": "pale",
        "pulse_type": "slippery",
        "acupoint": "ST36",
        "spiritual_goals": ["find stability", "feel nurtured", "process emotions"]
    },
    "Metal": {
        "symptoms": ["cough", "dry skin", "sadness", "nasal congestion", "grief"],
        "tongue_color": "white",
        "pulse_type": "weak",
        "acupoint": "LU9",
        "spiritual_goals": ["let go of grief", "breathe freely", "reclaim self-worth"]
    },
    "Water": {
        "symptoms": ["tinnitus", "cold limbs", "fear", "low back pain", "night sweating"],
        "tongue_color": "bluish",
        "pulse_type": "deep",
        "acupoint": "KI3",
        "spiritual_goals": ["face fears", "reconnect to source", "trust intuition"]
    }
}

def generate_data(num_samples=500):
    """
    Generate synthetic acupuncture data based on the Five Element TCM model.

    Args:
        num_samples (int): Number of synthetic samples to generate.

    Returns:
        pd.DataFrame: A DataFrame containing synthetic acupuncture data.
    """
    data = []
    for _ in range(num_samples):
        element = random.choice(list(five_elements.keys()))
        profile = five_elements[element]
        symptoms = random.sample(profile["symptoms"], k=3)
        data.append({
            "symptom_1": symptoms[0],
            "symptom_2": symptoms[1],
            "symptom_3": symptoms[2],
            "tongue_color": profile["tongue_color"],
            "pulse_type": profile["pulse_type"],
            "spiritual_goal": random.choice(profile["spiritual_goals"]),
            "acupoint": profile["acupoint"]
        })
    return pd.DataFrame(data)

def save_data(df, path='data/synthetic_acupuncture_data.csv'):
    """
    Save the generated DataFrame to a CSV file.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        path (str): File path to save the CSV.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    df.to_csv(path, index=False)
    print(f"Data saved to {path}")

def main():
    """Entry point for the CLI."""
    df = generate_data()
    save_data(df)

if __name__ == '__main__':
    main()

Data saved to data/synthetic_acupuncture_data.csv


In [3]:
# ---- src/preprocess.py ----
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import os

def load_data(path='data/synthetic_acupuncture_data.csv'):
    """
    Load the dataset from a CSV file.

    Args:
        path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: Loaded DataFrame.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"Data file not found at {path}")
    return pd.read_csv(path)

def preprocess_data(df):
    """
    Preprocess the data by one-hot encoding categorical features and
    label encoding the target variable.

    Args:
        df (pd.DataFrame): Raw DataFrame.

    Returns:
        Tuple[pd.DataFrame, LabelEncoder]: Encoded DataFrame and label encoder for acupoint.
    """
    df_encoded = pd.get_dummies(df, columns=['symptom_1', 'symptom_2', 'symptom_3', 'pulse_type', 'spiritual_goal', 'tongue_color'])
    label_encoder = LabelEncoder()
    df_encoded['acupoint'] = label_encoder.fit_transform(df['acupoint'])
    return df_encoded, label_encoder

def main():
    """CLI entry point for preprocessing."""
    df = load_data()
    df_encoded, label_encoder = preprocess_data(df)
    print("Preprocessing complete. Encoded feature shape:", df_encoded.shape)

if __name__ == '__main__':
    main()


Preprocessing complete. Encoded feature shape: (500, 101)


In [5]:
# ---- src/model.py ----
import torch
import torch.nn as nn

class AcupunctureModel(nn.Module):
    """
    A feedforward neural network for classifying acupuncture points based on TCM features.
    """
    def __init__(self, input_dim, output_dim):
        """
        Initialize the model layers.

        Args:
            input_dim (int): Number of input features.
            output_dim (int): Number of output classes (acupoints).
        """
        super(AcupunctureModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """
        Forward pass through the network.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_dim)

        Returns:
            torch.Tensor: Output probabilities for each class
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return self.softmax(x)

if __name__ == '__main__':
    # Temporary CLI block for testing the model definition
    input_dim = 101  # Update if your input dimension changes
    output_dim = 5   # Number of acupoint classes
    model = AcupunctureModel(input_dim, output_dim)
    print(model)


AcupunctureModel(
  (fc1): Linear(in_features=101, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=5, bias=True)
  (softmax): Softmax(dim=1)
)


In [7]:
# ---- src/train.py ----
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

from src.preprocess import load_data, preprocess_data
from src.model import AcupunctureModel

from sklearn.model_selection import train_test_split


def prepare_dataloaders(df_encoded):
    """
    Split the dataset and prepare PyTorch dataloaders.

    Args:
        df_encoded (pd.DataFrame): Encoded feature DataFrame.

    Returns:
        Tuple[DataLoader, DataLoader]: Training and test DataLoaders.
    """
    X = df_encoded.drop('acupoint', axis=1).values
    y = df_encoded['acupoint'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=False)

    return train_loader, test_loader, X_train_tensor.shape[1], len(set(y))


def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """
    Train the neural network.

    Args:
        model (nn.Module): The model to train.
        train_loader (DataLoader): Dataloader for training data.
        criterion: Loss function.
        optimizer: Optimizer.
        num_epochs (int): Number of training epochs.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct/total:.2f}%")


def save_model(model, path='saved_models/acupuncture_model.pth'):
    """
    Save the trained model to disk.

    Args:
        model (nn.Module): Trained model.
        path (str): Path to save the model file.
    """
    import os
    os.makedirs(os.path.dirname(path), exist_ok=True)
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")


def main():
    df = load_data()
    df_encoded, _ = preprocess_data(df)
    train_loader, _, input_dim, output_dim = prepare_dataloaders(df_encoded)

    model = AcupunctureModel(input_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_model(model, train_loader, criterion, optimizer)
    save_model(model)

if __name__ == '__main__':
    main()


Epoch [1/10], Loss: 1.6028, Accuracy: 45.25%
Epoch [2/10], Loss: 1.5826, Accuracy: 93.00%
Epoch [3/10], Loss: 1.5511, Accuracy: 100.00%
Epoch [4/10], Loss: 1.4939, Accuracy: 100.00%
Epoch [5/10], Loss: 1.3923, Accuracy: 100.00%
Epoch [6/10], Loss: 1.2456, Accuracy: 100.00%
Epoch [7/10], Loss: 1.0875, Accuracy: 100.00%
Epoch [8/10], Loss: 0.9812, Accuracy: 100.00%
Epoch [9/10], Loss: 0.9325, Accuracy: 100.00%
Epoch [10/10], Loss: 0.9154, Accuracy: 100.00%
Model saved to saved_models/acupuncture_model.pth


In [8]:
# ---- src/evaluate.py ----
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn

from src.preprocess import load_data, preprocess_data
from src.model import AcupunctureModel
from sklearn.model_selection import train_test_split


def prepare_test_loader(df_encoded):
    """
    Prepare the test set DataLoader from the encoded DataFrame.

    Args:
        df_encoded (pd.DataFrame): Preprocessed DataFrame with features and target.

    Returns:
        DataLoader: DataLoader for the test set.
    """
    X = df_encoded.drop('acupoint', axis=1).values
    y = df_encoded['acupoint'].values
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=False)
    return test_loader, X_test_tensor.shape[1], len(set(y_test))


def evaluate_model(model, test_loader, criterion):
    """
    Evaluate a trained model on the test dataset.

    Args:
        model (nn.Module): The trained model.
        test_loader (DataLoader): Test DataLoader.
        criterion: Loss function.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(test_loader)
    accuracy = 100 * correct / total
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")


def main():
    df = load_data()
    df_encoded, _ = preprocess_data(df)
    test_loader, input_dim, output_dim = prepare_test_loader(df_encoded)

    model = AcupunctureModel(input_dim, output_dim)
    model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))

    criterion = nn.CrossEntropyLoss()
    evaluate_model(model, test_loader, criterion)

if __name__ == '__main__':
    main()


Test Loss: 0.9122, Test Accuracy: 100.00%


  model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))


In [16]:
# ---- src/predict.py ----
import torch
import pandas as pd
from src.model import AcupunctureModel
from src.preprocess import preprocess_data

import numpy as np

# Sample input: replace these with real user input or connect to an interface later
sample_input = {
    "symptom_1": "palpitations",
    "symptom_2": "restlessness",
    "symptom_3": "anxiety",
    "tongue_color": "red tip",
    "pulse_type": "rapid",
    "spiritual_goal": "breathe freely"  # representing desire for freedom
}


def prepare_sample_input(sample_input, reference_df):
    """
    One-hot encode a single sample input based on the training feature schema.

    Args:
        sample_input (dict): Dictionary containing the sample TCM features.
        reference_df (pd.DataFrame): The original training dataframe used for structure.

    Returns:
        torch.Tensor: One-hot encoded input tensor.
    """
    input_df = pd.DataFrame([sample_input])
    full_df = pd.concat([reference_df.drop('acupoint', axis=1), input_df], ignore_index=True)
    encoded_df = pd.get_dummies(full_df)
    encoded_sample = encoded_df.tail(1)
    return torch.tensor(encoded_sample.values, dtype=torch.float32)


def main():
    # Load original training data to get encoding structure
    df = pd.read_csv('data/synthetic_acupuncture_data.csv')
    df_encoded, label_encoder = preprocess_data(df)

    # Prepare model
    input_dim = df_encoded.drop('acupoint', axis=1).shape[1]
    output_dim = len(label_encoder.classes_)
    model = AcupunctureModel(input_dim, output_dim)
    model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))
    model.eval()

    # Prepare the sample input
    input_tensor = prepare_sample_input(sample_input, df)

    # Make prediction
    with torch.no_grad():
        output = model(input_tensor)
        predicted_index = torch.argmax(output, dim=1).item()
        predicted_acupoint = label_encoder.inverse_transform([predicted_index])[0]

    print(f"Predicted Acupoint: {predicted_acupoint}")


if __name__ == '__main__':
    main()

Predicted Acupoint: HT7


  model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))


In [17]:
# ---- src/predict_cli.py ----
import torch
import pandas as pd
from src.model import AcupunctureModel
from src.preprocess import preprocess_data


def get_user_input():
    """
    Prompt the user to enter TCM-related symptoms and conditions.

    Returns:
        dict: A dictionary containing user input values.
    """
    print("Please enter the following information:")
    symptom_1 = input("Symptom 1: ").strip()
    symptom_2 = input("Symptom 2: ").strip()
    symptom_3 = input("Symptom 3: ").strip()
    tongue_color = input("Tongue color: ").strip()
    pulse_type = input("Pulse type: ").strip()
    spiritual_goal = input("Spiritual goal: ").strip()

    return {
        "symptom_1": symptom_1,
        "symptom_2": symptom_2,
        "symptom_3": symptom_3,
        "tongue_color": tongue_color,
        "pulse_type": pulse_type,
        "spiritual_goal": spiritual_goal
    }


def prepare_sample_input(sample_input, reference_df):
    input_df = pd.DataFrame([sample_input])
    full_df = pd.concat([reference_df.drop('acupoint', axis=1), input_df], ignore_index=True)
    encoded_df = pd.get_dummies(full_df)
    encoded_sample = encoded_df.tail(1)
    return torch.tensor(encoded_sample.values, dtype=torch.float32)


def main():
    df = pd.read_csv('data/synthetic_acupuncture_data.csv')
    df_encoded, label_encoder = preprocess_data(df)

    input_dim = df_encoded.drop('acupoint', axis=1).shape[1]
    output_dim = len(label_encoder.classes_)

    model = AcupunctureModel(input_dim, output_dim)
    model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))
    model.eval()

    user_input = get_user_input()
    input_tensor = prepare_sample_input(user_input, df)

    with torch.no_grad():
        output = model(input_tensor)
        predicted_index = torch.argmax(output, dim=1).item()
        predicted_acupoint = label_encoder.inverse_transform([predicted_index])[0]

    print(f"\nPredicted Acupoint: {predicted_acupoint}")


if __name__ == '__main__':
    main()


  model.load_state_dict(torch.load('saved_models/acupuncture_model.pth'))


Please enter the following information:


Symptom 1:  poor appetite
Symptom 2:  bloating
Symptom 3:  fatigue
Tongue color:  pale
Pulse type:  slippery
Spiritual goal:  feel nurtured



Predicted Acupoint: ST36
