Dataset: Kaggle "500 Person Gender-Height-Weight-Body Mass Index" — https://www.kaggle.com/datasets/yersever/500-person-gender-height-weight-bodymassindex?resource=download

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import torch
from torch import nn, optim
import pickle
import onnx

### Load Dataset

In [None]:
# Read the raw table and encode Gender numerically (Female -> 1, Male -> 0)
# in one chained expression.
data = (
    pd.read_csv('500_Person_Gender_Height_Weight_Index.csv')
    .assign(Gender=lambda frame: frame['Gender'].map({'Female': 1, 'Male': 0}))
)

# Feature matrix (gender, height, weight) and target vector (Index column)
# as plain numpy arrays.
X = data.loc[:, ['Gender', 'Height', 'Weight']].to_numpy()
y = data['Index'].to_numpy()

### Split and transform data

In [None]:
# Hold out 20% of the rows for testing; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Oversample the training split (only) with SMOTE.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Convert everything to float32 tensors; the label vectors become columns of
# shape (n_samples, 1).
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

### Train model

In [None]:
# Seed PyTorch so the weight initialisation (and therefore the trained and
# exported model) is reproducible, in line with the random_state=42 already
# used for the train/test split and SMOTE.
torch.manual_seed(42)

# Small fully connected classifier: 3 input features -> 6 class logits.
model = nn.Sequential(
    nn.Linear(3, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 6)  # 6 possible classes in output
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# CrossEntropyLoss takes integer class indices of shape (N,), while y_train is
# a float column of shape (N, 1). Convert once, outside the loop, and squeeze
# only the column axis so a single-sample training set still yields a 1-D
# target instead of a 0-dim scalar.
targets = y_train.long().squeeze(1)

# Full-batch training: every epoch is one optimizer step over all of X_train.
epochs = 500
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, targets)
    loss.backward()
    optimizer.step()

    # Report the training loss every 50 epochs.
    if (epoch+1) % 50 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

### Save model and input scaler

In [None]:
# Persist only the fitted statistics of the scaler (per-feature mean and
# scale) as a plain dict.
scaler_stats = {'mean': scaler.mean_, 'scale': scaler.scale_}
with open('input_scaler.pkl', 'wb') as f:
    pickle.dump(scaler_stats, f)

# Save model as ONNX: switch the network to eval mode, then export it using a
# random example input of shape (1, 3).
model.eval()
dummy_input = torch.randn(1, 3)
torch.onnx.export(
    model,
    dummy_input,
    'model.onnx',
    opset_version=11,
)

### Prediction

In [None]:
import onnxruntime

# Reload the saved scaling statistics (a dict with 'mean' and 'scale').
# NOTE: this rebinds the name `scaler`, which previously referred to the
# fitted StandardScaler object.
with open('input_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Open an ONNX Runtime inference session on the exported model file.
session = onnxruntime.InferenceSession('model.onnx')

def predict(gender, height, weight):
    """
    Predict the Index class for one person with the exported ONNX model.

    gender: {0: male, 1: female}
    height: cm
    weight: kg

    Returns the predicted class as an int (position of the largest model
    output).
    """
    # Standardise the raw features with the saved per-feature mean and scale.
    raw_features = np.array([gender, height, weight], dtype=np.float64)
    scaled_features = (raw_features - scaler['mean']) / scaler['scale']

    # ONNX Runtime only accepts numpy arrays as tensor inputs, and the dtype
    # must match the graph input. The model was exported from a float32
    # (1, 3) example, so build exactly that; a nested Python list of float64
    # values is rejected by session.run.
    input_data = scaled_features.astype(np.float32).reshape(1, 3)

    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: input_data})

    # First output, first (only) row: one score per class.
    class_scores = outputs[0][0]

    # int(...) keeps the return type a plain Python int.
    return int(np.argmax(class_scores))