In [3]:
import pandas as pd
import random

# First generate a random dataset with 3 features: age, gender and daytime, each point is either click or no click.
def generate_random_dataset():
    data = pd.DataFrame()

    num_data_points = 1000000

    ages = [random.randint(18, 60) for _ in range(num_data_points)]
    is_fem = [random.choice([0,1]) for _ in range(num_data_points)]
    is_other = [random.choice([0,1]) for _ in range(num_data_points)]
    is_male = [random.choice([0,1]) for _ in range(num_data_points)]

    is_morning = [random.choice([0,1]) for _ in range(num_data_points)]
    is_noon = [random.choice([0,1]) for _ in range(num_data_points)]
    is_evening = [random.choice([0,1]) for _ in range(num_data_points)]
    is_night = [random.choice([0,1]) for _ in range(num_data_points)]

    clicks = [random.choice([0, 1]) for _ in range(num_data_points)]

    data['Age'] = ages
    data['Is_Female'] = is_fem
    data['Is_Male'] = is_male
    data['Is_Other'] = is_other
    data['Is_Morning'] = is_morning
    data['Is_Noon'] = is_noon
    data['Is_Evening'] = is_evening
    data['Is_Night'] = is_night
    data['Click'] = clicks
    return data

data = generate_random_dataset()

print(data.head())

   Age  Is_Female  Is_Male  Is_Other  Is_Morning  Is_Noon  Is_Evening  \
0   48          1        1         1           1        0           1   
1   36          1        1         0           0        0           0   
2   47          0        0         1           0        1           0   
3   20          0        1         0           0        0           1   
4   56          1        0         1           1        0           1   

   Is_Night  Click  
0         0      1  
1         1      1  
2         1      0  
3         1      1  
4         1      0  


In [12]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

def separate_dataset(dataset):
    X = dataset[['Age', 'Is_Female', 'Is_Male', 'Is_Other', 'Is_Morning', 'Is_Noon', 'Is_Evening', 'Is_Night']]
    y = dataset['Click']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return (X_train, X_test, y_train, y_test)

def train_lightgbm_model(X_train, y_train):
    clf = lgb.LGBMClassifier()
    clf.fit(X_train, y_train)
    return clf

X_train, X_test, y_train, y_test = separate_dataset(data)
clf = train_lightgbm_model(X_train, y_train)
# Make predictions on the test data
y_pred = clf.predict(X_test)

# Convert predicted probabilities to binary class labels (0 or 1)
y_pred_binary = [1 if p >= 0.5 else 0 for p in y_pred]

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_binary)
report = classification_report(y_test, y_pred_binary)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

Accuracy: 0.500965
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.51      0.51    100272
           1       0.50      0.49      0.50     99728

    accuracy                           0.50    200000
   macro avg       0.50      0.50      0.50    200000
weighted avg       0.50      0.50      0.50    200000



In [13]:
clf.feature_name_

['Age',
 'Is_Female',
 'Is_Male',
 'Is_Other',
 'Is_Morning',
 'Is_Noon',
 'Is_Evening',
 'Is_Night']

In [14]:
import onnxmltools
from onnxmltools.convert import convert_lightgbm
from skl2onnx.common.data_types import FloatTensorType
from lightgbm import LGBMClassifier

def transform_to_onnx(clf: LGBMClassifier):
    features=str(clf.feature_name_)
    initial_types = [('input', FloatTensorType([1, X_train.shape[1]]))]
    onnx_model = convert_lightgbm(clf, initial_types=initial_types, zipmap=False)
    return onnx_model

In [15]:
onnx_model = transform_to_onnx(clf)
# Save as protobuf
onnxmltools.utils.save_model(onnx_model, 'a_branch.onnx')

The maximum opset needed by this model is only 9.


In [16]:
# generate several random models for some branches
dataset = generate_random_dataset()
X_train, X_test, y_train, y_test = separate_dataset(dataset)
clf = train_lightgbm_model(X_train, y_train)
onnx_model = transform_to_onnx(clf)
onnxmltools.utils.save_model(onnx_model, 'a_branch.onnx')

# generate several random models for some branches
dataset = generate_random_dataset()
X_train, X_test, y_train, y_test = separate_dataset(dataset)
clf = train_lightgbm_model(X_train, y_train)
onnx_model = transform_to_onnx(clf)
onnxmltools.utils.save_model(onnx_model, 'b_branch.onnx')

# generate several random models for some branches
dataset = generate_random_dataset()
X_train, X_test, y_train, y_test = separate_dataset(dataset)
clf = train_lightgbm_model(X_train, y_train)
onnx_model = transform_to_onnx(clf)
onnxmltools.utils.save_model(onnx_model, 'c_branch.onnx')

The maximum opset needed by this model is only 9.
The maximum opset needed by this model is only 9.
The maximum opset needed by this model is only 9.
