In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# !pip install tensorflow

In [3]:
columns = ["age", "workclass", "fnlwgt", "education", "education-num", "martial-status",
        "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
        "hours-per-week", "native-country", "income"]
data = pd.read_csv('/Users/swithana/git/d2i/patra-toolkit/examples/notebooks/data/adult/adult.data', names=columns)

In [None]:
data.head()

### Pre-processing

In [None]:
label_encoder = LabelEncoder()
data['income'] = label_encoder.fit_transform(data['income'])

# Convert categorical variables to one-hot encoding
data = pd.get_dummies(data, drop_first=True, dtype=float)

data.head()

In [6]:
X = data.drop('income', axis=1).values
y = data['income'].values

print(data.columns.tolist())

['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week', 'income', 'workclass_ Federal-gov', 'workclass_ Local-gov', 'workclass_ Never-worked', 'workclass_ Private', 'workclass_ Self-emp-inc', 'workclass_ Self-emp-not-inc', 'workclass_ State-gov', 'workclass_ Without-pay', 'education_ 11th', 'education_ 12th', 'education_ 1st-4th', 'education_ 5th-6th', 'education_ 7th-8th', 'education_ 9th', 'education_ Assoc-acdm', 'education_ Assoc-voc', 'education_ Bachelors', 'education_ Doctorate', 'education_ HS-grad', 'education_ Masters', 'education_ Preschool', 'education_ Prof-school', 'education_ Some-college', 'martial-status_ Married-AF-spouse', 'martial-status_ Married-civ-spouse', 'martial-status_ Married-spouse-absent', 'martial-status_ Never-married', 'martial-status_ Separated', 'martial-status_ Widowed', 'occupation_ Adm-clerical', 'occupation_ Armed-Forces', 'occupation_ Craft-repair', 'occupation_ Exec-managerial', 'occupation_ Farming-fishing', 'occupa

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
X_train.shape

(26048, 100)

### Model Training

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, callbacks=[early_stopping])

Epoch 1/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6607 - loss: 514.0686 - val_accuracy: 0.8027 - val_loss: 138.2379
Epoch 2/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6886 - loss: 123.0007 - val_accuracy: 0.7988 - val_loss: 19.6475
Epoch 3/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6748 - loss: 45.6013 - val_accuracy: 0.8019 - val_loss: 28.9808
Epoch 4/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6802 - loss: 34.3183 - val_accuracy: 0.8012 - val_loss: 23.5935
Epoch 5/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6813 - loss: 20.1362 - val_accuracy: 0.8000 - val_loss: 4.2781
Epoch 6/100
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6906 - loss: 2.8920 - val_accuracy: 0.8061 - val_loss: 1.1385
Epoch 7/10

<keras.src.callbacks.history.History at 0x26a36a33f50>

In [11]:
loss, accuracy = model.evaluate(X_test, y_test)

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 988us/step - accuracy: 0.7735 - loss: 0.5310


In [12]:
y_pred = model.predict(X_test)

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


## Model Card Generation

In [13]:
from patra_model_card.patra_model_card import ModelCard, AIModel, BiasAnalysis, ExplainabilityAnalysis

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
mc = ModelCard(
            name="UCI Adult Data Analysis model using Tensorflow",
            version="0.1",
            short_description="UCI Adult Data analysis using Tensorflow for demonstration of Patra Model Cards.",
            full_description="We have trained a ML model using the tensorflow framework to predict income for the UCI Adult Dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
            keywords="uci adult, tensorflow, explainability, fairness, patra",
            author="Sachith Withana",
            input_type="Tabular",
            category="classification",
            foundational_model="None"
        )

mc.input_data = 'https://archive.ics.uci.edu/dataset/2/adult'
mc.output_data = 'https://github.iu.edu/d2i/dockerhub/tensorflow/adult_modelv01'

In [15]:
ai_model = AIModel(
            name="Income prediction tensorflow model",
            version="0.1",
            description="Census classification problem using Tensorflow Neural Network using the UCI Adult Dataset",
            owner="Sachith Withana",
            location="https://github.iu.edu/d2i/uci_adult/tensorflow_model",
            license="BSD-3 Clause",
            framework="tensorflow",
            model_type="dnn",
            test_accuracy=accuracy
        )
ai_model.populate_model_structure(model)

In [16]:
ai_model.add_metric("Test loss", loss)
ai_model.add_metric("Epochs", 100)
ai_model.add_metric("Batch Size", 32)
ai_model.add_metric("Optimizer", "Adam")
ai_model.add_metric("Learning Rate", 0.0001)
ai_model.add_metric("Input Shape", "(26048, 100)")

In [17]:
mc.ai_model = ai_model

In [18]:
mc.populate_requirements()

In [19]:
print(mc)

{
    "name": "UCI Adult Data Analysis model using Tensorflow",
    "version": "0.1",
    "short_description": "UCI Adult Data analysis using Tensorflow for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the tensorflow framework to predict income for the UCI Adult Dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "uci adult, tensorflow, explainability, fairness, patra",
    "author": "Sachith Withana",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://archive.ics.uci.edu/dataset/2/adult",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/adult_modelv01",
    "ai_model": {
        "name": "Income prediction tensorflow model",
        "version": "0.1",
        "description": "Census classification problem using Tensorflow Neural Network using the UCI Adult Dataset",
       

In [20]:
y_pred = model.predict(X_test)
y_pred = (y_pred >= 0.5).flatten()
mc.populate_bias(X_test, y_test, y_pred, "gender", X_test[:, 58], model)

[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 813us/step


In [21]:
x_columns = data.columns.tolist()
x_columns.remove('income')

mc.populate_xai(X_test[:10], x_columns, model, 10)

In [22]:
print(mc.bias_analysis)

{'demographic_parity_diff': 0.01868794336126514, 'equal_odds_difference': 0.020824111318539618}


In [23]:
print(mc.xai_analysis)

{'capital_gain': 0.11161078001889917, 'fnlwgt': 0.01672927967376179, 'age': 6.346434354782287e-05, 'hours_per_week': 4.864313536220007e-05, 'occupation__Exec_managerial': 9.803416000473932e-06, 'education_num': 5.791021717916098e-06, 'workclass__Self_emp_not_inc': 4.637746347324611e-06, 'education__HS_grad': 4.270490672854862e-06, 'relationship__Wife': 3.714164098102114e-06, 'martial_status__Married_civ_spouse': 2.949121925568029e-06}


In [24]:
print(mc)

{
    "name": "UCI Adult Data Analysis model using Tensorflow",
    "version": "0.1",
    "short_description": "UCI Adult Data analysis using Tensorflow for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the tensorflow framework to predict income for the UCI Adult Dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "uci adult, tensorflow, explainability, fairness, patra",
    "author": "Sachith Withana",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://archive.ics.uci.edu/dataset/2/adult",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/adult_modelv01",
    "ai_model": {
        "name": "Income prediction tensorflow model",
        "version": "0.1",
        "description": "Census classification problem using Tensorflow Neural Network using the UCI Adult Dataset",
       

In [25]:
mc.save("/Users/swithana/git/d2i/patra-toolkit/examples/model_cards/tesorflow_adult_nn_MC.json")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/swithana/git/d2i/patra-toolkit/examples/model_cards/tesorflow_adult_nn_MC.json'

In [27]:
mc.validate()

True

In [28]:
mc.submit("http://127.0.0.1:5000/upload_mc")

{'message': 'Successfully uploaded the model card',
 'model_card_id': 'd9f7aa3e-a9ac-4084-a5cc-92cfaf4eacde'}