In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv('data/titanic/train.csv')

In [3]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
# Drop irrelevant columns
data_cleaned = data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

In [5]:
# Fill missing values in 'Age' with the median and 'Embarked' with the mode

data_cleaned['Age'] = data_cleaned['Age'].fillna(data_cleaned['Age'].median())
data_cleaned['Embarked'] = data_cleaned['Embarked'].fillna(data_cleaned['Embarked'].mode()[0])

In [6]:
# Encode categorical variables 'Sex' and 'Embarked'
label_encoder_sex = LabelEncoder()
label_encoder_embarked = LabelEncoder()

In [7]:
data_cleaned['Sex'] = label_encoder_sex.fit_transform(data_cleaned['Sex'])
data_cleaned['Embarked'] = label_encoder_embarked.fit_transform(data_cleaned['Embarked'])

In [8]:
# Separate features (X) and target variable (y)
X = data_cleaned.drop(columns=['Survived'])
y = data_cleaned['Survived']

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the shape of the processed data
input_shape = X_train.shape

In [9]:
data_cleaned.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [10]:
# Build a simple Sequential model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),  # Input layer with the number of features
    tf.keras.layers.Dense(64, activation='relu'),  # Hidden layer with 16 neurons and ReLU activation
    tf.keras.layers.Dense(128, activation='relu'),   # Hidden layer with 8 neurons and ReLU activation
    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer for binary classification (sigmoid)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Epoch 1/50




[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5389 - loss: 1.0401 - val_accuracy: 0.5866 - val_loss: 0.9398
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6213 - loss: 0.8632 - val_accuracy: 0.6536 - val_loss: 0.6212
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7103 - loss: 0.6311 - val_accuracy: 0.7374 - val_loss: 0.5602
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7146 - loss: 0.5739 - val_accuracy: 0.7318 - val_loss: 0.5487
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6965 - loss: 0.5784 - val_accuracy: 0.7151 - val_loss: 0.5508
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7075 - loss: 0.5706 - val_accuracy: 0.7430 - val_loss: 0.5441
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━

In [11]:
from patra_model_card.patra_model_card import ModelCard, AIModel, BiasAnalysis, ExplainabilityAnalysis, Metric

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
mc = ModelCard(
            name="Titanic Disaster Analysis",
            version="0.1",
            short_description="Titanic Disaster Analysis using Tensorflow for demonstration of Patra Model Cards.",
            full_description="We have trained a ML model using the tensorflow framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
            keywords="titanic, tensorflow, explainability, fairness, patra",
            author="Isuru Gamage",
            input_type="Tabular",
            category="classification",
            foundational_model="None",
        )

mc.input_data = 'https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis'
mc.output_data = 'https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01'

In [13]:
ai_model = AIModel(
            name="Survived prediction tensorflow model",
            version="0.1",
            description="Census classification problem using Tensorflow Neural Network using the Titanic Disaster Analysis Dataset",
            owner="Isuru Gamage",
            location="https://github.iu.edu/d2i/sales/tensorflow_model",
            license="BSD-3 Clause",
            framework="tensorflow",
            model_type="dnn",
            test_accuracy=test_accuracy
        )
ai_model.populate_model_structure(model)

In [14]:
ai_model.add_metric("Test loss", test_loss)
ai_model.add_metric("Epochs", 100)
ai_model.add_metric("Batch Size", 32)
ai_model.add_metric("Optimizer", "Adam")
ai_model.add_metric("Learning Rate", 0.0001)
ai_model.add_metric("Input Shape", input_shape)

In [15]:
mc.ai_model = ai_model

In [16]:
mc.populate_requirements()

In [17]:
print(mc)

{
    "name": "Titanic Disaster Analysis",
    "version": "0.1",
    "short_description": "Titanic Disaster Analysis using Tensorflow for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the tensorflow framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "titanic, tensorflow, explainability, fairness, patra",
    "author": "Isuru Gamage",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01",
    "foundational_model": "None",
    "ai_model": {
        "name": "Survived prediction tensorflow model",
        "version": "0.1",
        "description": "Census classification problem using Tensorflow Ne

In [18]:
y_pred = model.predict(X_test)
y_pred = (y_pred >= 0.5).flatten()
# y_pred
mc.populate_bias(X_test, y_test, y_pred, "Sex", X_test['Sex'], model)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [19]:
x_columns = data_cleaned.columns.tolist()
x_columns.remove('Survived')

mc.populate_xai(X_test[:10], x_columns, model, 10)

In [20]:
print(mc)

{
    "name": "Titanic Disaster Analysis",
    "version": "0.1",
    "short_description": "Titanic Disaster Analysis using Tensorflow for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the tensorflow framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "titanic, tensorflow, explainability, fairness, patra",
    "author": "Isuru Gamage",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01",
    "foundational_model": "None",
    "ai_model": {
        "name": "Survived prediction tensorflow model",
        "version": "0.1",
        "description": "Census classification problem using Tensorflow Ne

In [21]:
mc.validate()

True

In [22]:
mc.submit("http://149.165.153.142:5002")

{'message': 'Successfully uploaded the model card',
 'model_card_id': '1b9e2ce1b346f4c2084c35f78543c591db132696d274ed924423e92fdbe6a65a'}

In [23]:
mc.save("../model_cards/tensorflow_titanic_MC.json")