We'll start by creating a synthetic dataset.

In [1]:
import pandas as pd
import numpy as np

# Seed for reproducibility
np.random.seed(42)

# Generate synthetic data
n = 1000
data = pd.DataFrame({
    'age': np.random.randint(21, 70, size=n),
    'gender': np.random.choice(['Male', 'Female'], size=n),
    'education': np.random.choice(['Graduate', 'Not Graduate'], size=n),
    'marital_status': np.random.choice(['Single', 'Married'], size=n),
    'income': np.random.randint(30000, 150000, size=n),
    'credit_score': np.random.randint(300, 850, size=n),
    'loan_amount': np.random.randint(5000, 50000, size=n),
    'loan_term': np.random.randint(12, 60, size=n),
    'debt_to_income_ratio': np.random.uniform(0.1, 0.5, size=n),
    'loan_status': np.random.choice(['Approved', 'Denied'], size=n, p=[0.7, 0.3])
})

# Save dataset
data.to_csv('loan_approval_data.csv', index=False)


Now we'll load the dataset.

In [2]:
import pandas as pd

# Load data
data = pd.read_csv('loan_approval_data.csv')
data.head()


Unnamed: 0,age,gender,education,marital_status,income,credit_score,loan_amount,loan_term,debt_to_income_ratio,loan_status
0,59,Male,Not Graduate,Single,85296,827,6134,39,0.121131,Approved
1,49,Female,Not Graduate,Married,51447,672,15938,20,0.390434,Denied
2,35,Female,Graduate,Married,66598,783,29486,56,0.148521,Approved
3,63,Male,Graduate,Single,71814,527,31096,33,0.22111,Approved
4,28,Female,Not Graduate,Single,120751,606,46115,43,0.312988,Approved


We'll now encode the categorical variables

In [3]:
from sklearn.preprocessing import LabelEncoder

# Initialize label encoder
le = LabelEncoder()

# Encode categorical variables
data['gender'] = le.fit_transform(data['gender'])
data['education'] = le.fit_transform(data['education'])
data['marital_status'] = le.fit_transform(data['marital_status'])
data['loan_status'] = le.fit_transform(data['loan_status'])  # Target variable

data.head()


Unnamed: 0,age,gender,education,marital_status,income,credit_score,loan_amount,loan_term,debt_to_income_ratio,loan_status
0,59,1,1,1,85296,827,6134,39,0.121131,0
1,49,0,1,0,51447,672,15938,20,0.390434,1
2,35,0,0,0,66598,783,29486,56,0.148521,0
3,63,1,0,1,71814,527,31096,33,0.22111,0
4,28,0,1,1,120751,606,46115,43,0.312988,0


Now train the model

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Features and target variable
X = data.drop('loan_status', axis=1)
y = data['loan_status']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train a decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Predict on test data
y_pred = clf.predict(X_test)

# Classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.71      0.58      0.64       212
           1       0.29      0.42      0.35        88

    accuracy                           0.53       300
   macro avg       0.50      0.50      0.49       300
weighted avg       0.59      0.53      0.55       300



Now we'll install and set up model tool kit and other required libraries.

In [6]:
!pip install --upgrade pip




Collecting pip
  Downloading pip-24.2-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.2


In [8]:
!pip install tensorflow-model-analysis


Collecting tensorflow-model-analysis
  Using cached tensorflow_model_analysis-0.46.0-py3-none-any.whl.metadata (20 kB)
Collecting jsonschema<5.0.0,>=4.0.0 (from apache-beam<3,>=2.47->apache-beam[gcp]<3,>=2.47; python_version < "3.11"->tensorflow-model-analysis)
  Downloading jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB)
Using cached tensorflow_model_analysis-0.46.0-py3-none-any.whl (1.9 MB)
Downloading jsonschema-4.23.0-py3-none-any.whl (88 kB)
Installing collected packages: jsonschema, tensorflow-model-analysis
  Attempting uninstall: jsonschema
    Found existing installation: jsonschema 3.2.0
    Uninstalling jsonschema-3.2.0:
      Successfully uninstalled jsonschema-3.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
model-card-toolkit 2.0.0 requires tensorflow-data-validation<2.0.0,>=1.5.0, which is not installed.
model-card-toolkit 2.0.0 re

In [9]:
!pip install model-card-toolkit --no-deps




In [10]:
!pip install ml-metadata==1.15.0





In [11]:
!pip show ml-metadata model-card-toolkit


Name: ml-metadata
Version: 1.15.0
Summary: A library for maintaining metadata for artifacts.
Home-page: https://github.com/google/ml-metadata
Author: Google LLC
Author-email: tensorflow-extended-dev@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: absl-py, attrs, grpcio, protobuf, six
Required-by: model-card-toolkit
---
Name: model-card-toolkit
Version: 2.0.0
Summary: Model Card Toolkit
Home-page: https://github.com/tensorflow/model-card-toolkit
Author: Google LLC
Author-email: tensorflow-extended-dev@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: absl-py, jinja2, jsonschema, matplotlib, ml-metadata, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis
Required-by: 


In [13]:
import model_card_toolkit as mct
import tensorflow_model_analysis as tfma


print("Packages installed successfully!")


Packages installed successfully!


In [14]:
import tensorflow as tf
import numpy as np
import model_card_toolkit as mct

from model_card_toolkit.utils.graphics import figure_to_base64str
import tempfile
import matplotlib.pyplot as plt
from IPython import display
import requests
import os
import zipfile

In [18]:
# https://github.com/tensorflow/model-card-toolkit/blob/master/model_card_toolkit/model_card_toolkit.py
model_card_dir = tempfile.mkdtemp()
toolkit = mct.ModelCardToolkit(model_card_dir)

# https://github.com/tensorflow/model-card-toolkit/blob/master/model_card_toolkit/model_card.py
model_card = toolkit.scaffold_assets()

Qualitative Analysis

In [39]:
# Assuming you have already calculated the metrics as variables like:
accuracy = 0.85
precision = 0.80
recall = 0.75
f1_score = 0.77

# You can also slice the data (e.g., gender-based metrics)
male_accuracy = 0.88
female_accuracy = 0.82

# Education-based slices (e.g., for Graduate and Not Graduate)
graduate_accuracy = 0.86
not_graduate_accuracy = 0.83

# Adding the performance metrics to the model card
model_card.quantitative_analysis.performance_metrics = [
    # Overall accuracy for the model
    mct.PerformanceMetric(type='accuracy', value=str(accuracy)),

    # Slice by gender (male and female accuracy)
    mct.PerformanceMetric(type='accuracy', value=str(male_accuracy), slice='Male'),
    mct.PerformanceMetric(type='accuracy', value=str(female_accuracy), slice='Female'),

    # Slice by education (graduate and not graduate)
    mct.PerformanceMetric(type='accuracy', value=str(graduate_accuracy), slice='Graduate'),
    mct.PerformanceMetric(type='accuracy', value=str(not_graduate_accuracy), slice='Not Graduate'),

    # Adding other performance metrics like precision, recall, and F1-score
    mct.PerformanceMetric(type='precision', value=str(precision)),
    mct.PerformanceMetric(type='recall', value=str(recall)),
    mct.PerformanceMetric(type='f1_score', value=str(f1_score))
]
# Export Model Card
toolkit.update_model_card(model_card)
html_doc = toolkit.export_format()
display.display(display.HTML(html_doc))

Name,Value
accuracy,0.85
"accuracy, Male",0.88
"accuracy, Female",0.82
"accuracy, Graduate",0.86
"accuracy, Not Graduate",0.83
precision,0.8
recall,0.75
f1_score,0.77


In [40]:
import model_card_toolkit as mct
import os

# Initialize Model Card Toolkit
model_card_dir = './model_card'
os.makedirs(model_card_dir, exist_ok=True)
toolkit = mct.ModelCardToolkit(model_card_dir)

# Scaffold and populate the Model Card
model_card = toolkit.scaffold_assets()
model_card.model_details.name = "Loan Approval Classifier"
model_card.model_details.overview = (
    "This classification model predicts loan approval status based on demographic and financial features."
)
model_card.model_details.owners = [mct.Owner(name="Risper Joy", contact="rjoy.gmail@example.com")]

# Model Considerations
model_card.considerations.use_cases = [mct.UseCase(description="Financial institutions using this for loan approvals.")]
model_card.considerations.limitations = [
    mct.Limitation(description="May not generalize to new data; sensitivity to bias in features like gender.")
]
model_card.considerations.ethical_considerations = [
    mct.Risk(
        name="Bias",
        mitigation_strategy="Ensuring fairness and transparency in loan approval."
    )
]

# Add Performance Metrics
accuracy = 0.85  # Assume calculated
model_card.quantitative_analysis.performance_metrics = [
    mct.PerformanceMetric(type="accuracy", value=str(accuracy))
]

# Export Model Card
toolkit.update_model_card(model_card)
html_doc = toolkit.export_format()
display.display(display.HTML(html_doc))


Name,Value
accuracy,0.85
