In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
#pip install torch torchvision torchaudio

In [3]:
data = pd.read_csv('/Users/swithana/git/d2i/patra-toolkit/examples/notebooks/data/adult/train.csv')

In [4]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
# Drop irrelevant columns
data_cleaned = data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Fill missing values in 'Age' with the median and 'Embarked' with the mode

data_cleaned['Age'] = data_cleaned['Age'].fillna(data_cleaned['Age'].median())
data_cleaned['Embarked'] = data_cleaned['Embarked'].fillna(data_cleaned['Embarked'].mode()[0])

In [6]:
# Encode categorical variables 'Sex' and 'Embarked'
label_encoder_sex = LabelEncoder()
label_encoder_embarked = LabelEncoder()

In [7]:
data_cleaned['Sex'] = label_encoder_sex.fit_transform(data_cleaned['Sex'])
data_cleaned['Embarked'] = label_encoder_embarked.fit_transform(data_cleaned['Embarked'])

In [8]:
# Separate features (X) and target variable (y)
X = data_cleaned.drop(columns=['Survived'])
y = data_cleaned['Survived']

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
X_test_data = X_test
y_test_data = y_test

In [10]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to NumPy arrays and then to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)  # Reshape for output

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [11]:
# Define a simple neural network
class SimpleNN(nn.Module):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 1)  # Simplified to one layer
    
    def forward(self, x):
        return self.fc1(x)

In [12]:
# Instantiate the model
input_size = X_train.shape[1]  # Number of features
model = SimpleNN(input_size)

# Set up the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Training the model
num_epochs = 100  # Number of training epochs
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Clear the gradients

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)  # Compute the loss

    # Backward pass
    loss.backward()  # Compute the gradients
    optimizer.step()  # Update the weights

    # Print the loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 1.1006
Epoch [20/100], Loss: 1.0498
Epoch [30/100], Loss: 1.0020
Epoch [40/100], Loss: 0.9571
Epoch [50/100], Loss: 0.9149
Epoch [60/100], Loss: 0.8748
Epoch [70/100], Loss: 0.8368
Epoch [80/100], Loss: 0.8007
Epoch [90/100], Loss: 0.7663
Epoch [100/100], Loss: 0.7335


In [14]:
# Evaluate the model
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    predictions = model(X_test_tensor)
    test_loss = criterion(predictions, y_test_tensor)
    
    print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 0.7769


In [15]:
from patra_toolkit import ModelCard, AIModel, BiasAnalysis, ExplainabilityAnalysis, Metric

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
mc = ModelCard(
            name="Titanic Disaster Analysis",
            version="0.1",
            short_description="Titanic Disaster Analysis using pytorch for demonstration of Patra Model Cards.",
            full_description="We have trained a ML model using the pytorch framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
            keywords="titanic, pytorch, explainability, fairness, patra",
            author="Isuru Gamage",
            input_type="Tabular",
            category="classification",
            foundational_model="None",
        )

mc.input_data = 'https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis'
mc.output_data = 'https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01'

In [17]:
ai_model = AIModel(
            name="Survived prediction pytorch model",
            version="0.1",
            description="Census classification problem using pytorch Neural Network using the Titanic Disaster Analysis Dataset",
            owner="Isuru Gamage",
            location="https://github.iu.edu/d2i/sales/tensorflow_model",
            license="BSD-3 Clause",
            framework="pytorch",
            model_type="dnn",
            test_accuracy= 0.2
        )
ai_model.populate_model_structure(model)

In [18]:
ai_model.add_metric("Test loss", test_loss.item())
ai_model.add_metric("Epochs", 100)
ai_model.add_metric("Batch Size", 32)
ai_model.add_metric("Optimizer", "Adam")
ai_model.add_metric("Learning Rate", 0.0001)
ai_model.add_metric("Input Shape", 0.2)

In [19]:
mc.ai_model = ai_model

In [20]:
mc.populate_requirements()

In [21]:
print(mc)

{
    "name": "Titanic Disaster Analysis",
    "version": "0.1",
    "short_description": "Titanic Disaster Analysis using pytorch for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the pytorch framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "titanic, pytorch, explainability, fairness, patra",
    "author": "Isuru Gamage",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01",
    "foundational_model": "None",
    "ai_model": {
        "name": "Survived prediction pytorch model",
        "version": "0.1",
        "description": "Census classification problem using pytorch Neural Network us

In [22]:
print(data_cleaned.columns.tolist())

['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']


In [23]:
y_pred_tensor = model(X_test_tensor)  # Directly call the model
y_pred = (y_pred_tensor >= 0.5).flatten()

mc.populate_bias(X_test, y_test_data, y_pred, "Sex", X_test_data['Sex'], model) 


In [24]:

x_columns = data_cleaned.columns.tolist()
x_columns.remove('Survived')

X_test_tensor = torch.tensor(X_test[:10], dtype=torch.float32)

mc.populate_xai(X_test_tensor, x_columns, model, 7)

In [25]:
print(mc)

{
    "name": "Titanic Disaster Analysis",
    "version": "0.1",
    "short_description": "Titanic Disaster Analysis using pytorch for demonstration of Patra Model Cards.",
    "full_description": "We have trained a ML model using the pytorch framework to predict profit for Titanic Disaster Analysis dataset. We leverage this data to run the Patra model cards to capture metadata about the model as well as fairness and explainability metrics.",
    "keywords": "titanic, pytorch, explainability, fairness, patra",
    "author": "Isuru Gamage",
    "input_type": "Tabular",
    "category": "classification",
    "input_data": "https://www.kaggle.com/datasets/monisamir/titanic-disaster-analysis",
    "output_data": "https://github.iu.edu/d2i/dockerhub/tensorflow/titanic_modelv01",
    "foundational_model": "None",
    "ai_model": {
        "name": "Survived prediction pytorch model",
        "version": "0.1",
        "description": "Census classification problem using pytorch Neural Network us

In [None]:
mc.submit("http://127.0.0.1:5002")

In [None]:
mc.save("/Users/swithana/git/d2i/patra-toolkit/examples/model_cards/pytorch_titanic_data.json")