In [50]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from helpers import *

In [29]:
class LinearGreaterThanZero(nn.Linear):
  def __init__(self, in_features, bias=False, min_w=0.0000001):
    super().__init__(in_features, 1, bias)
    self.is_bias = bias
    self.min_w = min_w
    if bias:
      nn.init.uniform_(self.bias, self.min_w, 1.0)
    else:
      self.bias = None

  def reset_parameters(self):
    nn.init.uniform_(self.weight, 0.1, 1.0)

  def w(self):
    with torch.no_grad():
      self.weight.data[self.weight.data < 0] = self.min_w
    return self.weight

  def forward(self, input):
    return F.linear(input, self.w(), self.bias)

In [30]:
class LinearInteraction(nn.Linear):
  def __init__(self, in_features, criterion_layer):
    super().__init__(((in_features - 1) * in_features) // 2, 1, False)
    self.in_features = in_features
    self.criterion_layer = criterion_layer

  def reset_parameters(self):
    nn.init.normal_(self.weight, 0.0, 0.1)

  def w(self):
    with torch.no_grad():
      w_i = 0
      w = self.criterion_layer.w()
      for i in range(self.in_features):
        for j in range(i + 1, self.in_features):
          self.weight.data[:, w_i] = torch.max(
            self.weight.data[:, w_i], -w[:, i]
          )
          self.weight.data[:, w_i] = torch.max(
            self.weight.data[:, w_i], -w[:, j]
          )
          w_i += 1
    return self.weight

  def forward(self, input):
    return F.linear(input, self.w(), None)

In [31]:
class ThresholdLayer(nn.Module):
  def __init__(self, threshold=None, requires_grad=True):
    super().__init__()
    if threshold is None:
      self.threshold = nn.Parameter(
        torch.FloatTensor(1).uniform_(0.1, 0.5), requires_grad=requires_grad
      )
    else:
      self.threshold = nn.Parameter(
        torch.FloatTensor([threshold]), requires_grad=requires_grad
      )

  def forward(self, x):
    return x - self.threshold

In [109]:
class ChoquetConstrained(nn.Module):
  def __init__(self, criteria_nr):
    super().__init__()
    self.criteria_nr = criteria_nr
    self.criteria_layer = LinearGreaterThanZero(criteria_nr)
    self.interaction_layer = LinearInteraction(criteria_nr, self.criteria_layer)
    self.threshold_layer = ThresholdLayer()

  def forward(self, x):
    if len(x.shape) == 3: x = x[:, 0, :]
    x_wi = self.criteria_layer(x[:, : self.criteria_nr])
    x_wij = self.interaction_layer(x[:, self.criteria_nr:])
    weight_sum = self.criteria_layer.w().sum() + self.interaction_layer.w().sum()
    score = (x_wi + x_wij) / (weight_sum)
    return self.threshold_layer(score)

In [110]:
def mobious_transform(row):
  return list(row) + [
    min(row[i], row[j]) for i in range(len(row)) for j in range(i + 1, len(row))
  ]

In [130]:
path = "./src/resources/datasets/loan_sanction_labeled.csv"
dropped_cols = [
  "Loan_ID",
  "Self_Employed",
  "Education",
  "Gender",
  "Married",
  "Dependents",
  "Credit_History",
  "Property_Area"
]
encoded_cols = [
]

data = pd.read_csv(path)
data.drop(dropped_cols, axis=1, inplace=True)
data.dropna(inplace=True)
data[encoded_cols] = data[encoded_cols].apply(LabelEncoder().fit_transform)

target_map = {"N": 0, "Y": 1}
print(data.head())
criteria_nr = 4 + len(encoded_cols)
data_input = data.iloc[:, :criteria_nr].apply(
  lambda x: mobious_transform(x), axis=1, result_type="expand"
)
data_target = data["Loan_Status"].map(target_map)

X_train, X_test, y_train, y_test = train_test_split(
  data_input.values, data_target.values, test_size=0.2, random_state=1234
)

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term   
1             4583             1508.0       128.0             360.0  \
2             3000                0.0        66.0             360.0   
3             2583             2358.0       120.0             360.0   
4             6000                0.0       141.0             360.0   
5             5417             4196.0       267.0             360.0   

  Loan_Status  
1           N  
2           Y  
3           Y  
4           Y  
5           Y  


In [131]:
X_train

array([[ 2083.,  3150.,   128., ...,   128.,   360.,   128.],
       [ 3588.,     0.,   110., ...,     0.,     0.,   110.],
       [63337.,     0.,   490., ...,     0.,     0.,   180.],
       ...,
       [ 2971.,  2791.,   144., ...,   144.,   360.,   144.],
       [ 3357.,  2859.,   144., ...,   144.,   360.,   144.],
       [ 3717.,     0.,   120., ...,     0.,     0.,   120.]])

In [132]:
train_dataloader = CreateDataLoader(X_train, y_train)
test_dataloader = CreateDataLoader(X_test, y_test)

In [133]:
PATH = "choquet.pt"

In [134]:
model = ChoquetConstrained(criteria_nr)
acc, acc_test, auc, auc_test = Train(model, train_dataloader, test_dataloader, PATH,lr=0.001, epoch_nr=100)

print("Accuracy train:\t%.2f%%" % (acc * 100.0))
print("AUC train: \t%.2f%%" % (acc_test * 100.0))
print()
print("Accuracy test:\t%.2f%%" % (auc * 100.0))
print("AUC test: \t%.2f%%" % (auc_test * 100.0))


100%|██████████| 100/100 [00:00<00:00, 129.12it/s]

Accuracy train:	70.56%
AUC train: 	66.38%

Accuracy test:	49.70%
AUC test: 	56.81%





In [39]:
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint["model_state_dict"])

<All keys matched successfully>

In [40]:
weights = model.criteria_layer.w().detach().numpy()[0]
interaction_weights = model.interaction_layer.w().detach().numpy()[0]
s = weights.sum() + interaction_weights.sum()
weights = weights / s
interaction_weights = interaction_weights / s

interactions = np.zeros((criteria_nr, criteria_nr))
weight_id = 0
for i in range(criteria_nr):
  for j in range(i + 1, criteria_nr):
    interactions[i, j] = interactions[j, i] = interaction_weights[weight_id]
    weight_id += 1

In [41]:
print("Criteria weights:")
print(weights)
print()
print("Criteria interactions:")
print(interactions)

Criteria weights:
[0.2727188  0.3189538  0.1992663  0.24479568]

Criteria interactions:
[[ 0.         -0.00369659 -0.04910372 -0.04246257]
 [-0.00369659  0.         -0.01175502 -0.0442977 ]
 [-0.04910372 -0.01175502  0.          0.11558106]
 [-0.04246257 -0.0442977   0.11558106  0.        ]]


In [42]:
shapley = weights + interactions.sum(0) / 2
print("Importance of criterina (Shapley value):")
print(shapley)

Importance of criterina (Shapley value):
[0.22508735 0.28907915 0.22662746 0.25920608]
