In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

def hess_to_tensor(H):
    hess_elements = []
    for i in range(len(H)):
        for j in range(len(H)):
            hess_elements.append(H[i][j].reshape(1))
    return torch.cat(hess_elements).reshape(len(H),len(H))

class Net(nn.Module):
    def __init__(self, n_features=7, nodes=[7], output_nodes=7,temp=0.0001):
        super(Net, self).__init__()
        self.temperature = temp
        # Build network
        n_nodes = [n_features] + nodes + [output_nodes]
        self.layers = nn.ModuleList()
        for i in range(len(n_nodes) - 1):
            linear_layer = nn.Linear(n_nodes[i], n_nodes[i+1])

            with torch.no_grad():
                linear_layer.weight.copy_(torch.eye(n_nodes[i+1], n_nodes[i]))
            with torch.no_grad():
                linear_layer.bias.zero_()
            self.layers.append(linear_layer)
            
            self.layers.append(nn.ReLU())
        
        
    # def _init_weights(self, module):
    #     if isinstance(module, nn.Linear):
    #         module.weight.data.copy_(torch.eye(7))
    #         if module.bias is not None:
    #             module.bias.data.zero_()
    #     print('INITIALED')


    def forward(self, x):
        out = self.layers[0](x)
        for layer in self.layers[1:]:
            out = layer(out)
        return out
        # out = out / self.temperature
        # return torch.softmax(out, dim=1)
        
    def set_temperature(self, temp):
        self.temperature = temp



In [2]:
filecode = 'InfA_RD_DPrmvd'
oc = np.load(f'/vols/cms/hw423/Data/Week14/octest_{filecode}.npy')
Lb = pd.read_pickle('/vols/cms/hw423/Data/Week14/Label.pkl')
dfy = pd.get_dummies(Lb)
dfw = pd.read_pickle('/vols/cms/hw423/Data/Week14/weight.pkl')
true = torch.tensor(np.array(dfy).astype(int))

labels = torch.tensor(np.array(Lb))
OC = torch.tensor(oc)
weight = torch.tensor(np.array(dfw))*138000
label = torch.tensor(np.array(dfy),dtype = torch.float32)


model = Net()

In [38]:
def smooth_argmax(tensor, dim=-1, temperature=1.0):
    softmax_tensor =F.softmax(tensor / temperature, dim=dim)
    return softmax_tensor
def confusion_matrix(OC,label,weight,model):
    label_w = weight.unsqueeze(1)*label
    pred_matrix = smooth_argmax(model(OC),temperature=.01,dim=1)
    confusion_matrix = torch.matmul(pred_matrix.t(),label_w)[1:,:]
    return confusion_matrix
def nll(theta1,OC,label,weight,model):
    cm = confusion_matrix(OC,label,weight,model)
    O = torch.sum(cm,dim=1)
    theta0 = torch.ones(1)
    theta = torch.cat([theta0,theta1])
    return -(O@(torch.log(cm@theta))-torch.sum((cm@theta)))
def InfAwareLoss(input,label,weight,theta_init,model):
    theta = torch.tensor(theta_init)
    hess = torch.func.hessian(nll,0)(theta,input,label,weight,model)
    H = hess_to_tensor(hess)
    I = torch.inverse(H)
    return torch.trace(I)**0.5

In [34]:
def tloss(out,label,weight,theta,model):
    theta1 = torch.ones(6)
    nlls = nll(theta1,OC,label,weight,model)
    hess = torch.func.hessian(nll,0)(theta,out,label,weight,model)
    H = hess_to_tensor(hess)
    I = torch.inverse(H)
    return torch.trace(I)**0.5

In [7]:
model = Net()
theta = torch.ones(6)
out = model(OC)

In [31]:
l = tloss(out,label,weight,theta,model)
print(l)

tensor(384.9692, grad_fn=<TraceBackward0>)


In [68]:
# nll(theta,OC,label,weight,model)
theta = torch.ones(6)
hess = torch.func.hessian(nll,0)(theta,OC,label,weight,model)
H = hess_to_tensor(hess)
I = -torch.inverse(H)
I

tensor([[-5.6578e-03,  1.9097e-02, -1.7581e-02,  1.9246e-01, -7.3483e-04,
          7.4694e-03],
        [ 1.9097e-02, -2.2473e-01, -9.3603e-03,  2.7169e-01, -2.0733e-01,
          2.7118e+00],
        [-1.7581e-02, -9.3602e-03, -5.1238e+00,  1.0091e+01, -7.9550e-01,
          9.2549e+00],
        [ 1.9246e-01,  2.7169e-01,  1.0091e+01, -7.7378e+01,  1.2011e+00,
         -6.5414e+00],
        [-7.3483e-04, -2.0733e-01, -7.9550e-01,  1.2011e+00, -5.1286e+00,
          3.1129e+01],
        [ 7.4694e-03,  2.7118e+00,  9.2549e+00, -6.5414e+00,  3.1129e+01,
         -2.9711e+02]], grad_fn=<NegBackward0>)

In [39]:
model = Net()
theta = torch.ones(6)
model.train()
out = model(OC)
optimiser = torch.optim.Adam(model.parameters(), lr=0.001)
optimiser.zero_grad()
loss = InfAwareLoss(out,label,weight,theta,model)
# loss = tloss(out,label,weight,theta,model)
loss

  theta = torch.tensor(theta_init)


tensor(19.6206, grad_fn=<PowBackward0>)

In [40]:
loss.backward()
for name, param in model.named_parameters():
    print(f"Gradient {name}: {param.grad}")

Gradient layers.0.weight: tensor([[-2.1199e-12, -2.8906e-11, -3.5646e-11, -3.6202e-12, -2.3931e-11,
         -3.5365e-11, -3.7856e-11],
        [-2.6858e-02, -4.6698e+00, -9.6111e-01, -1.3058e+00, -4.4255e+00,
         -8.9785e-02, -1.2818e-01],
        [-1.2794e-01, -1.3121e+00, -4.8906e+00, -2.3892e-01, -1.0260e+00,
         -2.1419e+00, -4.6623e+00],
        [-4.0741e-04, -8.8537e-02, -2.7076e-02,  6.0592e-04, -9.8041e-02,
          1.9141e-02,  9.5398e-02],
        [ 3.2064e-02,  4.6911e+00,  1.1659e+00,  1.5116e+00,  4.9557e+00,
          4.2863e-02,  8.9755e-02],
        [-6.4443e-02, -6.8549e-01, -1.6112e+00,  1.0240e-01, -3.5463e-01,
          1.7911e+00,  1.7993e+00],
        [ 1.8758e-01,  2.0647e+00,  6.3240e+00, -6.9922e-02,  9.4841e-01,
          3.7849e-01,  2.8061e+00]])
Gradient layers.0.bias: tensor([-1.6744e-10, -1.1607e+01, -1.4400e+01, -9.8918e-02,  1.2489e+01,
         9.7706e-01,  1.2639e+01])
Gradient layers.2.weight: tensor([[-2.1199e-12, -2.8906e-11, -3.5646e-1