# Inference

In [2]:
import os
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from transformers import AutoTokenizer,  AutoConfig, AutoModel, DistilBertTokenizer, AdamW, get_linear_schedule_with_warmup, DistilBertModel

from torch.optim import Adam
from torch.optim.lr_scheduler import LinearLR
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Check if CUDA (NVIDIA GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")

    # Get information about each GPU
    for i in range(num_gpus):
        gpu_info = torch.cuda.get_device_properties(i)
        print(f"GPU {i}: {gpu_info.name}")
else:
    print("No GPUs available.")


Number of available GPUs: 1
GPU 0: NVIDIA GeForce RTX 3090


In [4]:
params = {
    "max_token_len": 50,
    "batch_size": 512,
    "n_epochs": 15,
    "gpus": 1,
    "early_stop_patience": 2,
    "dropout": 0.2,
    "model_name": "roberta-base",
    "tokenizer_model_name": "roberta-base",
    "val_size": 0.2,
    "logger_filename": 'rebate-robert-logger',
    "category_encoder_filename": "Category_Encoder_Roberta_Others.pkl",
    "lr": 1e-5,
    "n_samp": 10,
    "eps": 1e-6,
    "unc_rate": 1
}

In [5]:
#Setting Random Seeds
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# setting up the device type
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [6]:
#taking up the datapoints that were not used while updation of the model parameters during training.
train_data = pd.read_csv(r"D:\workspace\Rishabh\Uncertainty_NN\Rebate_Data\1_non_vendor_15000_10.csv")  #"D:\workspace\karamjit\rebate\model_data\final_train_data_uid.csv"
val_batch = train_data[train_data.set != 'TRAIN']
val_batch.head()

Unnamed: 0,uid,source,cleaned_desc,vendor_category,encoded_category,source_2,set
1,749918,Set_3,SYMMONS SC-3 RENEWABLE SEAT,"ELECTRICAL BREAKERS, BOXES, FUSES, AND FITTINGS",43,new,TEST
7,619331,Set_3,600V 60A HUBBELLOCK CONNECTOR,"ELECTRICAL BREAKERS, BOXES, FUSES, AND FITTINGS",43,new,TEST
8,595852,Set_3,NS) S91-605B FOOT PEDAL AND HOSE FOR SPEED ROO...,"ELECTRICAL BREAKERS, BOXES, FUSES, AND FITTINGS",43,new,VAL
9,593328,Set_3,NUMA 060271 BIT QL60 6.000 CC H,"ELECTRICAL BREAKERS, BOXES, FUSES, AND FITTINGS",43,new,VAL
10,738849,Set_1,ROPE SYNTH 3/8X50 ROLL,"ELECTRICAL BREAKERS, BOXES, FUSES, AND FITTINGS",43,og,TEST


In [7]:
val_batch['encoded_category'], _ = pd.factorize(val_batch['encoded_category'])
val_batch.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_batch['encoded_category'], _ = pd.factorize(val_batch['encoded_category'])


Unnamed: 0,uid,source,cleaned_desc,vendor_category,encoded_category,source_2,set
149995,466482,Set_1,412-075-000 SLANT FIN PILOT,WARM AIR AND IGNITION CONTROLS,9,og,VAL
149996,703203,Set_1,"Q340A1082 THERMOCOUPLE 30""",WARM AIR AND IGNITION CONTROLS,9,og,TEST
149997,608096,Set_3,0130F00008 GOODMAN IGNITER -,WARM AIR AND IGNITION CONTROLS,9,new,TEST
149998,730702,Set_1,3761801 PILOT ASSEMBLY NAT GAS,WARM AIR AND IGNITION CONTROLS,9,og,TEST
149999,681423,Set_1,LIMIT DISC 250F-210F 40 DEG,WARM AIR AND IGNITION CONTROLS,9,og,TEST


In [8]:
num_classes = val_batch['encoded_category'].nunique()
num_classes

10

In [9]:
tokenizer = AutoTokenizer.from_pretrained(params['tokenizer_model_name'])
tokenizer

RobertaTokenizerFast(name_or_path='roberta-base', vocab_size=50265, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': '<mask>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	50264: AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=False, special=True),
}

In [11]:
class TorchDataset(torch.utils.data.Dataset):
    
    def __init__(self, data, tokenizer, max_token_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_token_len = max_token_len
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index: int):
        data_row = self.data.iloc[index]
        
        raw_description = data_row.cleaned_desc 
        labels = data_row.encoded_category 
        uid = data_row.uid
        
        encoding = self.tokenizer.encode_plus(
            raw_description,
            add_special_tokens=True,
            max_length=self.max_token_len,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt"
        )
        
        return_item = dict(
            uid = torch.Tensor([uid]).long().squeeze(),
            input_ids=encoding['input_ids'].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            labels=torch.Tensor([labels]).long().squeeze()
        )
        return return_item

In [None]:

class OriginalModel(nn.Module):
    def __init__(self):
        super(OriginalModel, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 20)  # Assuming 10 output classes
    
    def forward(self, x):
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class ExtendableNet(nn.Module):
	def __init__(self, base_model):
		super(ExtendableNet, self).__init__()
		self.base_model = base_model

		# Get the modules before the last layer (assuming output layer is last)
		self.pre_output_modules = nn.Sequential(*list(base_model.children())[:-1])

		# Define the new linear layer
		self.extended_layer = nn.Linear(self.get_pre_output_features(), 
										list(self.base_model.children())[-1].out_features)  # Same as logits dimension

	def get_pre_output_features(self):
		"""
		Extracts the input feature size of the logits layer.

		This function iterates through the modules of the base model
		and returns the number of features before the last layer.
		"""
		modules = [mod for name, mod in self.base_model.named_children()]
		last_non_linear_layer = modules[-1]
		if isinstance(last_non_linear_layer, nn.Linear):
			return last_non_linear_layer.in_features  # Linear layer
		else:
			# Handle cases where the last layer is not a linear layer (e.g., activation)
			return last_non_linear_layer.out_features  # Assuming final output dimension

	def forward(self, x):
		# Pass input through all layers except the last
		pre_output = self.pre_output_modules(x)

		# Reshape pre_output to a vector (1xN)
		pre_output = pre_output.view(pre_output.shape[0], -1)

		# Pass the reshaped output through the extended layer
		extended_output = self.extended_layer(pre_output)

		# Get the final output from the base model (logits)
		base_output = self.base_model(x)

		# Return both outputs (original and extended)
		return base_output, extended_output

# Example usage: assuming your base model is defined as `BaseNet`
base_model = OriginalModel()  # Replace with your actual model class
extended_model = ExtendableNet(base_model)

data = torch.randn(1,784)
# Pass your data through the extended model
extended_output1, extended_output2 = extended_model(data)

print("Extended Layer 1 Output:", extended_output1.shape)  # Shape of the output of the first extended layer
print("Extended Layer 2 Output:", extended_output2.shape)  # Shape of the output of the second extended layer

In [13]:

model_checkpoint_path = (r".\roberta_best_full.pt")

model = ExtendableNet(num_classes, params["dropout"])
model.load_state_dict(torch.load(model_checkpoint_path)["model_state_dict"])
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertClassifier(
  (bert_flavor): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): 

In [14]:
class ExtendedCELoss(nn.Module):
    """ use modified BCE loss for variance calculation with UncertainLinear network """
    def forward(self, out:torch.Tensor, y:torch.Tensor, n_samp:int=10) -> torch.Tensor:
        f = nn.CrossEntropyLoss()
        logit, sigma = out   
        dist = torch.distributions.Normal(logit, torch.exp(sigma))
        mc_logs = dist.rsample((n_samp,))
        loss = 0.

        for mc_log in mc_logs:
            loss += f(mc_log, y)
            
        loss /= n_samp

        return loss

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = ExtendedCELoss()
criterion = criterion.to(device)

In [None]:
def predict(model, x, n_samp:int=25, is_target = True):                              
    """ This function predicts the model and data uncertainty where samples are drawn from the target distribution for uncertainty prediction
    
    Input:
        model: model object instance 
        x (tuple): (input_ids, attention_mask)
        n_samp (int) :  number of samples used for uncertainty prediction.
        is_target (Boolean) : If it is True, samples are drawn from target distribution for uncertainty estimation else the same input is passed multiple times to get different predictions for uncertainty estimation

    Return : 
        epistemic (float): model uncertainty 
        aleatpry (float) : data uncertainty
    """

    if is_target: 
        logit, sigma = model.forward(x[0], x[1])
        dist = torch.distributions.Normal(logit, torch.exp(sigma))
        mc_logs = dist.rsample((n_samp,))
        probits = torch.sigmoid(mc_logs)
        epistemic = probits.var(dim=0, unbiased=True)
        aleatory = torch.exp(sigma)
        return epistemic, aleatory
    
    else:
        out = [model.forward(x[0], x[1]) for _ in range(n_samp)]
        logits = torch.stack([o[0] for o in out]).detach().cpu()   #shape = (n_samp, 512, 10)
        sigmas = torch.stack([o[1] for o in out]).detach().cpu()
        probits = torch.sigmoid(logits)
        epistemic = probits.var(dim=0, unbiased=True)
        aleatory = torch.exp(sigmas).mean(dim=0)
        return epistemic, aleatory


def get_metrics(model, x, y, n_samp:int, eps:float): 
    ''' This function helps us getting the epistemic, aleatory and scibilic uncertainty(epistemic/aleatoric) values '''

    state = model.training
    model.eval()
    with torch.no_grad():
        ep, al = predict(model, x, n_samp, is_target = True)
        sb = ep / (al + eps)
        eu, au, su = ep.cpu().numpy().mean(), al.cpu().numpy().mean(), sb.cpu().numpy().mean()
    model.train(state)

    return eu, au, su     

In [17]:
def inference(model, df):
    '''
    This function takes the model instance and the input dataframe as an input and does the inference. 
    It calculates the corresponding predictions, prediction_probability and uncertainty values for each datapoint.
    '''

    model.eval()
    all_output = []
    # all_labels = []

    dataloader = DataLoader(TorchDataset(df, tokenizer, max_token_len=params["max_token_len"]), batch_size=params["batch_size"])
    t_ep, t_al, t_sb = [], [], []
    model.eval()
    with torch.no_grad():
        for val_item_dict in tqdm(dataloader):
            val_label = val_item_dict['labels'].to(device)
            val_input_ids = val_item_dict['input_ids'].to(device)
            val_attention_mask = val_item_dict['attention_mask'].to(device)
            # val_vendor_vector = val_item_dict['vendor_vector'].to(device)
            out, var = model(val_input_ids, val_attention_mask) 
            output = nn.Softmax(dim=1)(out)
            all_output.append(output)

            ep, al, sb = get_metrics(model, (val_input_ids, val_attention_mask), val_label, params['n_samp'], params['eps'])
            
            t_ep.extend(ep)            
            t_al.extend(al)
            t_sb.extend(sb)

    ao = []
    for i in all_output:
        ao.append(i.cpu().numpy())
    ao = np.vstack(ao)
    ao.argmax(axis = 1).min(), ao.argmax(axis = 1).max()

    df['Prediction'] = ao.argmax(axis = 1)
    df['Prediction_Prob'] = ao.max(axis=1)
    df['Ep_Unc'] = t_ep
    df['Alea_Unc'] = t_al
    df['Sb_Unc'] = t_sb
    
    return df

In [18]:
val_output_df = inference(model, val_batch) 

100%|██████████| 118/118 [01:46<00:00,  1.11it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Prediction'] = ao.argmax(axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Prediction_Prob'] = ao.max(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Ep_Unc'] = t_ep
A value is trying to be set on a copy of a slice from a D

In [24]:
val_output_df.to_csv("val_inference_samp20.csv", index = False)