In [21]:
import torch
import torch.nn as nn
import pandas as pd
from transformers import BertModel, BertConfig
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset


# --- Load and prepare the tabular sample ------------------------------------
# Read the CSV and round every numeric value to two decimals.
data = pd.read_csv("/content/dataSample.csv").round(2)

# Every column other than "decision" is used as a model input.
FEATURES = [name for name in data.columns if name != "decision"]

# Integer-encode the categorical columns, skipping any that this file lacks.
for col in ("appName", "apMode", "cMode"):
    if col not in data.columns:
        continue
    data[col] = LabelEncoder().fit_transform(data[col])

# Standardise the feature columns (fit and apply on the same frame).
scaler = StandardScaler()
scaler.fit(data[FEATURES])
data[FEATURES] = scaler.transform(data[FEATURES])

# One row per sample, one "token" per feature, one scalar per token:
# the trailing axis of size 1 is added by unsqueeze(-1).
X = torch.tensor(data[FEATURES].values, dtype=torch.float32).unsqueeze(-1)

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class BertFeatureExtractor(nn.Module):
    """BERT encoder over projected inputs that returns its attention maps.

    Each position of the input is projected from ``input_dim`` to
    ``hidden_dim`` by a linear layer and passed to a 4-layer ``BertModel``
    through ``inputs_embeds``. The encoder is constructed from a
    ``BertConfig`` rather than loaded from a checkpoint.

    Args:
        input_dim: Width of the last axis of the input tensor.
        hidden_dim: Hidden size of the encoder; the intermediate
            (feed-forward) size is four times this value.
        num_heads: Number of attention heads per layer.
    """

    def __init__(self, input_dim=1, hidden_dim=768, num_heads=12):
        super().__init__()

        config_kwargs = dict(
            hidden_size=hidden_dim,
            num_attention_heads=num_heads,
            num_hidden_layers=4,
            intermediate_size=4 * hidden_dim,
        )
        self.bert_config = BertConfig(**config_kwargs)
        self.bert = BertModel(self.bert_config)

        self.input_projection = nn.Linear(input_dim, hidden_dim)

    def forward(self, x):
        """Run the encoder on ``x`` and return its attention tensors.

        Args:
            x: 3-D tensor whose first two axes are used as
                (batch, sequence) and whose last axis is fed to the
                input projection.

        Returns:
            The ``attentions`` attribute of the encoder output, requested
            with ``output_attentions=True``.
        """
        n_samples, n_positions, _ = x.shape
        embedded = self.input_projection(x)

        # Every position is a real input, so the mask is all ones.
        full_mask = torch.ones(n_samples, n_positions, device=embedded.device)

        encoded = self.bert(
            inputs_embeds=embedded,
            attention_mask=full_mask,
            output_attentions=True,
        )
        return encoded.attentions


# The encoder is built from a config, so its weights are randomly
# initialised. Seed torch first so the attention maps (and the exported
# importances) are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

model = BertFeatureExtractor().to(device)
model.eval()  # inference mode: dropout layers are disabled

# Iterate over the samples in their original order, 32 at a time.
batch_size = 32
dataset = TensorDataset(X)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# One accumulator slot per feature. The length comes from X's sequence axis
# instead of a hard-coded 17, so the notebook keeps working when the CSV
# gains or loses columns.
total_feature_importance = torch.zeros(X.shape[1], device=device)


In [20]:
# Reset the running sum so that re-running this cell on its own does not
# add a second pass on top of the first one.
total_feature_importance.zero_()

with torch.no_grad():
    for batch in dataloader:
        batch_x = batch[0].to(device)

        attentions = model(batch_x)

        # Last encoder layer: (batch, heads, query, key), post-softmax.
        last_layer_attention = attentions[-1]

        # Average the heads -> (batch, query, key).
        head_averaged = last_layer_attention.mean(dim=1)

        # Average over the QUERY axis -> (batch, key): how much attention each
        # feature receives. Averaging over the key axis instead would always
        # give 1 / seq_len, because every softmax row sums to 1.
        feature_importance = head_averaged.mean(dim=1)

        # Sum over the samples in the batch; the mean is taken after the loop.
        total_feature_importance += feature_importance.sum(dim=0)



In [23]:
# Turn the running sum into a per-sample mean. The result goes into a new
# tensor instead of dividing in place, so re-running this cell does not
# divide the accumulator by len(data) a second time.
mean_feature_importance = total_feature_importance / len(data)

# One row per feature, in the same order as FEATURES.
feature_importance_df = pd.DataFrame(
    {
        "Feature": FEATURES,
        "Importance": mean_feature_importance.cpu().numpy(),
    }
)
feature_importance_df.to_csv("feature_importance.csv", index=False)

print("Feature importance saved to feature_importance.csv")


Feature importance saved to feature_importance.csv
