### Configure to  use GPU

In [17]:
!nvidia-smi

Wed Mar 12 20:28:27 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P0             27W /  250W |       0MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

#### Empty Cache

In [18]:
torch.cuda.empty_cache()

#### Imports

In [19]:
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import joblib
import datetime
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import OneHotEncoder
from transformers import BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import datetime


##### Current timestamp

In [20]:
# Get current timestamp
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

##### Load dataset

In [21]:
# Load the dataset (Make sure the dataset is uploaded to Kaggle's environment)
data = pd.read_csv('/kaggle/input/itcarenew/incident_report_preprocessed_final_98000_cleaned.csv')  # Replace with actual dataset path

# Prepare input and output
X = data[['Operational Categorization Tier 1', 'Summary', 'Priority', 'Organization', 'Department']]
y = data['Assigned Group']

# One-hot encode categorical features
encoder = OneHotEncoder(sparse_output=False)
X_encoded = encoder.fit_transform(X[['Operational Categorization Tier 1', 'Priority', 'Organization', 'Department']])


##### Create output directory

In [22]:
# Create an output folder in Kaggle to save models and results
output_dir = f"/kaggle/working/output_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

### Save BERT Embeddings

In [23]:
# Function to get BERT embeddings
def get_bert_embeddings(texts, tokenizer, bert_model):
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        outputs = bert_model(**inputs)
        # Get the mean of the last hidden state
        embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        embeddings.append(embedding)
    return np.vstack(embeddings)

# Get BERT embeddings for the text features
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')
summary_embeddings = get_bert_embeddings(X['Summary'].tolist(), tokenizer, bert_model)

# Save BERT embeddings
np.save(f'{output_dir}/bert_embeddings.npy', summary_embeddings)

 ### Load BERT Embeddings for Training

In [25]:
# Load BERT embeddings
summary_embeddings = np.load(f'{output_dir}/bert_embeddings.npy')

# Combine the one-hot encoded features with BERT embeddings
X_final = np.hstack((X_encoded, summary_embeddings))

### Modify the Model to Use Precomputed BERT Embeddings

In [26]:
class NeuralNetworkWithPrecomputedBERT(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetworkWithPrecomputedBERT, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, output_size)
        self.leaky_relu = nn.LeakyReLU(0.01)
        self.dropout = nn.Dropout(0.3)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # Pass through fully connected layers
        x = self.leaky_relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.leaky_relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.leaky_relu(self.bn3(self.fc3(x)))
        x = self.fc4(x)
        return self.softmax(x)

### Train the Model

In [27]:
# Map y to numeric values and save the mapping
y_encoded, y_mapping = pd.factorize(y)
y_mapping_dict = dict(enumerate(y_mapping))  # Create mapping for y

joblib.dump(y_mapping_dict, f'{output_dir}/advanced_new_y_mapping.pkl')  # Save y mapping

# Save the mapping for X (One-Hot Encoded features)
X_mapping_dict = {i: col for i, col in enumerate(encoder.get_feature_names_out())}  # Create mapping for X
joblib.dump(X_mapping_dict, f'{output_dir}/advanced_new_X_mapping.pkl')  # Save X mapping

# Split the data into training, validation, and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X_final, y_encoded, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Save X_test and y_test to separate files for future use
np.save(f'{output_dir}/advanced_X_test.npy', X_test)
np.save(f'{output_dir}/advanced__test.npy', y_test)
np.save(f'{output_dir}/advanced_X_val.npy', X_val)
np.save(f'{output_dir}/advanced_y_val.npy', y_val)

In [28]:
# Convert to PyTorch tensors

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for batch processing
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
input_size = X_final.shape[1]
output_size = len(np.unique(y_encoded))
model = NeuralNetworkWithPrecomputedBERT(input_size, output_size)
criterion = nn.NLLLoss()
optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning Rate Scheduler
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*100)

# Training loop with Early Stopping
epochs = 100
best_val_accuracy = 0
patience = 10  # Early stopping patience
counter = 0

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    scheduler.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(train_loader):.4f}')

    # Validation metrics
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        _, val_predicted = torch.max(val_outputs.data, 1)
        val_accuracy = accuracy_score(y_val, val_predicted)
        print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            counter = 0
            # Save the best model
            torch.save(model.state_dict(), f'{output_dir}/best_trained_model_{timestamp}.pth')
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping due to no improvement in validation accuracy.")
                break

# Test metrics
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, test_predicted = torch.max(test_outputs.data, 1)
    test_accuracy = accuracy_score(y_test, test_predicted)
    print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Save the encoder and metadata
torch.save(model.state_dict(), f'{output_dir}/advanced_trained_model.pth')
torch.save(model, f'{output_dir}/advanced_trained_model_2.pth')
joblib.dump(encoder, f'{output_dir}/advanced_trained_encoder.pkl')
joblib.dump({'input_size': input_size, 'output_size': output_size}, f'{output_dir}/advanced_metadata.pkl')
print("Model, encoder, and metadata saved successfully.")



Epoch [1/100], Loss: 1.2139
Validation Accuracy: 68.03%
Epoch [2/100], Loss: 1.0454
Validation Accuracy: 68.68%
Epoch [3/100], Loss: 1.0126
Validation Accuracy: 69.18%
Epoch [4/100], Loss: 0.9949
Validation Accuracy: 69.67%
Epoch [5/100], Loss: 0.9828
Validation Accuracy: 69.34%
Epoch [6/100], Loss: 0.9683
Validation Accuracy: 69.34%
Epoch [7/100], Loss: 0.9602
Validation Accuracy: 69.70%
Epoch [8/100], Loss: 0.9512
Validation Accuracy: 69.40%
Epoch [9/100], Loss: 0.9448
Validation Accuracy: 69.48%
Epoch [10/100], Loss: 0.9362
Validation Accuracy: 69.99%
Epoch [11/100], Loss: 0.9302
Validation Accuracy: 69.88%
Epoch [12/100], Loss: 0.9232
Validation Accuracy: 70.14%
Epoch [13/100], Loss: 0.9209
Validation Accuracy: 70.02%
Epoch [14/100], Loss: 0.9152
Validation Accuracy: 69.81%
Epoch [15/100], Loss: 0.9110
Validation Accuracy: 69.56%
Epoch [16/100], Loss: 0.9071
Validation Accuracy: 69.71%
Epoch [17/100], Loss: 0.9008
Validation Accuracy: 69.97%
Epoch [18/100], Loss: 0.8980
Validation 

## Fine Tune 2

In [None]:
# Convert to PyTorch tensors

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for batch processing
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
input_size = X_final.shape[1]
output_size = len(np.unique(y_encoded))
model = NeuralNetworkWithPrecomputedBERT(input_size, output_size)
criterion = nn.NLLLoss()
optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning Rate Scheduler
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*100)

# Training loop with Early Stopping
epochs = 100
best_val_accuracy = 0
patience = 10  # Early stopping patience
counter = 0

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()  
        epoch_loss += loss.item()

    scheduler.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(train_loader):.4f}')

    # Validation metrics
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        _, val_predicted = torch.max(val_outputs.data, 1)
        val_accuracy = accuracy_score(y_val, val_predicted)
        print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')


# Test metrics
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, test_predicted = torch.max(test_outputs.data, 1)
    test_accuracy = accuracy_score(y_test, test_predicted)
    print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Save the encoder and metadata
torch.save(model.state_dict(), f'{output_dir}/advanced_trained_model_2.pth')
torch.save(model, f'{output_dir}/advanced_trained_model_2_1.pth')
joblib.dump(encoder, f'{output_dir}/advanced_trained_encoder_2.pkl')
joblib.dump({'input_size': input_size, 'output_size': output_size}, f'{output_dir}/advanced_metadata_2.pkl')
print("Model, encoder, and metadata saved successfully.")



Epoch [1/100], Loss: 1.2168
Validation Accuracy: 67.98%
Epoch [2/100], Loss: 1.0478
Validation Accuracy: 68.40%
Epoch [3/100], Loss: 1.0166
Validation Accuracy: 68.63%
Epoch [4/100], Loss: 0.9937
Validation Accuracy: 68.98%
Epoch [5/100], Loss: 0.9802
Validation Accuracy: 69.12%
Epoch [6/100], Loss: 0.9693
Validation Accuracy: 69.86%
Epoch [7/100], Loss: 0.9596
Validation Accuracy: 69.58%
Epoch [8/100], Loss: 0.9502
Validation Accuracy: 69.50%
Epoch [9/100], Loss: 0.9435
Validation Accuracy: 69.37%
Epoch [10/100], Loss: 0.9379
Validation Accuracy: 69.78%
Epoch [11/100], Loss: 0.9320
Validation Accuracy: 69.16%
Epoch [12/100], Loss: 0.9245
Validation Accuracy: 69.80%
Epoch [13/100], Loss: 0.9217
Validation Accuracy: 69.88%
Epoch [14/100], Loss: 0.9139
Validation Accuracy: 69.89%
Epoch [15/100], Loss: 0.9117
Validation Accuracy: 69.73%
Epoch [16/100], Loss: 0.9061
Validation Accuracy: 70.12%
Epoch [17/100], Loss: 0.9047
Validation Accuracy: 70.02%
Epoch [18/100], Loss: 0.8982
Validation 

## Fine Tune 3

In [None]:
# Convert to PyTorch tensors

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoader for batch processing
batch_size = 128
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
input_size = X_final.shape[1]
output_size = len(np.unique(y_encoded))
model = NeuralNetworkWithPrecomputedBERT(input_size, output_size)
criterion = nn.NLLLoss()
optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning Rate Scheduler
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader)*100)

# Training loop with Early Stopping and Training Accuracy
epochs = 100
best_val_accuracy = 0
patience = 10  # Early stopping patience
counter = 0

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    correct_train = 0
    total_train = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        # Calculate training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_train += batch_y.size(0)
        correct_train += (predicted == batch_y).sum().item()

    # Calculate training accuracy for the epoch
    train_accuracy = correct_train / total_train

    scheduler.step()
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(train_loader):.4f}, Training Accuracy: {((train_accuracy * 100)):.2f}%')

    # Validation metrics
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        _, val_predicted = torch.max(val_outputs.data, 1)
        val_accuracy = accuracy_score(y_val, val_predicted)
        print(f'Validation Accuracy: {((val_accuracy * 100)):.2f}%')

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            counter = 0
            # Save the best model
            torch.save(model.state_dict(), f'{output_dir}/best_trained_model_final_{timestamp}.pth')
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping due to no improvement in validation accuracy.")
                break

# Test metrics
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, test_predicted = torch.max(test_outputs.data, 1)
    test_accuracy = accuracy_score(y_test, test_predicted)
    print(f'Test Accuracy: {((test_accuracy * 100)):.2f}%')

# Save the encoder and metadata
torch.save(model.state_dict(), f'{output_dir}/advanced_trained_model_final.pth')
torch.save(model, f'{output_dir}/advanced_trained_model_2_final.pth')
joblib.dump(encoder, f'{output_dir}/advanced_trained_encoder_final.pkl')
joblib.dump({'input_size': input_size, 'output_size': output_size}, f'{output_dir}/advanced_metadata_final.pkl')
print("Model, encoder, and metadata saved successfully.")



Epoch [1/100], Loss: 1.2504, Training Accuracy: 80.87%
Validation Accuracy: 83.01%
Epoch [2/100], Loss: 1.0442, Training Accuracy: 82.99%
Validation Accuracy: 83.81%
Epoch [3/100], Loss: 1.0070, Training Accuracy: 83.62%
Validation Accuracy: 83.95%
Epoch [4/100], Loss: 0.9863, Training Accuracy: 84.15%
Validation Accuracy: 83.57%
Epoch [5/100], Loss: 0.9722, Training Accuracy: 84.24%
Validation Accuracy: 84.39%
Epoch [6/100], Loss: 0.9619, Training Accuracy: 84.56%
Validation Accuracy: 84.40%
Epoch [7/100], Loss: 0.9510, Training Accuracy: 84.74%
Validation Accuracy: 84.69%
Epoch [8/100], Loss: 0.9431, Training Accuracy: 84.88%
Validation Accuracy: 84.78%
Epoch [9/100], Loss: 0.9352, Training Accuracy: 84.96%
Validation Accuracy: 85.03%
Epoch [10/100], Loss: 0.9302, Training Accuracy: 85.18%
Validation Accuracy: 84.90%
Epoch [11/100], Loss: 0.9258, Training Accuracy: 85.33%
Validation Accuracy: 84.80%
Epoch [12/100], Loss: 0.9172, Training Accuracy: 85.35%
Validation Accuracy: 84.76%
E