In [None]:
# Setup
! pip install seqeval evaluate
! pip install kaleido

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess 

In [None]:
# Library imports
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, AutoConfig, DistilBertForTokenClassification, DistilBertModel, DistilBertConfig, DistilBertPreTrainedModel
from transformers import DataCollatorForTokenClassification, TrainingArguments, Trainer
from transformers.modeling_outputs import TokenClassifierOutput
from transformers.tokenization_utils_base import BatchEncoding
from datasets import Dataset, DatasetDict
import torch
import torch.nn as nn
from google.colab import drive, userdata
import pickle
import random
import re
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import plotly.express as px
from seqeval.metrics import classification_report
import evaluate
import pprint
import kaleido

In [None]:
# Mount drive
# Mounts Google Drive at /content/drive (`drive` comes from google.colab).
drive.mount("/content/drive")
# Change the working directory to the pickle folder, so the bare file names
# passed to read_pickle() later in the notebook resolve against it.
%cd '/content/drive/MyDrive/Colab Notebooks/Math_Graph/pickle_files'

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/Math_Graph/pickle_files


In [None]:
# Lift pandas' display limits so every column and every row is rendered
for display_option in ('display.max_columns', 'display.max_rows'):
  pd.set_option(display_option, None)

In [None]:
# Load the seqeval metric through the `evaluate` library
# (the recorded output shows its builder script being downloaded).
metric = evaluate.load('seqeval')

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [None]:
# Define file read function
def read_pickle(dict_file):
  """Return the object stored in the pickle file at ``dict_file``.

  The file is opened in binary mode and is closed again before returning,
  also when unpickling raises.

  NOTE(review): pickle.load can run arbitrary code while loading; only use
  this on files from a trusted source.
  """
  handle = open(dict_file, 'rb')
  try:
    return pickle.load(handle)
  finally:
    handle.close()

In [None]:
# Read in dictionary
lin_alg = read_pickle('lin_alg.pkl')

# Train/validation index files, listed in the order they are unpacked below.
names = ['X_train_indices_la.pkl', 'y_train_indices_la.pkl', 'X_valid_indices_la.pkl', 'y_valid_indices_la.pkl']

# One loaded object per file name. The list length follows `names`, so there is
# no separate placeholder size that could drift out of sync and leave None entries.
indices = [read_pickle(name) for name in names]

X_train_indices, y_train_indices, X_valid_indices, y_valid_indices = indices

## Developing the Model Parameters

In [None]:
# Massive class imbalance between labels - will modify the loss function to perform a weighted loss based on frequency of classes.
# The weights will just be 1 minus the proportion for the main label.
# Eg for  CALCULUS-AND-ANALYSIS = 1 - 0.215533 = 0.784467

# Count every NER tag over the whole corpus. Keys are inserted in order of first
# appearance, which the label ordering further down depends on.
_total_count_dictionary = dict()
for sub_dict in lin_alg.values():
  for tag in sub_dict['ner_tags']:
    _total_count_dictionary[tag] = _total_count_dictionary.get(tag, 0) + 1

# Same counts, most frequent tag first.
total_count_dictionary = dict(sorted(_total_count_dictionary.items(), key=lambda item: item[1], reverse=True))

total_count = sum(total_count_dictionary.values())
proportions = {k : v/total_count for k, v in total_count_dictionary.items()}

# Rounding to 0 decimals would accept any sum between 0.5 and 1.5; compare to 1 directly.
assert np.isclose(sum(proportions.values()), 1.0), 'Error'

weights  = {k : 1 - v for k, v in proportions.items()}

# CrossEntropyLoss uses weight[i] for class index i, so the tensor has to follow
# the tag -> index mapping that the notebook builds in the "Need mapping of NER
# tags" cell: non-'IGN' tags in first-appearance order, stably sorted by the text
# after the first '-' (tags without '-' sort first). Sorting the full tag strings
# alphabetically would attach weights to the wrong classes.
_label_order = sorted(
    (tag for tag in _total_count_dictionary if tag != 'IGN'),
    key=lambda tag: tag.split('-', 1)[1] if '-' in tag else '')

# 'IGN' is mapped to the ignore index (-100) and never occurs as a target class.
# Its weight is kept in the last slot so the tensor keeps one entry per counted
# tag, as before (the model config sizes num_labels from index2tag, which also
# carries an 'IGN' entry).
if 'IGN' in weights:
  _label_order.append('IGN')

class_weights = torch.tensor([weights[cls] for cls in _label_order], dtype=torch.float32)

loss_fct = nn.CrossEntropyLoss(weight=class_weights, ignore_index=-100)

In [None]:
# Load DistilBERT

tag_checkpoint = "dslim/distilbert-NER"

# Tokenizer (lower-casing switched off) and token-classification model,
# both taken from the same checkpoint
tag_tokenizer = AutoTokenizer.from_pretrained(
    tag_checkpoint,
    do_lower_case=False,
)
tag_model = AutoModelForTokenClassification.from_pretrained(tag_checkpoint)

# NER pipeline built from the two objects above
tags = pipeline(
    task="ner",
    tokenizer=tag_tokenizer,
    model=tag_model,
)

tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/926 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# Use the GPU when torch reports one as available, otherwise the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Collator for token-classification batches: padding enabled, PyTorch tensors returned
data_collator = DataCollatorForTokenClassification(
    tokenizer=tag_tokenizer,
    padding=True,
    return_tensors="pt",
)

In [None]:
# Need mapping of NER tags to their indices for model to use

# Collect each distinct tag once, in order of first appearance.
# 'IGN' is left out here and mapped to -100 further down.
_first_seen = dict()
for sub_dict in lin_alg.values():
  for tag in sub_dict['ner_tags']:
    if tag != 'IGN':
      _first_seen.setdefault(tag, None)

# Stable sort on the text after the first '-'; a tag without '-' gets the empty
# key and therefore sorts to the front.
tag_list = sorted(_first_seen, key=lambda tag: tag.partition('-')[2])

# The numbers carry no meaning of their own; they are just positions in tag_list.
index2tag = dict(enumerate(tag_list))
tag2index = {tag: idx for idx, tag in index2tag.items()}  # To lookup indices from tags

# Fixed entries: 'O' at 0 and 'IGN' at -100
index2tag.update({0: 'O', -100: 'IGN'})
tag2index.update({'O': 0, 'IGN': -100})
index2tag

{0: 'O',
 1: 'B-ALGEBRA',
 2: 'E-ALGEBRA',
 3: 'S-ALGEBRA',
 4: 'I-ALGEBRA',
 5: 'S-APPLIED-MATHEMATICS',
 6: 'B-APPLIED-MATHEMATICS',
 7: 'E-APPLIED-MATHEMATICS',
 8: 'I-APPLIED-MATHEMATICS',
 9: 'B-CALCULUS-AND-ANALYSIS',
 10: 'E-CALCULUS-AND-ANALYSIS',
 11: 'S-CALCULUS-AND-ANALYSIS',
 12: 'I-CALCULUS-AND-ANALYSIS',
 13: 'S-DETERMINANTS',
 14: 'B-DETERMINANTS',
 15: 'E-DETERMINANTS',
 16: 'I-DETERMINANTS',
 17: 'B-DISCRETE-MATHEMATICS',
 18: 'I-DISCRETE-MATHEMATICS',
 19: 'E-DISCRETE-MATHEMATICS',
 20: 'S-DISCRETE-MATHEMATICS',
 21: 'S-FOUNDATIONS-OF-MATHEMATICS',
 22: 'B-FOUNDATIONS-OF-MATHEMATICS',
 23: 'E-FOUNDATIONS-OF-MATHEMATICS',
 24: 'I-FOUNDATIONS-OF-MATHEMATICS',
 25: 'S-GEOMETRY',
 26: 'B-GEOMETRY',
 27: 'E-GEOMETRY',
 28: 'I-GEOMETRY',
 29: 'B-LIE-ALGEBRA',
 30: 'E-LIE-ALGEBRA',
 31: 'I-LIE-ALGEBRA',
 32: 'B-LINEAR-ALGEBRA',
 33: 'E-LINEAR-ALGEBRA',
 34: 'I-LINEAR-ALGEBRA',
 35: 'S-LINEAR-ALGEBRA',
 36: 'S-LINEAR-INDEPENDENCE',
 37: 'B-LINEAR-SYSTEMS-OF-EQUATIONS',
 38: '

In [None]:
# For every entry of lin_alg, store the integer label ids of its NER tags
# (looked up in tag2index) under the 'labels' key

for sub_dict in lin_alg.values():
  sub_dict['labels'] = [tag2index[tag] for tag in sub_dict['ner_tags']]

In [None]:
# Custom model based off of pretrained DistilBERT

class DistilBertForTokenClassification(DistilBertPreTrainedModel):
  """DistilBERT encoder followed by dropout and a linear token-classification head.

  The loss is computed with the notebook-level ``loss_fct`` rather than a
  criterion owned by the model.

  NOTE(review): this class has the same name as the
  ``DistilBertForTokenClassification`` imported from transformers at the top of
  the notebook and shadows that import from this cell onwards.
  """

  def __init__(self, config):
    """Build encoder and head from ``config``.

    Reads ``config.num_labels``, ``config.dropout`` and ``config.hidden_size``.
    """
    super().__init__(config)
    self.num_labels = config.num_labels

    # Model body
    self.distilbert = DistilBertModel(config)

    # Classification head
    self.dropout = nn.Dropout(config.dropout)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    # Initialise weights
    self.init_weights()

  def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
    """Run the encoder and the head; compute the loss when ``labels`` is given.

    Args:
      input_ids: passed through to the encoder.
      attention_mask: passed through to the encoder.
      labels: optional target class ids; flattened before the loss call.
      **kwargs: forwarded to the encoder, except ``num_items_in_batch``.

    Returns:
      TokenClassifierOutput with ``loss`` (None without labels), ``logits``,
      and the encoder's ``hidden_states`` / ``attentions``.
    """
    # Remove num_items_in_batch from kwargs if present
    kwargs.pop('num_items_in_batch', None)

    outputs = self.distilbert(input_ids=input_ids, attention_mask=attention_mask, **kwargs)
    sequence_output = self.dropout(outputs[0])
    logits = self.classifier(sequence_output)

    # Loss calc
    loss = None
    if labels is not None:
      # loss_fct is created at notebook level with a CPU weight tensor, while the
      # model may run on another device. Module.to() moves the weight in place and
      # returns the same module, so this is a no-op once the devices agree.
      criterion = loss_fct.to(logits.device)
      loss = criterion(logits.view(-1, self.num_labels), labels.view(-1))

    return TokenClassifierOutput(
        loss=loss,
        logits=logits,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )

In [None]:
# Update the existing config to the settings for custom DistilBERT
# Start from the checkpoint's config and override the label maps and label count.
config = AutoConfig.from_pretrained(tag_checkpoint, num_labels=len(index2tag), label2id=tag2index, id2label=index2tag,)
# The same three values are assigned once more on the loaded object.
# NOTE(review): presumably redundant with the keyword overrides above - confirm
# before removing, since the config's attribute setters may depend on assignment order.
# index2tag also holds the -100 -> 'IGN' entry, so len(index2tag) counts that entry too.
config.label2id = tag2index
config.id2label = index2tag
config.num_labels = len(index2tag)
# Also read by pads_to_max_dimensions() as the length every example is padded to.
config.max_position_embeddings = 512

# Initialise custom DistilBERT using these changed configurations
# (the name resolves to the class defined in this notebook, which shadows the transformers import)
tag_model_custom = DistilBertForTokenClassification(config)

In [None]:
# Load the pretrained state_dict
pretrained_model = AutoModelForTokenClassification.from_pretrained(tag_checkpoint)

# Use pretrained state_dict in tag_model_custom
# strict=True: both encoders are DistilBertModel instances, so every key is
# expected to match; a mismatch should fail here instead of silently leaving
# part of the encoder randomly initialised.
tag_model_custom.distilbert.load_state_dict(pretrained_model.distilbert.state_dict(), strict=True)

# Fresh classification head sized for the custom label set. Only this new layer
# is initialised; the encoder weights loaded above are left alone.
# NOTE(review): whether a whole-model init_weights() call would also reset the
# encoder depends on the transformers version; initialising just the head
# avoids relying on that.
tag_model_custom.classifier = nn.Linear(config.hidden_size, config.num_labels)
tag_model_custom._init_weights(tag_model_custom.classifier)

tag_model_custom.to(device)

DistilBertForTokenClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
   

## Checking untrained prediction ability

In [None]:
# For each entry of lin_alg, wrap every value in a one-element list and store
# the result as a BatchEncoding under the same key
data_dict = {
    key: BatchEncoding({field: [value] for field, value in sub_dict.items()})
    for key, sub_dict in lin_alg.items()
}

# Convert dictionary to BatchEncoding
batch_encoded_data = BatchEncoding(data_dict)

assert len(lin_alg.keys()) == len(batch_encoded_data.keys()), 'Error'

In [None]:
# Check key ordering the same between lin_alg and batch_encoded_data

my_list = list(batch_encoded_data.keys())
# Compare the two key sequences as a whole: this checks order and length together,
# so extra or missing keys fail the assertion instead of passing unnoticed or
# surfacing as an IndexError.
assert my_list == list(lin_alg.keys()), 'Error'

In [None]:
def pads_to_max_dimensions(sub_dict, max_len=None):
  """Pad ``input_ids``, ``attention_mask`` and ``labels`` of one example to ``max_len``.

  Each of the three fields may be a nested list (converted to a long tensor) or
  a tensor (copied with ``clone().detach()``). Padding is appended along the
  last dimension; the length is read from ``shape[1]``, so a leading batch
  dimension is required.

  Args:
    sub_dict: mapping holding the three fields; it is updated in place.
    max_len: target length. Defaults to ``config.max_position_embeddings``
      (read from the notebook-level ``config`` at call time).

  Returns:
    The same ``sub_dict`` object, with the three fields replaced by padded tensors.

  NOTE(review): a field longer than ``max_len`` yields a negative pad width;
  confirm inputs are already limited to ``max_len`` tokens.
  """
  if max_len is None:
    max_len = config.max_position_embeddings

  # Field name -> fill value. -100 is the label id the loss ignores.
  fill_values = {'input_ids': 0, 'attention_mask': 0, 'labels': -100}

  # Build all padded tensors first so sub_dict is only touched once every field converted.
  padded = {}
  for field, fill in fill_values.items():
    raw = sub_dict[field]
    if isinstance(raw, list):
      tensor = torch.tensor(raw, dtype=torch.long)  # Convert list to tensor
    else:
      tensor = raw.clone().detach()
    pad_width = max_len - tensor.shape[1]
    padded[field] = torch.nn.functional.pad(tensor, (0, pad_width), value=fill)

  for field, tensor in padded.items():
    sub_dict[field] = tensor

  return sub_dict

In [None]:
# Implementing above fn
# Take a snapshot of the keys, then replace each entry with its padded version
for key in list(batch_encoded_data.keys()):
  batch_encoded_data[key] = pads_to_max_dimensions(batch_encoded_data[key])

## Defining Model functions

In [None]:
idx2key = {idx: key for idx, key in enumerate(batch_encoded_data.keys())}

# Function to create Hugging Face dataset split
def create_dataset_split(indices, batch_encoded_data):
  """Build a ``Dataset`` from the examples at the given positions.

  Args:
    indices: iterable of positions into the key order of ``batch_encoded_data``.
    batch_encoded_data: mapping from example key to a mapping that holds
      'input_ids', 'attention_mask' and 'labels'.

  Returns:
    ``Dataset.from_dict`` over three columns ('input_ids', 'attention_mask',
    'labels'), one row per entry of ``indices``, in the same order.
  """
  # Resolve positions against the mapping that was passed in, not against the
  # module-level idx2key snapshot, so the lookup cannot go stale when a
  # different (or rebuilt) mapping is supplied.
  position_to_key = dict(enumerate(batch_encoded_data.keys()))

  columns = {'input_ids': [], 'attention_mask': [], 'labels': []}
  for i in indices:
    example = batch_encoded_data[position_to_key[i]]
    for column, values in columns.items():
      values.append(example[column])

  return Dataset.from_dict(columns)

# Initialises custom model
def model_init():
  """Return ``tag_model_custom`` after moving it to ``device``.

  NOTE(review): every call returns the one notebook-level model object, not a
  newly constructed model - confirm that is what the caller expects.
  """
  placed_model = tag_model_custom.to(device)
  return placed_model

In [None]:
# Create train, validation and test sets

train_dataset = create_dataset_split(X_train_indices, batch_encoded_data)
validation_dataset = create_dataset_split(X_valid_indices, batch_encoded_data)
# NOTE(review): this split is also built from X_valid_indices, so 'test' holds the
# same examples as 'validation' - confirm whether separate test indices exist.
test_dataset = create_dataset_split(X_valid_indices, batch_encoded_data)

# Have every split return torch tensors for the three model columns
tensor_columns = ['input_ids', 'attention_mask', 'labels']
for split in (train_dataset, validation_dataset, test_dataset):
  split.set_format(type='torch', columns=list(tensor_columns))

# Combine into a DatasetDict
dataset = DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset,
})

## Test Model function on single example

In [None]:
# First training example; only the two model inputs are passed on (no labels,
# so the model does not compute a loss here).
sample = train_dataset[0]
_inputs = {
    'input_ids': sample['input_ids'].clone().detach(),
    'attention_mask': sample['attention_mask'].clone().detach()}
inputs = {name: tensor.to(device) for name, tensor in _inputs.items()}

tag_model_custom.eval()

with torch.no_grad():
  # Call the module itself rather than .forward(): that is the documented entry
  # point and it also runs any registered hooks.
  outputs = tag_model_custom(**inputs).logits

y_actual = sample['labels'].to(device)

In [None]:
# Highest-scoring class per token
prediction_idx = torch.argmax(outputs, dim=-1)
prediction_batch_size, prediction_dim = prediction_idx.shape
# Number of label positions, taken from the data instead of a hard-coded 512
actual_dim = y_actual.shape[-1]

_actual, _preds = [], []
for actual_idx, pred_idx in zip(y_actual[0].tolist(), prediction_idx[0].tolist()):
  # Positions labelled -100 are skipped on both sides
  if actual_idx == -100:
    continue
  # NOTE(review): a predicted id of 0 ('O') is rewritten to -100 and therefore
  # reported as 'IGN' - kept as in the original, confirm this is intended.
  if pred_idx == 0:
    pred_idx = -100
  _preds.append(index2tag[pred_idx])
  _actual.append(index2tag[actual_idx])

# One inner list per sequence. Appending inside the token loop (as before) added
# the same list once per position instead of once per sequence.
predicted = [_preds]
actual = [_actual]

In [None]:
# Display in df: across all 512 dimensions as a double-check
first_ids = inputs['input_ids'][0].cpu().numpy()
input_ids = first_ids  # X
tokens = tag_tokenizer.convert_ids_to_tokens(first_ids)  # X in human sub-token form

prediction_idx = torch.argmax(outputs, dim=-1)
untrained_model_id = prediction_idx[0].cpu().numpy()  # Tag predictions in numeric form
y_pred = [index2tag.get(class_id, 'O') for class_id in untrained_model_id]  # Tag predictions in human word form

attn_mask = inputs['attention_mask'][0].cpu().numpy()
actual_idx = sample['labels'].squeeze().numpy()  # Actual tag index
actual_ner = [index2tag[label_id] for label_id in actual_idx]  # Actual tags

# One table row per quantity, one column per token position
table_rows = {
    "Input_ids": input_ids,
    "Tokens": tokens,
    "Untrained_ID": untrained_model_id,
    "Untrained_Tag_Prediction": y_pred,
    "Attention_Mask": attn_mask,
    "Actual_NER_Tags": actual_ner,
    "Actual_NER_idx": actual_idx,
}
pd.DataFrame(list(table_rows.values()), index=list(table_rows.keys()))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511
Input_ids,101,1109,14255,9380,4948,14715,14811,1110,1126,2805,1115,2274,170,2703,8952,117,14715,14811,1116,1122,117,1105,1173,2274,1103,2703,14255,9380,4948,1104,1296,5290,119,1188,2686,1107,170,1207,8952,1115,15208,2218,4625,117,1543,1122,5616,1107,1672,9988,20011,117,2521,1107,7378,13450,1105,9539,11556,119,1135,2399,170,2501,1648,1107,13682,5047,2982,1105,13170,8952,2191,118,8050,25665,1204,1757,119,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Tokens,[CLS],The,con,##ju,##gate,trans,##pose,is,an,operation,that,takes,a,complex,matrix,",",trans,##pose,##s,it,",",and,then,takes,the,complex,con,##ju,##gate,of,each,element,.,This,results,in,a,new,matrix,that,retains,certain,properties,",",making,it,useful,in,various,mathematical,contexts,",",particularly,in,linear,algebra,and,quantum,mechanics,.,It,plays,a,key,role,in,defining,inner,products,and,determining,matrix,self,-,ad,##join,##t,##ness,.,[SEP],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD]
,[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD],[PAD]
Untrained_ID,52,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,35,20,20,20,20,20,20,20,20,56,20,15,35,20,41,41,35,9,35,35,35,10,37,41,35,43,35,35,10,10,35,35,35,10,10,35,10,43,35,43,20,43,43,43,43,20,20,20,43,43,9,20,20,20,20,20,20,35,9,58,41,38,56,38,38,9,37,37,38,38,37,56,37,38,38,38,41,9,9,9,53,41,41,3,9,49,3,37,41,38,43,43,9,43,58,9,17,17,37,41,41,9,41,41,41,20,20,20,3,37,17,17,37,37,37,37,58,58,20,37,58,58,58,37,37,37,9,9,9,41,37,37,38,38,3,63,17,37,17,17,38,3,3,56,56,9,38,38,38,38,9,3,38,37,38,9,9,9,15,9,38,9,41,38,38,41,41,9,9,41,9,9,9,9,9,53,41,3,3,41,41,3,3,10,49,3,9,41,20,37,63,41,43,20,58,43,43,43,58,58,9,17,17,9,41,41,9,37,41,41,41,37,20,37,20,20,20,37,63,20,20,20,37,63,63,63,56,38,38,38,38,38,38,38,38,38,38,38,38,38,9,9,3,9,38,38,3,10,49,49,3,37,20,20,20,20,20,20,20,37,56,38,38,41,38,38,9,38,9,9,9,9,53,41,41,3,3,3,3,41,3,3,10,9,3,11,41,38,43,43,9,58,58,37,46,37,38,38,41,9,41,9,9,37,3,37,17,37,58,37,37,37,37,58,20,58,58,17,43,41,53,43,41,41,56,56,9,58,58,58,9,58,58,58,3,58,58,58,58,58,9,17,17,9,9,9,41,41,41,41,63,20,20,20,20,20,20,37,56,63,63,38,38,56,56,56,56,56,56,56,56,56,38,56,41,41,56,56,56,56,56,38,46,38,56,56,38,56,38,38,38,56,63,63,56,56,56,43,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,38,38,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,46,56,56,56,56,56,56,56,38,38,38,43,38,38,56,38,56,38,56,56,38,38,56,56,56,56,38,56,56,56,38,56,56,56,56,56,56,56,37,56,38,56,56,56,56,56,38,38,38,56,56,38,38,56,56,56
Untrained_Tag_Prediction,S-PERMANENTS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-LINEAR-ALGEBRA,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,I-PROBABILITY-AND-STATISTICS,S-DISCRETE-MATHEMATICS,E-DETERMINANTS,S-LINEAR-ALGEBRA,S-DISCRETE-MATHEMATICS,I-LOC,I-LOC,S-LINEAR-ALGEBRA,B-CALCULUS-AND-ANALYSIS,S-LINEAR-ALGEBRA,S-LINEAR-ALGEBRA,S-LINEAR-ALGEBRA,E-CALCULUS-AND-ANALYSIS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,S-LINEAR-ALGEBRA,B-MATRICES,S-LINEAR-ALGEBRA,S-LINEAR-ALGEBRA,E-CALCULUS-AND-ANALYSIS,E-CALCULUS-AND-ANALYSIS,S-LINEAR-ALGEBRA,S-LINEAR-ALGEBRA,S-LINEAR-ALGEBRA,E-CALCULUS-AND-ANALYSIS,E-CALCULUS-AND-ANALYSIS,S-LINEAR-ALGEBRA,E-CALCULUS-AND-ANALYSIS,B-MATRICES,S-LINEAR-ALGEBRA,B-MATRICES,S-DISCRETE-MATHEMATICS,B-MATRICES,B-MATRICES,B-MATRICES,B-MATRICES,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-MATRICES,B-MATRICES,B-CALCULUS-AND-ANALYSIS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-LINEAR-ALGEBRA,B-CALCULUS-AND-ANALYSIS,B-RECREATIONAL-MATHEMATICS,I-LOC,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,B-LINEAR-SYSTEMS-OF-EQUAT
IONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,S-PROBABILITY-AND-STATISTICS,I-LOC,I-LOC,S-ALGEBRA,B-CALCULUS-AND-ANALYSIS,I-NUMBER-THEORY,S-ALGEBRA,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-MATRICES,B-MATRICES,B-CALCULUS-AND-ANALYSIS,B-MATRICES,B-RECREATIONAL-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,B-DISCRETE-MATHEMATICS,B-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,I-LOC,B-CALCULUS-AND-ANALYSIS,I-LOC,I-LOC,I-LOC,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-ALGEBRA,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-DISCRETE-MATHEMATICS,B-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,I-LOC,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,S-ALGEBRA,E-TOPOLOGY,B-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-DISCRETE-MATHEMATICS,B-DISCRETE-MATHEMATICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,S-ALGEBRA,S-ALGEBRA,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,B-CALCULUS-AND-ANALYSIS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,S-ALGEBRA,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,E-DETERMINANTS,B-CALCULUS-AND-ANALYSIS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,I-LOC,I-LI
NEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,I-LOC,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,I-LOC,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,S-PROBABILITY-AND-STATISTICS,I-LOC,S-ALGEBRA,S-ALGEBRA,I-LOC,I-LOC,S-ALGEBRA,S-ALGEBRA,E-CALCULUS-AND-ANALYSIS,I-NUMBER-THEORY,S-ALGEBRA,B-CALCULUS-AND-ANALYSIS,I-LOC,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,E-TOPOLOGY,I-LOC,B-MATRICES,S-DISCRETE-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-MATRICES,B-MATRICES,B-MATRICES,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,B-DISCRETE-MATHEMATICS,B-DISCRETE-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,I-LOC,I-LOC,B-CALCULUS-AND-ANALYSIS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,I-LOC,I-LOC,B-LINEAR-SYSTEMS-OF-EQUATIONS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,E-TOPOLOGY,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,E-TOPOLOGY,E-TOPOLOGY,E-TOPOLOGY,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,S-ALGEBRA,B-CALCULUS-AND-ANALYSIS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,S-ALGEBRA,E-CALCULUS-AND-ANALYSIS,I-NUMBER-THEORY,I-NUMBER-THEORY,S-ALGEBRA,B-LINEAR-SYSTEMS-OF-EQUATIONS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILIT
Y-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,S-PROBABILITY-AND-STATISTICS,I-LOC,I-LOC,S-ALGEBRA,S-ALGEBRA,S-ALGEBRA,S-ALGEBRA,I-LOC,S-ALGEBRA,S-ALGEBRA,E-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,S-ALGEBRA,S-CALCULUS-AND-ANALYSIS,I-LOC,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-MATRICES,B-MATRICES,B-CALCULUS-AND-ANALYSIS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,S-NUMBER-THEORY,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LOC,B-CALCULUS-AND-ANALYSIS,I-LOC,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-LINEAR-SYSTEMS-OF-EQUATIONS,S-ALGEBRA,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-RECREATIONAL-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-LINEAR-SYSTEMS-OF-EQUATIONS,B-RECREATIONAL-MATHEMATICS,S-DISCRETE-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-DISCRETE-MATHEMATICS,B-MATRICES,I-LOC,S-PROBABILITY-AND-STATISTICS,B-MATRICES,I-LOC,I-LOC,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,B-CALCULUS-AND-ANALYSIS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,S-ALGEBRA,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-RECREATIONAL-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,B-DISCRETE-MATHEMATICS,B-DISCRETE-MATHEMATICS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,B-CALCULUS-AND-ANALYSIS,I-LOC,I-LOC,I-LOC,I-LOC,E-TOPOLOGY,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MATHEMATICS,S-DISCRETE-MAT
HEMATICS,S-DISCRETE-MATHEMATICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,E-TOPOLOGY,E-TOPOLOGY,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LOC,I-LOC,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,S-NUMBER-THEORY,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,E-TOPOLOGY,E-TOPOLOGY,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,B-MATRICES,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND
-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,S-NUMBER-THEORY,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,B-MATRICES,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,B-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-LINEAR-SYSTEMS-OF-EQUATIONS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS,I-PROBABILITY-AND-STATISTICS
Attention_Mask,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Actual_NER_Tags,IGN,O,B-MATRICES,IGN,IGN,E-MATRICES,IGN,O,O,S-CALCULUS-AND-ANALYSIS,O,O,O,B-MATRICES,E-MATRICES,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,B-LINEAR-ALGEBRA,E-LINEAR-ALGEBRA,O,O,O,O,O,O,O,O,O,O,O,O,S-ALGEBRA,O,O,O,O,O,O,O,O,O,O,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IGN,IG
N,IGN,IGN
Actual_NER_idx,-100,0,43,-100,-100,45,-100,0,0,11,0,0,0,43,45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,33,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-10
0,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100,-100


In [None]:
# Print seqeval's per-entity precision / recall / F1 table for the `actual` vs. `predicted` tag sequences
print(classification_report(actual, predicted))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                             precision    recall  f1-score   support

                    ALGEBRA       0.00      0.00      0.00       512
      CALCULUS-AND-ANALYSIS       0.00      0.00      0.00       512
               DETERMINANTS       0.00      0.00      0.00         0
       DISCRETE-MATHEMATICS       0.00      0.00      0.00         0
             LINEAR-ALGEBRA       0.00      0.00      0.00       512
LINEAR-SYSTEMS-OF-EQUATIONS       0.00      0.00      0.00         0
                        LOC       0.00      0.00      0.00         0
                   MATRICES       0.00      0.00      0.00      1024
 PROBABILITY-AND-STATISTICS       0.00      0.00      0.00         0

                  micro avg       0.00      0.00      0.00      2560
                  macro avg       0.00      0.00      0.00      2560
               weighted avg       0.00      0.00      0.00      2560



## Model Training

In [None]:
# Remove the extra leading dimension from input_ids, attention_mask, and labels in every dataset split

def fix_shape(example):
  """Squeeze the leading axis off each model input column and store it as a plain list.

  Args:
    example: A single dataset row whose 'input_ids', 'attention_mask' and
      'labels' entries support `.squeeze(0).tolist()` (e.g. torch tensors).

  Returns:
    The same `example` mapping, updated in place.
  """
  for column in ('input_ids', 'attention_mask', 'labels'):
    example[column] = example[column].squeeze(0).tolist()
  return example

# Apply fix_shape to each split, in the same order as before
for split_name in ('train', 'validation', 'test'):
  dataset[split_name] = dataset[split_name].map(fix_shape)

Map:   0%|          | 0/1106 [00:00<?, ? examples/s]

Map:   0%|          | 0/474 [00:00<?, ? examples/s]

Map:   0%|          | 0/474 [00:00<?, ? examples/s]

In [None]:
# Batched version: converts the model outputs and label ids of a whole batch into tag sequences
def returns_actual_and_predictions(outputs, labels, index_to_tag=None):
  """Convert a batch of logits and label ids into aligned tag sequences.

  Args:
    outputs: Logits as a NumPy array or torch tensor with the tag dimension
      last (this notebook's cell output shows (batch, seq_len, num_tags)).
    labels: Label ids with one row per example (tensor, NumPy array, or
      nested list). Positions equal to -100 are skipped.
    index_to_tag: Optional mapping from label id to tag string. Defaults to
      the notebook-level `index2tag`, so existing two-argument calls behave
      as before.

  Returns:
    (actual, predicted): two lists holding one list of tag strings per
    example, aligned position by position.
  """
  if index_to_tag is None:
    index_to_tag = index2tag

  # Convert outputs to a PyTorch tensor if it's a NumPy array
  logits = torch.from_numpy(outputs) if isinstance(outputs, np.ndarray) else outputs

  # One bulk conversion to Python ints instead of an .item() call per token
  prediction_rows = torch.argmax(logits, dim=-1).tolist()
  label_rows = labels.tolist() if hasattr(labels, 'tolist') else labels

  actual, predicted = [], []
  for label_row, prediction_row in zip(label_rows, prediction_rows):
    _labels, _preds = [], []
    for label, prediction in zip(label_row, prediction_row):
      # ignore label of -100
      if label != -100:
        _labels.append(index_to_tag[label])
        _preds.append(index_to_tag[prediction])
    actual.append(_labels)
    predicted.append(_preds)
  return actual, predicted

def compute_metrics(eval_pred):
  """Metric callback for the Trainer: report the overall F1 of the predictions.

  Args:
    eval_pred: Object exposing `.predictions` (model outputs) and
      `.label_ids` (reference label ids).

  Returns:
    A dict with the single key 'f1_score', taken from the 'overall_f1' entry
    of the notebook-level `metric`'s result.
  """
  references, predictions = returns_actual_and_predictions(
      eval_pred.predictions, eval_pred.label_ids)
  scores = metric.compute(predictions=predictions, references=references)
  return {'f1_score': scores['overall_f1']}

In [None]:
# Training hyperparameters
num_epochs = 15
batch_size = 15
learning_rate = 3e-5
# Used as the Trainer output directory; with push_to_hub=True the push output
# in this notebook shows it is also the Hub repository name.
model_name = "distilbert-NER-LinearAlg-finetuned"

model_arguments = TrainingArguments(
    output_dir=model_name,
    log_level="error",
    logging_strategy="steps",
    logging_steps=50,
    weight_decay=0.01,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=learning_rate,
    # Evaluate and checkpoint once per epoch. `save_steps` is not passed
    # because it only applies when save_strategy="steps".
    eval_strategy="epoch",
    save_strategy="epoch",
    disable_tqdm=False,
    # Keep every dataset column when batches are built
    remove_unused_columns=False,
    push_to_hub=True,
    # The deprecated `no_cuda=False` was dropped: it is the default.
)

# Pads inputs to a fixed length and pads labels with -100 so padded positions
# are ignored by compute_metrics.
data_collator = DataCollatorForTokenClassification(
    tokenizer=tag_tokenizer,
    return_tensors="pt",
    padding='max_length',
    label_pad_token_id=-100  # Padding token for labels
)

In [None]:
# Build the Trainer. The model is created through `model_init`, so the weights
# being trained live on `trainer.model`.
# `processing_class` replaces the `tokenizer` keyword, which is deprecated on
# Trainer.__init__ in recent transformers releases (requires transformers >= 4.46).
trainer = Trainer(
    model_init=model_init,
    args=model_arguments,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=tag_tokenizer,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
)

  trainer = Trainer(


In [None]:
# Ask PyTorch to release cached GPU memory that is no longer in use
torch.cuda.empty_cache()

In [None]:
# Sanity check: pull one batch from the trainer's dataloader and run a forward pass
train_dataloader = trainer.get_train_dataloader()

batch = next(iter(train_dataloader))  # Only the first batch is needed

# Check the batch structure
print(f"input_ids shape {batch['input_ids'].shape}")
print(f"attention_mask shape {batch['attention_mask'].shape}")
print(f"labels shape {batch['labels'].shape}\n")
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)

# Get model outputs. Only shapes are inspected here, so skip building the
# autograd graph and keep GPU memory free for training.
with torch.no_grad():
  outputs = tag_model_custom(input_ids=input_ids, attention_mask=attention_mask)

# Convert logits to CPU and numpy for debugging (if needed)
logits_np = outputs.logits.detach().cpu().numpy()
print(f"logits_np shape: {logits_np.shape}")

input_ids shape torch.Size([15, 512])
attention_mask shape torch.Size([15, 512])
labels shape torch.Size([15, 512])

logits_np shape: (15, 512, 66)


In [None]:
# Smoke test: one manual optimisation step on `batch` (fetched in the previous
# cell), with the model and all tensors on `device`.
tag_model_custom = tag_model_custom.to(device)
tag_model_custom.train()  # Set model to training mode

# Move class_weights and the loss function to the same device as the model
class_weights = class_weights.to(device)
loss_fct = loss_fct.to(device)

# Move the batch to the device once and reuse these tensors below
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)

print(f"Model device: {tag_model_custom.device}")
print(f"Class weights device: {class_weights.device}")
print(f"Input IDs device: {input_ids.device}")
print(f"Attention mask device: {attention_mask.device}")
print(f"Labels device: {labels.device}")

optimizer = torch.optim.AdamW(tag_model_custom.parameters(), lr=learning_rate)
# Clear gradients left behind by other cells so this step only uses this batch
optimizer.zero_grad()

outputs = tag_model_custom(
    input_ids=input_ids,
    attention_mask=attention_mask,
    labels=labels
)
loss = outputs.loss
print(f"logits shape: {outputs.logits.shape}")
print(f"loss: {loss}")
loss.backward()

optimizer.step()
optimizer.zero_grad()

Model device: cuda:0
Class weights device: cuda:0
Input IDs device: cuda:0
Attention mask device: cuda:0
Labels device: cuda:0
logits shape: torch.Size([15, 512, 66])
loss: 0.0006478547002188861


In [None]:
# Check functioning of trainer directly: run one Trainer.training_step on a single batch.
# Relies on `train_dataloader` created in an earlier cell.

batch = next(iter(train_dataloader))  # Get a single batch
outputs = trainer.training_step(trainer.model, batch)

print(outputs)  # Loss for the batch (the recorded output is a scalar tensor on the GPU)

tensor(0.0005, device='cuda:0')


In [None]:
# Fine-tune with the Hugging Face Trainer, which handles forward propagation, loss computation,
# backpropagation, and optimization; then push the result to the Hub with a commit message.
trainer.train()
trainer.push_to_hub(commit_message="NER Training")

Epoch,Training Loss,Validation Loss,F1 Score
1,0.0036,0.107763,0.912111
2,0.0035,0.110529,0.912403
3,0.0038,0.112857,0.911533
4,0.0034,0.108516,0.914404
5,0.0026,0.104577,0.911956
6,0.0019,0.109054,0.914286
7,0.0016,0.105926,0.919967
8,0.0017,0.107003,0.9237
9,0.0011,0.109926,0.91721
10,0.0014,0.108986,0.91835


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


events.out.tfevents.1738693350.869aa05a19b5.2916.2:   0%|          | 0.00/18.8k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Heather-Driver/distilbert-NER-LinearAlg-finetuned/commit/771c63ee1cdaf8eed058de08df17b310d068f944', commit_message='NER Training', commit_description='', oid='771c63ee1cdaf8eed058de08df17b310d068f944', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Heather-Driver/distilbert-NER-LinearAlg-finetuned', endpoint='https://huggingface.co', repo_type='model', repo_id='Heather-Driver/distilbert-NER-LinearAlg-finetuned'), pr_revision=None, pr_num=None)

In [None]:
%cd '/content/drive/MyDrive/Colab Notebooks/Math_Graph'
tag_model_custom.save_pretrained("linalg_ner_model")
tag_tokenizer.save_pretrained("linalg_ner_model")

/content/drive/MyDrive/Colab Notebooks/Math_Graph


('linalg_ner_model/tokenizer_config.json',
 'linalg_ner_model/special_tokens_map.json',
 'linalg_ner_model/vocab.txt',
 'linalg_ner_model/added_tokens.json',
 'linalg_ner_model/tokenizer.json')