# LLM example Usecase: Event Classification

Task: Classify events into 3 categories, or None if the event is not relevant to the three categories.
Categories are the three finetuned models: Flood, Crop, Burn Scars

In [54]:
%pip install pandas accelerate transformers torch scikit-learn marvin



### Load Training data

In [55]:
import pandas as pd

# Specify the path to your TSV file
file_path = "../data/event_data.tsv"
df = pd.read_csv(file_path, sep='\t')
df.fillna("None", inplace=True)

print(df)

                                                 Text       Class
0               Flooding in New York on May 10, 2023.      Floods
1   Show images of burn scars in California from A...  Burn Scars
2      Find crop types in India observed last Monday.       Crops
3          June 15, 2023: Flooding in Miami, Florida.      Floods
4   Display burn scars in the Rockies as of July 2...  Burn Scars
..                                                ...         ...
71           Display the traditional foods of Mexico.        None
72           Provide the history of the Roman Empire.        None
73          Highlight the famous artists from France.        None
74          Show the transportation system of London.        None
75           Find the key historical events in Egypt.        None

[76 rows x 2 columns]


## Load the Encoder Model using Huggingface Lib

In [56]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding

# Load your domain-specific encoder model (replace 'model_name' with your model's name)
model_name = 'nasa-impact/nasa-smd-ibm-distil-v0.1'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4) # 3 event types + 1 'None' class
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at nasa-impact/nasa-smd-ibm-distil-v0.1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Encode the data and Labels into ML-ready format

In [57]:
## encode labels
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset

# Create a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'input_ids': self.encodings['input_ids'][idx],
            'attention_mask': self.encodings['attention_mask'][idx],
            'labels': self.labels[idx]
        }

label_encoder = LabelEncoder()
df['labels'] = label_encoder.fit_transform(df['Class'])
# print the label mapping
for index, item in enumerate(label_encoder.classes_):
    print(item, '->', index)

data = df
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42, stratify=data['labels'])
train_encodings = tokenizer(list(train_data['Text']), return_tensors='pt', padding=True, truncation=True, max_length=512, return_attention_mask=True)
test_encodings = tokenizer(list(test_data['Text']), return_tensors='pt', padding=True, truncation=True, max_length=512, return_attention_mask=True)
train_dataset = CustomDataset(train_encodings, torch.tensor(train_data['labels'].tolist()))
test_dataset = CustomDataset(test_encodings, torch.tensor(test_data['labels'].tolist()))


Burn Scars -> 0
Crops -> 1
Floods -> 2
None -> 3


## Create training and evaluation arguments for the Huggingface Trainer

In [58]:
training_args = TrainingArguments(
    per_device_train_batch_size=24,
    per_device_eval_batch_size=24,
    evaluation_strategy="steps",
    output_dir="./output",
    num_train_epochs=70,
    save_steps=50,
    save_total_limit=2,
    remove_unused_columns=True,
    logging_dir="./logs",
    optim="adamw_torch",
    learning_rate=5e-5,
    overwrite_output_dir=True,
    do_train=True,
    do_eval=True,
    logging_steps=10,
    seed=42,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    data_collator=data_collator,
    compute_metrics=lambda p: classification_report(p.label_ids, p.predictions.argmax(-1), output_dict=True),
)

trainer.train()

results = trainer.evaluate()

# Print classification report
print("Classification Report:")
print(results)



Step,Training Loss,Validation Loss,0,1,2,3,Accuracy,Macro avg,Weighted avg
10,1.2811,1.054871,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
20,1.0057,0.789825,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
30,0.7702,0.598694,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
40,0.6001,0.467529,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
50,0.4702,0.369904,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
60,0.3838,0.297585,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
70,0.3115,0.242844,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
80,0.2544,0.201942,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
90,0.2155,0.170853,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"
100,0.1841,0.148151,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}",1.0,"{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}","{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}"


Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}" of type <class 'dict'> for key "eval/3" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we 

Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}" of type <class 'dict'> for key "eval/2" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}" of type <class 'dict'> for key "eval/3" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we 

Classification Report:
{'eval_loss': 0.369903564453125, 'eval_0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4}, 'eval_1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'eval_2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3}, 'eval_3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6}, 'eval_accuracy': 1.0, 'eval_macro avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'eval_weighted avg': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 16}, 'eval_runtime': 0.024, 'eval_samples_per_second': 665.353, 'eval_steps_per_second': 41.585, 'epoch': 70.0}


## Evaluation on the test set

In [59]:
results

{'eval_loss': 0.369903564453125,
 'eval_0': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 4},
 'eval_1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3},
 'eval_2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 3},
 'eval_3': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6},
 'eval_accuracy': 1.0,
 'eval_macro avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 16},
 'eval_weighted avg': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 16},
 'eval_runtime': 0.024,
 'eval_samples_per_second': 665.353,
 'eval_steps_per_second': 41.585,
 'epoch': 70.0}

## save model for later use

In [60]:
import numpy as np
# Save the trained model
model.save_pretrained('event_classifier')  # Replace 'your_model_directory' with your desired directory
tokenizer.save_pretrained('event_tokenizer')  # Save the tokenizer as well
np.save('../data/label_encoder_classes.npy', label_encoder.classes_)


('event_tokenizer/tokenizer_config.json',
 'event_tokenizer/special_tokens_map.json',
 'event_tokenizer/vocab.json',
 'event_tokenizer/merges.txt',
 'event_tokenizer/added_tokens.json',
 'event_tokenizer/tokenizer.json')

## Predict using the saved model

In [66]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder
import torch
import pandas as pd

# Load the saved model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained('event_classifier')  # Load from the directory where you saved it
tokenizer = AutoTokenizer.from_pretrained('event_tokenizer')

# Get predictions
with torch.no_grad():
    outputs = model(**test_encodings)

# Extract predicted class labels
predicted_labels = torch.argmax(outputs.logits, dim=-1)

# print the text, true and predicted labels
for i in range(len(test_data)):
    print(test_data['Text'].iloc[i])
    print('True:', test_data['Class'].iloc[i])
    print('Predicted:', label_encoder.classes_[predicted_labels[i]])
    print('')

Find the major exports of Germany.
True: None
Predicted: None

Show images of burn scars in Wyoming as of the first week of March 2024.
True: Burn Scars
Predicted: Burn Scars

Show images of burn scars in Utah as of the first week of May 2024.
True: Burn Scars
Predicted: Burn Scars

November 16, 2023: Flooding in Venice, Italy.
True: Floods
Predicted: Floods

Find the major rivers in South America.
True: None
Predicted: None

Provide the currency used in Russia.
True: None
Predicted: None

Can you show crop types in Brazil observed last Thursday?
True: Crops
Predicted: Crops

June 1, 2023: Flooding in Los Angeles, California.
True: Floods
Predicted: Floods

Find current crop types in Argentina as observed today.
True: Crops
Predicted: Crops

Provide the latest imagery of flooding in Moscow, Russia, from last Monday.
True: Floods
Predicted: Floods

Find the coordinates of Mount Everest.
True: None
Predicted: None

Show images of burn scars in Arizona as of the last week of December 2023