In [1]:
# CLEAR CACHE 
import gc
import torch
# Run a garbage-collection pass first so that unreachable Python objects
# (including any tensors they still reference) are released.
gc.collect()

# Then ask PyTorch to release the cached GPU memory it is no longer using.
# NOTE(review): presumably meant to free memory left over from earlier cells
# before the models below are loaded - confirm.
torch.cuda.empty_cache()

In [2]:
import time
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
# Timestamp taken before any loading so the final report covers the whole run.
start_time = time.time()

# Reference dataset: its label columns define the class-index mappings below.
file_path = 'eris_cleaned_no_dash.csv'  # Update with the correct file path
df = pd.read_csv(file_path)

# Mapping key -> dataset column that holds the labels for that key.
# Only "consequence" reads from a differently named column.
_label_columns = {
    "context": "context",
    "motive": "motive",
    "consequence": "longtermconsequence",
    "level": "level",
}

# For every key, number the distinct column values in order of first
# appearance: {label value: class index}.
# NOTE(review): this assumes the order matches the class ids the saved models
# were trained with - confirm against the training code.
mappings = {
    name: {value: position for position, value in enumerate(df[column].unique())}
    for name, column in _label_columns.items()
}

# Directory of the saved tokenizer/model for each mapping key.
model_paths = dict(
    context='./Knowledge_Extraction/saved_model_Roberta_large_context/',
    motive='./Knowledge_Extraction/saved_model_Roberta_large_motive/',
    consequence='./Knowledge_Extraction/saved_model_Roberta_large_consequence/',
    level='./Knowledge_Extraction/saved_model_Roberta_large_level/',
)

# General function to classify new sentences for any label
def classify_new_sentences(sentences, model_path, label_name, batch_size=32):
    """Predict one label per sentence with a saved RoBERTa classifier.

    Args:
        sentences: A single string or an iterable of strings to classify.
        model_path: Directory from which the tokenizer and the model are
            loaded via ``from_pretrained``.
        label_name: Key into the module-level ``mappings`` dict; selects the
            label set used to translate class indices back to label values.
        batch_size: Number of sentences tokenized and sent through the model
            per forward pass. Keeps peak memory bounded for large inputs.

    Returns:
        A list with one label value per input sentence, in input order.
        An empty input yields an empty list without loading the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If ``label_name`` is not a key of ``mappings``.
    """
    # A bare string would otherwise be sliced character by character below.
    sentences = [sentences] if isinstance(sentences, str) else list(sentences)
    if not sentences:
        return []
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    # Index -> label lookup, built once instead of once per prediction.
    # NOTE(review): relies on the insertion order of ``mappings[label_name]``
    # matching the class ids used when the model was trained - confirm.
    index_to_label = list(mappings[label_name])

    # Load the tokenizer and model
    tokenizer = RobertaTokenizer.from_pretrained(model_path)
    model = RobertaForSequenceClassification.from_pretrained(model_path)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    pred_classifications = []
    with torch.no_grad():
        for start in range(0, len(sentences), batch_size):
            batch = sentences[start:start + batch_size]
            encodings = tokenizer(batch, truncation=True, padding=True, return_tensors='pt')
            encodings = {key: val.to(device) for key, val in encodings.items()}
            logits = model(**encodings).logits
            # Softmax is monotonic, so the argmax of the raw logits already
            # identifies the most probable class.
            pred_ids = torch.argmax(logits, dim=1).tolist()
            pred_classifications.extend(index_to_label[idx] for idx in pred_ids)

    return pred_classifications
# Example usage for all labels
def classify_sentences_across_models(sentences):
    """Classify the sentences once per entry in ``model_paths``.

    Args:
        sentences: The sentences handed unchanged to
            ``classify_new_sentences`` for every model.

    Returns:
        A dict keyed like ``model_paths``; each value is the list of
        predicted labels returned for that key.
    """
    return {
        name: classify_new_sentences(sentences, path, name)
        for name, path in model_paths.items()
    }

# Function to classify sentences from a CSV file and print results
def classify_sentences_from_csv(csv_file_path, output_path='classified_sentences.csv'):
    """Classify the ``text`` column of a CSV file, print and save the results.

    Args:
        csv_file_path: Path of the input CSV; it must have a ``text`` column.
        output_path: Where the input rows plus one added column per label
            are written. Defaults to ``'classified_sentences.csv'``.

    Raises:
        ValueError: If the CSV has no ``text`` column.
    """
    # Load the CSV with pandas' default encoding; pass e.g. encoding='latin1'
    # to read_csv if the file is not UTF-8.
    input_df = pd.read_csv(csv_file_path)

    # Ensure the column 'text' exists
    if 'text' not in input_df.columns:
        raise ValueError("CSV file must contain a 'text' column.")

    # Empty cells are read as NaN, which the tokenizer cannot process; skip
    # those rows instead of failing the whole run.
    missing = input_df['text'].isna()
    if missing.any():
        print(f"Skipping {int(missing.sum())} row(s) with missing text.")
        input_df = input_df[~missing]

    # Cast to str so that a column pandas parsed as numeric still yields text.
    sentences = input_df['text'].astype(str).tolist()

    # Classify each sentence across all models
    predictions = classify_sentences_across_models(sentences)

    # Print the predictions, one block per sentence
    for i, sentence in enumerate(sentences):
        print(f"Sentence: {sentence}")
        for label_name, preds in predictions.items():
            print(f"{label_name.capitalize()}: {preds[i]}")
        print("-" * 50)

    # Save the classified rows: the input columns plus one column per label
    output_df = input_df.copy()
    for label_name, preds in predictions.items():
        output_df[label_name] = preds

    output_df.to_csv(output_path, index=False)
    print(f"Results have been saved to '{output_path}'.")

# Usage example
# Run the full pipeline on the input CSV, then report the wall-clock time.
csv_file_path = 'New_sentences.csv'  # Update with the path to your CSV file
classify_sentences_from_csv(csv_file_path)

# start_time was taken before the reference dataset was read, so the elapsed
# time covers that load as well as the classification itself.
end_time = time.time()
elapsed_time = end_time - start_time
# Split the elapsed seconds into whole hours, whole minutes and the remainder.
hours, rem = divmod(elapsed_time, 3600)
minutes, seconds = divmod(rem, 60)
print(f"Elapsed time: {int(hours)} hours, {int(minutes)} minutes, {seconds:.2f} seconds")

Sentence: A lover stabs their partner in a fit of jealousy after discovering infidelity.
Context: conspiracy
Motive: emotional
Consequence: death
Level: interpersonal
--------------------------------------------------
Sentence: Fanatics burn down a rival sect’s temple with the priests inside.
Context: religious
Motive: political
Consequence: destruction/devastation
Level: intrasocial
--------------------------------------------------
Sentence: A gang of thieves kills a shop owner during a robbery for gold and goods.
Context: sack
Motive: economical
Consequence: plunder
Level: intersocial
--------------------------------------------------
Sentence: Ramses the second unleashed the full might of the chariots on Mageddo. This was recorded as the largest chariot battle in the bronze age.
Context: war/military campaign
Motive: tactical/strategical
Consequence: victory
Level: intersocial
--------------------------------------------------
Sentence: Driven by the need to secure valuable trade r