In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2ForSequenceClassification, GPT2Tokenizer
import torch
import time


In [6]:
# Shared result stores: prediction_timings() writes one entry per model name
# into each of them and returns both.
prediction_dict, timing_dict = dict(), dict()

# Batch size for inference; tune it to the memory available on your system.
# NOTE(review): the functions visible in this notebook section do not read
# this module-level value (make_predictions sets its own local one).
batch_size = 32

def prediction_timings(input, load_model, max_length=512):
    """Classify ``input`` with one fine-tuned model and record the elapsed time.

    The tokenizer and model are loaded from ``./models/<load_model>`` and the
    model is placed on the GPU when one is available. Loading and the device
    transfer of the model are NOT timed; the timer covers tokenization, the
    forward pass and the mapping of class ids to labels.

    Args:
        input: Text to classify, handed to the tokenizer as-is (the notebook
            calls this with both a single string and a list of strings). The
            name shadows the builtin but is kept so keyword callers still work.
        load_model: Directory name below ``./models/``; it is also the key
            under which results are stored. ``'tuned_gpt_model'`` is loaded
            with the GPT-2 specific classes, every other name with the
            ``Auto*`` classes.
        max_length: Maximum number of tokens kept per text. An explicit limit
            is required because ``truncation=True`` without one is ignored for
            tokenizers that define no default maximum length.

    Returns:
        tuple: ``(prediction_dict, timing_dict)`` -- the module-level dicts
        themselves, updated in place, so they accumulate one entry per model
        across calls. Predictions are lists of ``'normal'`` / ``'fraud'``;
        timings are seconds as floats.

    Raises:
        KeyError: If the model predicts a class id other than 0 or 1.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dir = './models/' + load_model

    if load_model == 'tuned_gpt_model':
        tokenizer = GPT2Tokenizer.from_pretrained(model_dir)
        model = GPT2ForSequenceClassification.from_pretrained(model_dir)
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)

    # The model and the inputs must live on the same device; the transfer of
    # the weights happens before the timer starts, like the loading above.
    model = model.to(device)

    # perf_counter is monotonic and high-resolution, unlike wall-clock time().
    start_time = time.perf_counter()

    # Tokenize the input data
    inputs = tokenizer(
        input,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=max_length,
    )
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Make predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the predicted class; tolist() copies the ids back to the host, which
    # also forces pending GPU work to finish before the timer is stopped.
    class_ids = torch.argmax(outputs.logits, dim=-1).tolist()

    # Map the predictions to 'normal' or 'fraud'
    label_map = {1: 'normal', 0: 'fraud'}
    prediction_dict[load_model] = [label_map[class_id] for class_id in class_ids]
    timing_dict[load_model] = time.perf_counter() - start_time

    return prediction_dict, timing_dict


In [None]:
def make_predictions(model, message, batch_size=32):
    """Label one or more messages as ``'normal'`` or ``'fraud'``.

    The tokenizer and classifier are loaded from ``./models/<model>`` through
    the ``Auto*`` classes, the classifier is moved to the GPU when one is
    available, and the messages are run through it in batches.

    Args:
        model: Directory name of the fine-tuned model below ``./models/``.
        message: A single string or an iterable of strings to classify.
        batch_size: Number of messages per forward pass; lower it if the
            device runs out of memory.

    Returns:
        list[str]: One label per message, in input order. Empty input yields
        an empty list without loading the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If the model predicts a class id other than 0 or 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Accept a bare string as a one-element batch.
    messages = [message] if isinstance(message, str) else list(message)
    if not messages:
        return []

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dir = './models/' + model

    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    # The classifier must sit on the same device as the inputs below,
    # otherwise the forward pass fails on a machine with a GPU.
    classifier = AutoModelForSequenceClassification.from_pretrained(model_dir).to(device)

    # Collect the logits of every batch
    all_outputs = []
    with torch.no_grad():
        for start in range(0, len(messages), batch_size):
            batch_messages = messages[start:start + batch_size]

            # Tokenize the batch
            inputs = tokenizer(
                batch_messages,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )

            # Move tensors to the device where the model is (GPU or CPU)
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

            # Perform inference and store the logits
            all_outputs.append(classifier(**inputs)['logits'])

    class_ids = torch.cat(all_outputs, dim=0).argmax(dim=1).tolist()

    # Map the predictions to 'normal' or 'fraud'
    label_map = {1: 'normal', 0: 'fraud'}
    return [label_map[class_id] for class_id in class_ids]


In [None]:
# Pass only the model directory name: prediction_timings() already prepends
# './models/', so a 'models/' prefix here would point at './models/models/...'.
print(prediction_timings("Hi John, this is John from the Fraud Prevention Department at your bank. We’ve detected some suspicious activity on your account and need to verify some information with you immediately.", "tuned_llama_model"))

In [7]:
# One sample message, wrapped in a list so it is tokenized as a batch of one.
new_data = [
    "Hi John, this is John from the Fraud Prevention Department at your bank. We’ve detected some suspicious activity on your account and need to verify some information with you immediately.",
]

# Directory names of the fine-tuned models to compare.
models = [
    'tuned_gpt_model',
    'tuned_llama_model',
    'tuned_bert_model',
    'tuned_roberta_model',
    'tuned_distilbert_model',
]

# Every call returns the same two shared dicts, which gain one entry per
# model, so after the loop the names below hold the results for all models.
for model in models:
    predition, timings = prediction_timings(new_data, model)

print(predition)
print(timings)


{'tuned_gpt_model': ['fraud']}


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [01:08<00:00, 17.15s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'tuned_gpt_model': ['fraud'], 'tuned_llama_model': ['fraud']}
{'tuned_gpt_model': ['fraud'], 'tuned_llama_model': ['fraud'], 'tuned_bert_model': ['fraud']}
{'tuned_gpt_model': ['fraud'], 'tuned_llama_model': ['fraud'], 'tuned_bert_model': ['fraud'], 'tuned_roberta_model': ['fraud']}
{'tuned_gpt_model': ['fraud'], 'tuned_llama_model': ['fraud'], 'tuned_bert_model': ['fraud'], 'tuned_roberta_model': ['fraud'], 'tuned_distilbert_model': ['fraud']}
{'tuned_gpt_model': 0.1622333526611328, 'tuned_llama_model': 108.24846482276917, 'tuned_bert_model': 0.09799957275390625, 'tuned_roberta_model': 0.06200051307678223, 'tuned_distilbert_model': 0.029999494552612305}
