In [None]:
%pip install transformers torch torchtext scikit-learn pandas
%pip install torch==2.0.1 torchtext==0.15.2
!pip install tensorflow
%pip install transformers torch scikit-learn
!pip install datasets





In [None]:
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import accuracy_score


# Class ids used throughout the script (the model is loaded with two labels).
FRAUD = 1
NOT_FRAUD = 0

# Each sentence is stored together with its label so that a text and its class
# cannot drift out of sync when examples are added, removed or reordered.
labelled_examples = [
    ("I received a call from someone asking for my bank account details.", FRAUD),
    ("You've won a lottery! Send us your bank details to claim the prize.", FRAUD),
    ("Please update your account information to prevent deactivation.", FRAUD),
    ("Hey, just wanted to check in on our meeting next week.", NOT_FRAUD),
    ("I need help with my order; it didn't arrive on time.", NOT_FRAUD),
    ("Your account has been compromised; please send your password to fix it.", FRAUD),
    ("Urgent: Your bank account is under review. Kindly verify your personal information immediately to avoid restrictions.", FRAUD),
    ("Warning: Unauthorized login attempts detected on your account. Please confirm your identity to secure your account.", FRAUD),
    ("We noticed suspicious activity in your account. Please reply with your account number to verify your identity.", FRAUD),
    ("You have been selected to receive a special reward. Please send your payment details to claim your prize.", FRAUD),
    ("Your subscription is about to expire. To prevent service interruption, please update your payment information as soon as possible.", NOT_FRAUD),
    ("Important: Your account has been locked due to multiple failed login attempts. Click here to reset your password.", FRAUD),
    ("We are conducting a security check. Kindly provide your social security number and date of birth to verify your account.", FRAUD),
    ("Exclusive Offer: You've won a free vacation! Please provide your payment details to confirm your booking.", FRAUD),
    ("We need to verify your identity. Please send a copy of your ID and recent utility bill to proceed.", FRAUD),
    ("Congratulations! You've won a gift card worth $500. Please reply with your email address and payment info to claim it.", FRAUD),
    ("Immediate action required: Your account has been flagged for suspicious activity. Please log in and verify your account details immediately.", FRAUD),
]

# Parallel lists consumed by the rest of the script; always the same length
# because both are derived from the single list above.
sentences = [text for text, _ in labelled_examples]
labels = [label for _, label in labelled_examples]


# Split the data into training and testing sets.
# The label list is heavily skewed towards class 1, so the split is stratified
# on the labels: without it, the few class-0 sentences can all land in one
# split, leaving the other with a single class.
X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the input data (each split is truncated to at most 32 tokens and padded)
train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=32)
test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=32)

# Create a Dataset object for the Hugging Face Trainer API
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'label': y_train,
})
test_dataset = Dataset.from_dict({
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask'],
    'label': y_test,
})

# Load the pre-trained BERT model for sequence classification (two output classes)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)



# Metric callback handed to the Trainer: reports classification accuracy
def compute_metrics(p):
    """Return the accuracy of one evaluation pass.

    Args:
        p: Object that unpacks into ``(predictions, labels)``. The
            predictions are treated as per-class scores with the classes on
            the last axis (presumably the model logits -- confirm against
            the Trainer in use).

    Returns:
        A dict with a single ``'accuracy'`` entry.
    """
    scores, references = p
    # The scores are wrapped in a tensor so the winning class index can be
    # taken along the last dimension.
    predicted_classes = torch.tensor(scores).argmax(dim=-1)
    return {'accuracy': accuracy_score(references, predicted_classes)}


# Define training arguments, disable wandb.
#
# The warmup length is derived from the real amount of training instead of a
# fixed 500 steps: with a training set this small the whole run is only a few
# dozen optimizer steps, so a 500-step warmup would end training while the
# learning rate is still a small fraction of its target value.
NUM_TRAIN_EPOCHS = 10
TRAIN_BATCH_SIZE = 4

# Ceiling division: a final partial batch still costs one optimizer step.
# Assumes a single device and no gradient accumulation.
steps_per_epoch = -(-len(train_dataset) // TRAIN_BATCH_SIZE)
total_train_steps = steps_per_epoch * NUM_TRAIN_EPOCHS
# Warm up over roughly the first 10% of training, but at least one step.
warmup_steps = max(1, total_train_steps // 10)

training_args = TrainingArguments(
    output_dir='./results',                        # output directory
    num_train_epochs=NUM_TRAIN_EPOCHS,             # number of training epochs
    per_device_train_batch_size=TRAIN_BATCH_SIZE,  # batch size for training
    per_device_eval_batch_size=4,                  # batch size for evaluation
    warmup_steps=warmup_steps,                     # warmup steps for the learning rate scheduler
    weight_decay=0.01,                             # strength of weight decay
    logging_dir='./logs',                          # directory for storing logs
    report_to="none",                              # Disable Wandb
    # learning_rate is not passed, so the TrainingArguments default applies.
)

# Collect everything the Trainer needs, then build it in one call
trainer_kwargs = dict(
    model=model,                      # model being fine-tuned
    args=training_args,               # hyper-parameters configured earlier
    train_dataset=train_dataset,      # examples used for training
    eval_dataset=test_dataset,        # held-out examples used by evaluate()
    compute_metrics=compute_metrics,  # metric callback (accuracy)
)
trainer = Trainer(**trainer_kwargs)

# Run fine-tuning
trainer.train()

# Score the model on the evaluation dataset and report its accuracy
results = trainer.evaluate()
print(f"Test Accuracy: {results['eval_accuracy']:.2f}")

# Classify a single sentence with the fine-tuned model.
# NOTE: this exact sentence is also part of the `sentences` list used to build
# the train/test split, so it is not a genuinely unseen example.
new_sentence = "Hey, just wanted to check in on our meeting next week."
inputs = tokenizer(new_sentence, return_tensors="pt", truncation=True, padding=True, max_length=32)

# The Trainer may have moved the model to an accelerator; the inputs must be
# on the same device as the model or the forward pass raises a device
# mismatch error.
inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

# Inference only: disable dropout and skip building the autograd graph.
model.eval()
with torch.no_grad():
    output = model(**inputs)

prediction = torch.argmax(output.logits, dim=-1).item()
print(f"Fraud prediction for new sentence: {'fraud' if prediction == 1 else 'not fraud'}")
