In [1]:
# Install the third-party packages this notebook imports into the kernel's environment.
# TODO: pin versions (pkg==x.y.z) so a fresh run resolves the same releases.
%pip install transformers datasets gradio scikit-learn pandas openai-whisper torch


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import torch
import whisper
import os
os.environ["WANDB_DISABLED"] = "true"
from datasets import Dataset
from transformers import (
    BertTokenizerFast,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
    TextClassificationPipeline
)
from sklearn.metrics import accuracy_score, f1_score
import gradio as gr



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# STEP 1: Load and preprocess data
# The file is read without a header row: column 0 is the class name, column 1 the transcript.
LABEL_TO_ID = {'normal': 0, 'fraud': 1}

df = pd.read_csv('./fraud_calls_data.csv', header=None)
df.columns = ['label', 'call_text']
df = df.dropna(subset=['call_text', 'label'])
# Normalise case and surrounding whitespace so variants such as "Fraud " still map to a class id.
df['label'] = df['label'].astype(str).str.strip().str.lower().map(LABEL_TO_ID)
# Values outside LABEL_TO_ID map to NaN, which would silently turn the column into floats;
# drop those rows and keep plain integer class ids.
df = df.dropna(subset=['label'])
df['label'] = df['label'].astype(int)
# Rows were dropped above, so rebuild a contiguous 0..n-1 index.
df = df.reset_index(drop=True)



In [4]:
# STEP 2: Tokenization and Hugging Face dataset
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
dataset = Dataset.from_pandas(df[['call_text', 'label']])

def tokenize_function(batch):
    """Tokenize a batch of transcripts, truncating each one to at most 128 tokens.

    Padding is intentionally not applied here. With ``batched=True`` a
    ``padding=True`` call pads every example to the longest sequence of its
    whole map batch, which only inflates the stored sequences; the
    ``DataCollatorWithPadding`` given to the Trainer pads each training batch
    to its own longest sequence instead.
    """
    return tokenizer(batch['call_text'], truncation=True, max_length=128)

dataset = dataset.map(tokenize_function, batched=True)
# The Trainer expects the target column to be called "labels".
dataset = dataset.rename_column("label", "labels")
# Expose only the model inputs and the target as torch tensors.
dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map: 100%|██████████| 5925/5925 [00:00<00:00, 7818.62 examples/s]


In [5]:

# STEP 3: Split dataset
# Fixed seed so the 80/20 split (and therefore the reported metrics) is the same on every run.
data_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = data_split["train"]
eval_dataset = data_split["test"]



In [6]:
pip install --upgrade transformers


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
# STEP 4: Load and train BERT
# num_labels=2 requests a two-class head (ids 0 = normal, 1 = fraud, per the label mapping in STEP 1).
# The recorded load message shows the classifier weights are newly initialised, i.e. they are
# learned during the fine-tuning below.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

def compute_metrics(eval_pred):
    """Score one evaluation pass.

    ``eval_pred`` unpacks into the raw class logits and the reference labels.
    The predicted class is the index of the largest logit along the last axis.

    Returns:
        dict with "accuracy" and "f1", both computed with scikit-learn defaults.
    """
    raw_logits, references = eval_pred
    predicted_classes = torch.argmax(torch.tensor(raw_logits), dim=-1)
    accuracy = accuracy_score(references, predicted_classes)
    f1 = f1_score(references, predicted_classes)
    return {"accuracy": accuracy, "f1": f1}

# Evaluate and checkpoint once per epoch so the best epoch can be restored at the end.
training_args = TrainingArguments(
    output_dir="./bert-fraud-checkpoint",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=4,
    weight_decay=0.01,
    load_best_model_at_end=True,
    # Select the best checkpoint by F1 instead of accuracy: the recorded run shows
    # accuracy around 0.99 while F1 sits around 0.93-0.96, which suggests imbalanced
    # classes where accuracy alone hides fraud-class errors. "f1" is the key
    # returned by compute_metrics.
    metric_for_best_model="f1",
    report_to=[]  # Disable wandb
)

from transformers import DataCollatorWithPadding

# Pads each batch to the length of its longest sequence at collation time.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

# Last expression of the cell: the returned TrainOutput is displayed as the cell result.
trainer.train()



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.054011,0.987342,0.933333
2,0.078300,0.035979,0.990717,0.953586
3,0.078300,0.044573,0.991561,0.957265
4,0.015000,0.049031,0.991561,0.957265


TrainOutput(global_step=1188, training_loss=0.04054746734172808, metrics={'train_runtime': 12828.4493, 'train_samples_per_second': 1.478, 'train_steps_per_second': 0.093, 'total_flos': 1247146402406400.0, 'train_loss': 0.04054746734172808, 'epoch': 4.0})

In [8]:
# STEP 5: Create a text classification pipeline
# Prefer the checkpoint the Trainer recorded as best; otherwise keep the in-memory model.
model_path = trainer.state.best_model_checkpoint
if model_path is not None:
    print(f"Loading best model from {model_path}")
    model_to_use = BertForSequenceClassification.from_pretrained(model_path)
else:
    # No best checkpoint was recorded, so fall back to the `model` object that
    # was handed to the Trainer.
    print("No best model checkpoint found, using the model trained in the last epoch.")
    model_to_use = model

pipeline = TextClassificationPipeline(
    model=model_to_use,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # first GPU when CUDA is present, else CPU
)

# Smoke-test the classifier on a few hand-written transcripts.
example_texts = [
    "Hello, this is a call from your bank regarding a suspicious transaction.",
    "Hi, just calling to catch up.",
    "You have won a prize! Please provide your bank details to claim it."
]

print("\nClassifying example texts:")
for text in example_texts:
    top_prediction = pipeline(text)[0]
    # LABEL_1 is the fraud class; anything else is reported as normal.
    predicted_label = "fraud" if top_prediction['label'] == 'LABEL_1' else "normal"
    print(f"Text: '{text}' -> Predicted: {predicted_label} (Score: {top_prediction['score']:.4f})")

Loading best model from ./bert-fraud-checkpoint\checkpoint-891


Device set to use cpu



Classifying example texts:
Text: 'Hello, this is a call from your bank regarding a suspicious transaction.' -> Predicted: fraud (Score: 0.9982)
Text: 'Hi, just calling to catch up.' -> Predicted: normal (Score: 0.9998)
Text: 'You have won a prize! Please provide your bank details to claim it.' -> Predicted: fraud (Score: 0.9980)


In [9]:
# STEP 5 (continued): prediction pipeline that returns a score for every label
# NOTE(review): `return_all_scores` is reported as deprecated in recent transformers
# releases — confirm before upgrading; predict_fraud indexes the nested result it produces.
pipe = TextClassificationPipeline(
    tokenizer=tokenizer,
    model=model,
    device=(0 if torch.cuda.is_available() else -1),
    return_all_scores=True,
)

Device set to use cpu


In [10]:

# STEP 6: Whisper model for transcription
# Loaded once at module level so every transcription call reuses the same model object.
# The recorded output shows a ~139 MB download when the "base" weights are first fetched.
whisper_model = whisper.load_model("base")

# STEP 7: Risk scoring logic
def calculate_risk_score(prob):
    """Translate a fused fraud score into a named risk band.

    Bands are checked from the highest lower bound down (bounds inclusive):
    >= 0.85 -> "Critical", >= 0.65 -> "High", >= 0.35 -> "Medium".
    Any value that meets none of them is "Low".
    """
    bands = ((0.85, "Critical"), (0.65, "High"), (0.35, "Medium"))
    for lower_bound, level in bands:
        if prob >= lower_bound:
            return level
    return "Low"


100%|███████████████████████████████████████| 139M/139M [00:34<00:00, 4.15MiB/s]


In [11]:
# STEP 8: Fusion layer for prediction
def predict_fraud(text, trust_score, alpha=0.7):
    """Blend the classifier's fraud probability with the caller's trust score.

    fused = alpha * P(fraud) + (1 - alpha) * (1 - trust_score)

    Args:
        text: Call transcript to classify.
        trust_score: Trust in the caller; the UI supplies a value in [0, 1].
            Higher trust lowers the fused score.
        alpha: Weight given to the model probability. Defaults to 0.7, the
            value that was previously hard-coded.

    Returns:
        Tuple ``(label, model_probability, fused_score, risk_level)`` where the
        two numeric fields are strings formatted to four decimals.
        On any failure: ``("Error", "Error", "Error", <message>)`` so the UI can
        show the reason instead of raising.
    """
    try:
        # An empty transcript carries no signal; report it instead of scoring it.
        if not text or not text.strip():
            raise ValueError("No transcript text to classify.")
        scores = pipe(text)
        # Accept both a nested [[{...}, {...}]] result and a flat [{...}, {...}]
        # one, so the lookup below does not depend on how the pipeline wraps
        # a single input.
        if scores and isinstance(scores[0], list):
            scores = scores[0]
        # LABEL_1 is the fraud class; fall back to 0.0 if it is not reported.
        model_prob = next((item['score'] for item in scores if item['label'] == 'LABEL_1'), 0.0)
        fused_score = alpha * model_prob + (1 - alpha) * (1 - trust_score)
        label = "Fraud" if fused_score >= 0.5 else "Normal"
        risk = calculate_risk_score(fused_score)
        return label, f"{model_prob:.4f}", f"{fused_score:.4f}", risk
    except Exception as e:
        # Deliberate best-effort: surface the message in the last output slot.
        return "Error", "Error", "Error", str(e)



In [12]:
# STEP 9: Voice + Trust input wrapper
def transcribe_and_predict(audio_file, trust_score):
    """Transcribe a call recording with Whisper, then score the transcript.

    Args:
        audio_file: Path of the recording to transcribe; ``None`` when nothing
            was supplied.
        trust_score: Forwarded unchanged to ``predict_fraud``.

    Returns:
        The 4-tuple from ``predict_fraud`` with the transcript appended as a
        fifth element. On failure:
        ``("Error", "Error", "Error", <message>, "")``.
    """
    try:
        # Fail with a readable message rather than whatever Whisper raises on None.
        if audio_file is None:
            raise ValueError("No audio file provided.")
        result = whisper_model.transcribe(audio_file)
        # Trim surrounding whitespace so the displayed transcript is clean.
        transcript = result['text'].strip()
        prediction = predict_fraud(transcript, trust_score)
        return prediction + (transcript,)
    except Exception as e:
        # Deliberate best-effort: keep the UI responsive and show the reason.
        return "Error", "Error", "Error", str(e), ""


In [13]:
# STEP 10: Gradio UI
def build_interface():
    """Assemble the two-tab Gradio demo: typed transcript and uploaded recording.

    Both tabs share the same trust-score slider settings and the same four
    result labels; the voice tab additionally shows the transcribed text.
    Returns the ``gr.TabbedInterface`` without launching it.
    """

    def trust_slider():
        # A fresh slider per tab; each Interface gets its own component instance.
        return gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.01, label="User Trust Score")

    def verdict_labels():
        # The four result fields, in the order the prediction tuple is returned.
        captions = ("Prediction", "Model Fraud Probability", "Fused Risk Score", "Risk Level")
        return [gr.Label(label=caption) for caption in captions]

    text_input_tab = gr.Interface(
        fn=predict_fraud,
        inputs=[gr.Textbox(label="Enter Call Transcript"), trust_slider()],
        outputs=verdict_labels(),
        title="Text-based Fraud Detection",
        description="Enter a call transcript and trust score to check fraud.",
    )

    voice_input_tab = gr.Interface(
        fn=transcribe_and_predict,
        inputs=[
            gr.Audio(type="filepath", label="Upload Call Recording (wav/mp3)"),
            trust_slider(),
        ],
        outputs=verdict_labels() + [gr.Textbox(label="Transcribed Text")],
        title="Voice-based Fraud Detection",
        description="Upload a call recording and get fraud detection with risk analysis.",
    )

    return gr.TabbedInterface([text_input_tab, voice_input_tab], ["Text Input", "Voice Input"])


In [14]:

# Launch the Gradio app
# NOTE(review): the saved output of this cell shows the server starting on 127.0.0.1:7860 and
# then a ValueError ("localhost is not accessible"). Gradio's message suggests either
# share=True or fixing proxy settings — confirm which is appropriate for this environment
# before re-running (share=True creates a shareable link).
build_interface().launch()


* Running on local URL:  http://127.0.0.1:7860


ValueError: When localhost is not accessible, a shareable link must be created. Please set share=True or check your proxy settings to allow access to localhost.

In [None]:
# Export the selected model and the tokenizer to the backend directory.
# NOTE(review): this relies on `model_to_use` from the STEP 5 pipeline cell; the saved output
# shows a NameError from a run where that name was missing. The next cell repeats this
# export after re-resolving `model_to_use`, so this cell looks redundant — confirm and remove.
final_model_path = "./backend/bert_call_model"
model_to_use.save_pretrained(final_model_path)
tokenizer.save_pretrained(final_model_path)


NameError: name 'model_to_use' is not defined

In [15]:
from transformers import BertForSequenceClassification

# Resolve which weights to export: the Trainer's best checkpoint when one was
# recorded, otherwise the in-memory trained model.
model_path = trainer.state.best_model_checkpoint

if model_path is not None:
    print(f"Loading model from best checkpoint: {model_path}")
    model_to_use = BertForSequenceClassification.from_pretrained(model_path)
else:
    print("No checkpoint found, using model from last epoch.")
    model_to_use = model

# Export directory for the serving backend
final_model_path = "./backend/bert_call_model"

# Write the model first, then the tokenizer, into the same directory
model_to_use.save_pretrained(final_model_path)
tokenizer.save_pretrained(final_model_path)

print("✅ Model and tokenizer saved to:", final_model_path)


Loading model from best checkpoint: ./bert-fraud-checkpoint\checkpoint-891
✅ Model and tokenizer saved to: ./backend/bert_call_model
