Setup & Dependencies

In [None]:
pip install nltk pandas scikit-learn joblib flask python-dotenv twilio

Fraud Detection Engine

In [None]:
import re
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import joblib

# Fetch the VADER lexicon resource; SentimentIntensityAnalyzer (constructed in
# FraudDetector.__init__) is the VADER-based analyzer imported above.
nltk.download('vader_lexicon')

class FraudDetector:
    """Flag threatening and tax-evasion language in free-text messages.

    Combines fixed keyword lists with a pre-trained classifier (loaded via
    joblib) and NLTK's VADER sentiment analyzer.
    """

    def __init__(self, model_path='fraud_model.pkl',
                 vectorizer_path='tfidf_vectorizer.pkl'):
        """Set up the keyword lists and load the trained artifacts.

        Args:
            model_path: File holding the fitted classifier.
            vectorizer_path: File holding the fitted text vectorizer.
        """
        self.sia = SentimentIntensityAnalyzer()
        self.tax_evasion_keywords = ["pay cash", "no invoice", "avoid tax", "black money"]
        self.threat_keywords = ["pay or else", "leak your data", "harm you"]
        self.phishing_keywords = ["urgent action", "verify account", "click link"]

        # Load pre-trained model (train separately).
        # NOTE(security): joblib.load unpickles its input, which can execute
        # arbitrary code -- only point these paths at trusted files.
        self.model = joblib.load(model_path)
        self.vectorizer = joblib.load(vectorizer_path)

    def detect_threats(self, text):
        """Check a message for threats using keyword rules and the ML model.

        Args:
            text: The message to inspect.

        Returns:
            A dict with ``"is_threat"`` (a plain ``bool``) and ``"reason"``
            (a string naming which check fired, or ``"No threat detected"``).
        """
        lowered = text.lower()

        # Rule-based check: plain substring match, so "harm you" also
        # catches "harm your family".
        rule_hit = any(keyword in lowered for keyword in self.threat_keywords)

        # ML-based classification.
        text_vec = self.vectorizer.transform([text])
        ml_prediction = self.model.predict(text_vec)[0]
        # assumes the model emits string labels including "threat" -- TODO confirm.
        # bool() strips any NumPy scalar type so the result stays JSON-serializable.
        ml_hit = bool(ml_prediction == "threat")

        if rule_hit:
            reason = "Threatening language detected"
        elif ml_hit:
            reason = "ML model flagged as threat"
        else:
            # Previously this case still reported the ML reason.
            reason = "No threat detected"

        return {"is_threat": rule_hit or ml_hit, "reason": reason}

    def detect_tax_fraud(self, text):
        """Look for whole-phrase tax-evasion keywords in a message.

        Args:
            text: The message to inspect.

        Returns:
            A dict with ``"is_tax_fraud"`` (``bool``) and ``"keywords_found"``
            (the matching keywords, in list order). Both fields come from the
            same word-boundary match, so they cannot disagree.
        """
        lowered = text.lower()
        keywords_found = [
            kw for kw in self.tax_evasion_keywords
            # re.escape keeps keywords literal even if one contains regex
            # metacharacters.
            if re.search(rf"\b{re.escape(kw)}\b", lowered)
        ]
        return {
            "is_tax_fraud": bool(keywords_found),
            "keywords_found": keywords_found,
        }

    def analyze_message(self, text):
        """Run every available check on a message.

        Args:
            text: The message to inspect.

        Returns:
            A dict with ``"threat_analysis"`` (from ``detect_threats``),
            ``"tax_analysis"`` (from ``detect_tax_fraud``) and ``"sentiment"``
            (the analyzer's ``polarity_scores`` output for ``text``).
        """
        return {
            "threat_analysis": self.detect_threats(text),
            "tax_analysis": self.detect_tax_fraud(text),
            "sentiment": self.sia.polarity_scores(text),
        }