In [1]:
from google.colab import files
# Open the Colab upload widget; the original code treats the result as a
# mapping whose keys are the uploaded file names.
uploaded = files.upload()
# Keep the name of the first uploaded file (IndexError if nothing was uploaded).
file_name = list(uploaded)[0]

Saving complete_complaints_dataset.csv to complete_complaints_dataset.csv


In [None]:
import pandas as pd

# Path of the complaints CSV inside the Colab workspace.
file_name = "/content/complete_complaints_dataset.csv"
df = pd.read_csv(file_name)

# Number of rows per distinct value of the "Category" column.
category_counts = df.loc[:, "Category"].value_counts()
print(category_counts)


Category
Business Frauds/Email Takeover            10
Cryptocurrency Related Fraud              10
Cyber Bullying/Stalking/Sexting           10
Debit/Credit Card Fraud/SIM Swap Fraud    10
E-Mail Phishing                           10
E-Wallet Related Fraud                    10
Email Hacking                             10
Fake/Impersonating Profile                10
Fraud Call/Vishing                        10
Impersonating Email                       10
Internet Banking Related Fraud            10
Intimidating Email                        10
Online Gambling                           10
Online Job Fraud                          10
Online Trafficking                        10
Profile Hacking                           10
Provocative Speech                        10
Ransomware                                10
Unauthorized Access/Data Breach           10
Website Related/Defacement                10
Name: count, dtype: int64


In [3]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Read the complaints CSV into a DataFrame.
file_name = "/content/complete_complaints_dataset.csv"
df = pd.read_csv(file_name)

# spaCy English pipeline used by preprocess_text for tokenisation,
# stop-word/punctuation flags and lemmas.
nlp = spacy.load("en_core_web_sm")

def preprocess_text(text):
    """Normalise a complaint into a space-separated string of lemmas.

    The input is converted to ``str``, lower-cased and run through the
    module-level spaCy pipeline ``nlp``. Tokens flagged as stop words or
    punctuation are dropped; the lemmas of the remaining tokens are joined
    with single spaces.

    Args:
        text: Raw complaint text. Missing values (``None``, float NaN,
            ``pd.NA``) are accepted.

    Returns:
        str: The lemmatised text, or ``""`` for a missing value.
    """
    # Without this guard a missing cell is stringified ("none", "nan", "<na>")
    # and that artefact token ends up in the TF-IDF vocabulary.
    if text is None or text is pd.NA or (isinstance(text, float) and text != text):
        return ""
    doc = nlp(str(text).lower())
    return " ".join(
        token.lemma_ for token in doc if not (token.is_stop or token.is_punct)
    )

# Apply preprocessing
df["Processed_Complaint"] = df["Complaint Description"].apply(preprocess_text)

# Prepare data for training
X = df["Processed_Complaint"]
y = df["Category"]  # Assuming "Category" is the target label

# Split data (stratified so the class proportions are kept in both splits)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# Create a pipeline with TF-IDF and Logistic Regression.
# `multi_class` is deprecated in scikit-learn >= 1.5; with the lbfgs solver a
# multiclass target is already fitted with the multinomial loss, so the
# argument is dropped without changing the model.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', LogisticRegression(solver="lbfgs"))
])

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model.
# NOTE(review): with a 10% hold-out and only a handful of rows per category,
# each class has very few test samples, so treat the score as a rough check.
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Function to predict category
def predict_category(complaint_text):
    """Return the category the trained pipeline predicts for a raw complaint.

    The text is first normalised with ``preprocess_text`` and then passed to
    the module-level ``pipeline`` as a single-element batch.
    """
    cleaned = preprocess_text(complaint_text)
    return pipeline.predict([cleaned])[0]

# Example usage: classify each sample complaint and print the result.
sample_complaints = ["I clicked on a link in an email and my bank account was emptied."]

for complaint in sample_complaints:
    predicted_category = predict_category(complaint)
    print(f"Complaint: {complaint}\nPredicted Category: {predicted_category}\n")


Model Accuracy: 1.00
Classification Report:
                                         precision    recall  f1-score   support

        Business Frauds/Email Takeover       1.00      1.00      1.00         1
          Cryptocurrency Related Fraud       1.00      1.00      1.00         1
       Cyber Bullying/Stalking/Sexting       1.00      1.00      1.00         1
Debit/Credit Card Fraud/SIM Swap Fraud       1.00      1.00      1.00         1
                       E-Mail Phishing       1.00      1.00      1.00         1
                E-Wallet Related Fraud       1.00      1.00      1.00         1
                         Email Hacking       1.00      1.00      1.00         1
            Fake/Impersonating Profile       1.00      1.00      1.00         1
                    Fraud Call/Vishing       1.00      1.00      1.00         1
                   Impersonating Email       1.00      1.00      1.00         1
        Internet Banking Related Fraud       1.00      1.00      1.00     



In [None]:
sample_complaints = [
    "I clicked on a link in an email and my bank account was emptied.",
    "A fake social media profile is using my name and pictures without my permission.",
    "I was scammed by a website claiming to sell cryptocurrency but never received my purchase.",
    "I received an email threatening to release my personal information if I didn't pay money.",
]

for complaint in sample_complaints:
    # This notebook defines predict_category more than once: one variant returns
    # only the label, another returns (label, sub_category). Unpacking a plain
    # label string into two names raises ValueError, so accept either shape
    # and keep only the label, which is all this cell prints.
    prediction = predict_category(complaint)
    predicted_category = prediction[0] if isinstance(prediction, tuple) else prediction
    print(f"Complaint: {complaint}\nPredicted Category: {predicted_category}\n")


Complaint: I clicked on a link in an email and my bank account was emptied.
Predicted Category: Email Hacking

Complaint: A fake social media profile is using my name and pictures without my permission.
Predicted Category: Fake/Impersonating Profile

Complaint: I was scammed by a website claiming to sell cryptocurrency but never received my purchase.
Predicted Category: Cryptocurrency Related Fraud

Complaint: I received an email threatening to release my personal information if I didn't pay money.
Predicted Category: Email Hacking



In [None]:
# Function to predict category
def predict_category(complaint_text):
    """Predict the category and sub-category of a given complaint.

    Args:
        complaint_text: Raw complaint text.

    Returns:
        tuple: ``(predicted_category, sub_category)``. The sub-category is the
        most frequent ``'Sub-Category'`` value among the rows of ``df`` whose
        ``'Category'`` equals the prediction, or ``"Unknown"`` when ``df`` has
        no such column or no non-null value for that category.
    """
    processed_text = preprocess_text(complaint_text)
    predicted_category = pipeline.predict([processed_text])[0]

    sub_category = "Unknown"
    if 'Sub-Category' in df.columns:
        modes = df.loc[df['Category'] == predicted_category, 'Sub-Category'].mode()
        # mode() drops NaN by default and is empty when nothing is left;
        # indexing it with [0] would then raise KeyError.
        if not modes.empty:
            sub_category = modes.iloc[0]
    return predicted_category, sub_category

# Function to generate a detailed complaint report
def generate_report(complaint_text):
    """Build the plain-text cybercrime report for one complaint.

    Calls ``predict_category`` (expected here to return a
    ``(category, sub_category)`` pair) and interpolates the complaint, the
    category and the sub-category into a fixed multi-line template.

    Returns:
        str: The formatted report.
    """
    prediction = predict_category(complaint_text)
    label, sub_label = prediction
    return f"""
    Cybercrime Complaint Report
    --------------------------------
    Complaint: {complaint_text}
    Predicted Category: {label}

    Incident Summary:
    The complaint suggests a case of {label}, particularly {sub_label}. Such cases can lead to serious financial or personal harm.

    Possible Impact:
    - Financial loss
    - Unauthorized access to personal information
    - Legal implications

    Recommended Actions:
    ✅ Report the incident to the relevant authorities.
    ✅ Change passwords and enable two-factor authentication.
    ✅ Contact the affected service provider to seek resolution.
    ✅ Stay cautious and educate others about similar threats.

    Additional Information Needed:
    ❓ Were any personal details shared?
    ❓ Have there been similar past incidents?
    ❓ Was any monetary loss involved?

    Manual Complaint Submission:
    If the victim wishes to file a manual complaint, they should visit the cybercrime portal or nearest police station with all relevant details, including screenshots, transaction records, and communications.
    """

# Example usage: read one complaint interactively and print its report.
# input() already returns a str, so no extra conversion is needed.
sample_complaints = [input("Enter your complaint")]
for complaint in sample_complaints:
    report = generate_report(complaint)
    print(report)


Enter your complaint I received an email threatening to release my personal information if I didn't pay money.

    Cybercrime Complaint Report
    --------------------------------
    Complaint:  I received an email threatening to release my personal information if I didn't pay money.
    Predicted Category: Email Hacking

    Incident Summary:
    The complaint suggests a case of Email Hacking, particularly Unknown. Such cases can lead to serious financial or personal harm.

    Possible Impact:
    - Financial loss
    - Unauthorized access to personal information
    - Legal implications

    Recommended Actions:
    ✅ Report the incident to the relevant authorities.
    ✅ Change passwords and enable two-factor authentication.
    ✅ Contact the affected service provider to seek resolution.
    ✅ Stay cautious and educate others about similar threats.

    Additional Information Needed:
    ❓ Were any personal details shared?
    ❓ Have there been similar past incidents?
    ❓ Was a

In [5]:
!pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=c8af735f4aeb42530255c399b18745595ea741215f1ccd138e49558fdd9fc763
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [None]:

from fpdf import FPDF

def predict_category(complaint_text):
    """Return the predicted category label for a complaint.

    The text is normalised with ``preprocess_text`` and classified by the
    module-level ``pipeline``; only the label is returned.
    """
    cleaned = preprocess_text(complaint_text)
    label = pipeline.predict([cleaned])[0]
    return label

def generate_report(complaint_text):
    """Build the plain-text report body for one complaint.

    The predicted category from ``predict_category`` and the complaint text
    are interpolated into a fixed multi-line template.

    Returns:
        str: The formatted report.
    """
    label = predict_category(complaint_text)
    return f"""
    Complaint: {complaint_text}
    Predicted Category: {label}

    Incident Summary:
    The complaint suggests a case of {label}. Such cases can lead to serious financial or personal harm.

    Possible Impact:
    - Financial loss
    - Unauthorized access to personal information
    - Legal implications

    Recommended Actions:
    - Report the incident to the relevant authorities.
    - Change passwords and enable two-factor authentication.
    - Contact the affected service provider to seek resolution.
    - Stay cautious and educate others about similar threats.

    Additional Information Needed:
    - Were any personal details shared?
    - Have there been similar past incidents?
    - Was any monetary loss involved?

    Manual Complaint Submission:
    If the victim wishes to file a manual complaint, they should visit the cybercrime portal or nearest police station with all relevant details, including screenshots, transaction records, and communications.
    """

# Function to generate and save a well-formatted PDF report
def save_report_as_pdf(complaint_text, filename):
    """Generate the complaint report and write it to a PDF file.

    Args:
        complaint_text: Raw complaint text passed to ``generate_report``.
        filename: Path of the PDF file to write.

    Side effects:
        Writes ``filename`` and prints a confirmation line.
    """
    report_text = generate_report(complaint_text)
    # The text is written as Latin-1 below, so map the emoji markers to ASCII.
    # The previous "[✔]" replacement was itself outside Latin-1 and collapsed
    # to "[]" once encoded.
    report_text = report_text.replace("✅", "[x]").replace("❓", "[?]")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title: width 0 stretches the cell to the right margin, so the centred
    # title stays inside the printable area instead of overrunning it.
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "Cybercrime Complaint Report", ln=True, align='C')
    pdf.ln(10)

    # Content
    pdf.set_font("Arial", size=11)
    for line in report_text.split("\n"):
        # 'replace' renders unsupported characters as '?' rather than
        # silently dropping them from the report.
        pdf.multi_cell(0, 7, line.encode('latin-1', 'replace').decode('latin-1'))
        pdf.ln(2)

    pdf.output(filename)
    print(f"Report saved as {filename}")

# Complaints to turn into PDF reports.
sample_complaints = ["I clicked on a link in an email and my bank account was emptied."]

# One PDF per complaint, numbered from 1.
for i, complaint in enumerate(sample_complaints):
    filename = f"cybercrime_report_{i+1}.pdf"
    save_report_as_pdf(complaint, filename)


Report saved as cybercrime_report_1.pdf
