In [5]:
import pandas as pd
import random
import numpy as np
from faker import Faker

# Faker supplies the random timestamps. Seed Faker, `random` and NumPy so the
# sampled IDs, types, amounts and locations repeat from run to run.
# NOTE(review): timestamps are drawn relative to "now" (see the loop below),
# so they still shift with the date on which this cell is executed.
fake = Faker()
Faker.seed(42)
random.seed(42)
np.random.seed(42)

# Pool of transaction categories, sampled uniformly per record
transaction_types = [
    "Online Purchase",
    "Bank Transfer",
    "ATM Withdrawal",
    "POS Purchase",
    "Crypto Exchange",
]

# Pool of cities, sampled uniformly per record
locations = [
    "New York",
    "Los Angeles",
    "Miami",
    "Chicago",
    "San Francisco",
    "Houston",
    "Boston",
    "Seattle",
]

# Number of synthetic transactions to produce (tune for dataset size)
num_records = 10_000
column_names = [
    "Transaction_ID",
    "User_ID",
    "Transaction_Type",
    "Amount",
    "Timestamp",
    "Location",
    "Is_Fraud",
]
data = []

for transaction_id in range(1, num_records + 1):
    # User IDs are drawn with replacement from 1000..5000, so they repeat across rows
    user_id = random.randint(1000, 5000)
    transaction_type = random.choice(transaction_types)
    # Dollar amount in [10, 10000], rounded to cents
    amount = round(random.uniform(10, 10000), 2)
    # Any moment within the two years preceding the time this cell runs
    timestamp = fake.date_time_between(start_date="-2y", end_date="now")
    location = random.choice(locations)

    # Labelling rules:
    #   1. a Bank Transfer / Crypto Exchange above $4,000 is always fraud;
    #   2. otherwise a Miami / Los Angeles transaction is fraud with 5% chance.
    # The random draw for rule 2 happens only when rule 1 did not fire and the
    # location matches, which keeps the RNG sequence unchanged.
    large_transfer = (
        transaction_type in ["Bank Transfer", "Crypto Exchange"] and amount > 4000
    )
    if large_transfer:
        is_fraud = 1
    elif location in ["Miami", "Los Angeles"] and random.random() < 0.05:
        is_fraud = 1
    else:
        is_fraud = 0

    data.append(
        [transaction_id, user_id, transaction_type, amount, timestamp, location, is_fraud]
    )

# Assemble the records into a DataFrame with one column per field
df = pd.DataFrame(data, columns=column_names)

# Persist the dataset next to the notebook (no index column)
df.to_csv("financial_transactions.csv", index=False)

print("✅ Fraud Detection dataset successfully created and saved as 'financial_transactions.csv'!")


✅ Fraud Detection dataset successfully created and saved as 'financial_transactions.csv'!


In [4]:
!pip install faker


Collecting faker
  Downloading Faker-35.0.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-35.0.0-py3-none-any.whl (1.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 21.1 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-35.0.0


In [6]:
import sqlite3

# Open the on-disk database (SQLite creates the file if it is missing) and
# keep a cursor around for ad-hoc statements.
conn = sqlite3.connect("fraud_detection.db")
cursor = conn.cursor()

# Copy the DataFrame into the "transactions" table, dropping any earlier
# version of the table and leaving the DataFrame index out.
df.to_sql(name="transactions", con=conn, index=False, if_exists="replace")

print("✅ Dataset successfully loaded into SQLite!")


✅ Dataset successfully loaded into SQLite!


In [7]:
# Sanity check: show the first five rows of the freshly loaded table.
query = "SELECT * FROM transactions LIMIT 5;"
pd.read_sql(sql=query, con=conn)


Unnamed: 0,Transaction_ID,User_ID,Transaction_Type,Amount,Timestamp,Location,Is_Fraud
0,1,3619,Online Purchase,259.86,2024-05-12 02:57:22.340521,San Francisco,0
1,2,2003,Bank Transfer,1403.98,2023-02-18 07:18:32.599445,Los Angeles,0
2,3,4654,Crypto Exchange,878.52,2023-08-19 22:30:52.184068,Boston,0
3,4,1130,Online Purchase,946.02,2023-07-13 02:03:21.672255,Chicago,0
4,5,3069,Crypto Exchange,275.09,2024-07-22 00:17:32.353946,Chicago,0


In [8]:
# Headline numbers: row count, number of rows flagged Is_Fraud, and the
# flagged share as a percentage (100.0 forces floating-point division).
query = """
SELECT
    COUNT(*) AS total_transactions,
    SUM(Is_Fraud) AS total_fraud_cases,
    (SUM(Is_Fraud) * 100.0 / COUNT(*)) AS fraud_percentage
FROM transactions;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,total_transactions,total_fraud_cases,fraud_percentage
0,10000,2516,25.16


In [9]:
# Fraud rate per transaction type, highest rate first.
query = """
SELECT Transaction_Type,
       COUNT(*) AS total_transactions,
       SUM(Is_Fraud) AS fraud_cases,
       (SUM(Is_Fraud) * 100.0 / COUNT(*)) AS fraud_rate
FROM transactions
GROUP BY Transaction_Type
ORDER BY fraud_rate DESC;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,Transaction_Type,total_transactions,fraud_cases,fraud_rate
0,Crypto Exchange,2008,1234,61.454183
1,Bank Transfer,2009,1195,59.48233
2,ATM Withdrawal,1968,31,1.575203
3,Online Purchase,1931,28,1.450026
4,POS Purchase,2084,28,1.34357


In [10]:
# Users with more than two transactions flagged as fraud, most-flagged first.
query = """
SELECT User_ID, COUNT(*) AS fraud_count
FROM transactions
WHERE Is_Fraud = 1
GROUP BY User_ID
HAVING fraud_count > 2
ORDER BY fraud_count DESC;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,User_ID,fraud_count
0,4092,5
1,4509,4
2,4282,4
3,4140,4
4,4056,4
...,...,...
96,1107,3
97,1101,3
98,1094,3
99,1047,3


In [11]:
# Every flagged transaction above $4,000, largest amount first.
query = """
SELECT * FROM transactions
WHERE Amount > 4000 AND Is_Fraud = 1
ORDER BY Amount DESC;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,Transaction_ID,User_ID,Transaction_Type,Amount,Timestamp,Location,Is_Fraud
0,4547,2367,Bank Transfer,9997.59,2023-08-13 04:59:04.604968,Chicago,1
1,6452,4751,Crypto Exchange,9992.53,2024-07-22 23:20:02.796647,Los Angeles,1
2,3633,2283,Crypto Exchange,9990.98,2024-10-05 04:28:17.644557,Chicago,1
3,5942,2683,Bank Transfer,9990.64,2023-04-03 21:54:36.821858,Miami,1
4,1421,1849,Crypto Exchange,9989.88,2024-02-19 07:27:15.072676,Boston,1
...,...,...,...,...,...,...,...
2456,4611,2089,Crypto Exchange,4007.30,2023-10-17 20:01:06.426623,Chicago,1
2457,2366,3661,Bank Transfer,4006.59,2023-07-28 04:59:49.922647,Los Angeles,1
2458,7760,4893,Crypto Exchange,4006.06,2023-06-20 09:32:46.496504,San Francisco,1
2459,100,2438,Crypto Exchange,4002.79,2024-10-20 19:36:32.635145,Houston,1


In [12]:
# Rapid location change check: self-join the table on User_ID and keep pairs
# where t1 precedes t2, the two timestamps are less than 1800 seconds
# (30 minutes) apart in strftime('%s') epoch seconds, and the locations differ.
query = """
SELECT t1.User_ID, t1.Timestamp, t1.Location AS Location_1,
       t2.Timestamp AS Next_Timestamp, t2.Location AS Location_2
FROM transactions t1
JOIN transactions t2 ON t1.User_ID = t2.User_ID
WHERE t1.Timestamp < t2.Timestamp
AND ABS(strftime('%s', t1.Timestamp) - strftime('%s', t2.Timestamp)) < 1800
AND t1.Location <> t2.Location;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,User_ID,Timestamp,Location_1,Next_Timestamp,Location_2


In [13]:
# Transaction and fraud counts bucketed by hour of day ('%H' of Timestamp),
# ordered so the hours with the most flagged transactions come first.
query = """
SELECT strftime('%H', Timestamp) AS hour,
       COUNT(*) AS total_transactions,
       SUM(Is_Fraud) AS fraud_cases
FROM transactions
GROUP BY hour
ORDER BY fraud_cases DESC;
"""
pd.read_sql(sql=query, con=conn)


Unnamed: 0,hour,total_transactions,fraud_cases
0,21,454,119
1,19,417,119
2,5,433,117
3,2,417,116
4,17,477,114
5,8,407,114
6,1,400,114
7,10,395,111
8,4,428,108
9,23,424,107


In [22]:
# Write a second copy of the dataset under a different file name, then hand
# that file to the browser as a download via the Colab helper module.
export_name = "fraud_detection_data.csv"
df.to_csv(path_or_buf=export_name, index=False)
from google.colab import files
files.download(export_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [18]:
# Install Git (if not already installed)
!apt-get install git

# Set up Git (Replace with your details)
!git config --global user.name "DeepLearn2001"
!git config --global user.email "deepashreeshirke@2001"


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [21]:
!git clone https://github.com/DeepLearn2001/Fraud-Detection

fatal: destination path 'Fraud-Detection' already exists and is not an empty directory.
