In [31]:
import pandas as pd
import random
import uuid
from datetime import datetime, timedelta

# Fixed seed so that Restart Kernel -> Run All regenerates the same users,
# amounts, fraud labels and transaction ids. (Timestamps are still anchored
# to "now", so only their offsets within the 24-hour window are reproducible.)
SEED = 42
rng = random.Random(SEED)

# Users and transaction types
user_ids = [f'USER{i:05d}' for i in range(1000, 1100)]  # 100 users
transaction_types = ['PAYMENT', 'TRANSFER', 'CASH_OUT', 'DEPOSIT']

# Start time: the simulated window covers the 24 hours before this cell runs
start_time = datetime.now() - timedelta(days=1)

# Generate 10,000 transactions
transactions = []

for _ in range(10000):
    # Draw two *distinct* users in one call instead of rebuilding a
    # 99-element candidate list on every iteration.
    from_user, to_user = rng.sample(user_ids, 2)
    amount = round(rng.uniform(10, 20000), 2)
    txn_type = rng.choice(transaction_types)

    # Simulate timestamps over 24 hrs
    txn_time = start_time + timedelta(seconds=rng.randint(0, 86400))

    # Fraud logic: only amounts above 15000 can be labelled fraud, and then
    # with probability 0.5
    is_fraud = 1 if (amount > 15000 and rng.random() < 0.5) else 0

    transactions.append({
        'timestamp': txn_time.strftime('%Y-%m-%d %H:%M:%S'),
        # Version-4 UUID built from the seeded generator so ids are reproducible
        'transaction_id': str(uuid.UUID(int=rng.getrandbits(128), version=4)),
        'from_user': from_user,
        'to_user': to_user,
        'amount': amount,
        'transaction_type': txn_type,
        'is_fraud': is_fraud
    })

# Create DataFrame (built once from the list of records)
df_large = pd.DataFrame(transactions)

# Save to CSV
df_large.to_csv('large_financial_transactions.csv', index=False)

print("✅ Dataset generated: 10,000+ transactions saved.")
df_large.head()


✅ Dataset generated: 10,000+ transactions saved.


Unnamed: 0,timestamp,transaction_id,from_user,to_user,amount,transaction_type,is_fraud
0,2025-03-18 09:33:05,99ce0032-db9a-4a2a-a63c-1d31a12b6e81,USER01052,USER01018,16997.53,TRANSFER,1
1,2025-03-17 19:02:00,cacad572-95c5-42b5-8a46-a7761cbc7dc4,USER01004,USER01065,18610.21,PAYMENT,0
2,2025-03-18 08:57:27,32afb436-693b-485f-97bc-170b056569ed,USER01038,USER01086,17497.43,DEPOSIT,0
3,2025-03-18 01:00:07,e091ec2b-893c-4432-a471-a790ee576824,USER01004,USER01094,182.78,TRANSFER,0
4,2025-03-17 18:04:08,f144d56b-6bef-4602-8e30-3d3adb567f4d,USER01015,USER01069,3855.4,TRANSFER,0


In [32]:
import pandas as pd

# Read the generated CSV back in, parsing the 'timestamp' column as datetimes
# (this stands in for a batch/streaming source).
transactions_csv = 'large_financial_transactions.csv'
df = pd.read_csv(transactions_csv, parse_dates=['timestamp'])

row_count = len(df)
print(f"✅ Loaded {row_count} transactions.")
df.head()


✅ Loaded 10000 transactions.


Unnamed: 0,timestamp,transaction_id,from_user,to_user,amount,transaction_type,is_fraud
0,2025-03-18 09:33:05,99ce0032-db9a-4a2a-a63c-1d31a12b6e81,USER01052,USER01018,16997.53,TRANSFER,1
1,2025-03-17 19:02:00,cacad572-95c5-42b5-8a46-a7761cbc7dc4,USER01004,USER01065,18610.21,PAYMENT,0
2,2025-03-18 08:57:27,32afb436-693b-485f-97bc-170b056569ed,USER01038,USER01086,17497.43,DEPOSIT,0
3,2025-03-18 01:00:07,e091ec2b-893c-4432-a471-a790ee576824,USER01004,USER01094,182.78,TRANSFER,0
4,2025-03-17 18:04:08,f144d56b-6bef-4602-8e30-3d3adb567f4d,USER01015,USER01069,3855.4,TRANSFER,0


In [33]:
# Build the cleaned frame in a single chain so that no column is ever assigned
# onto the intermediate result of dropna() (the pattern that triggers
# SettingWithCopyWarning) and so that re-running the cell is idempotent.
df_clean = (
    df
    # Remove nulls (shouldn't have in generated data, but for real-life practice)
    .dropna()
    # Ensure correct data types
    .astype({'amount': float, 'is_fraud': int})
    # Derived column: high value flag (1 when amount > 15000, else 0),
    # computed with a vectorized comparison instead of a row-wise apply
    .assign(high_value=lambda frame: (frame['amount'] > 15000).astype(int))
)

print("✅ Data cleaned & transformed.")
df_clean.head()


✅ Data cleaned & transformed.


Unnamed: 0,timestamp,transaction_id,from_user,to_user,amount,transaction_type,is_fraud,high_value
0,2025-03-18 09:33:05,99ce0032-db9a-4a2a-a63c-1d31a12b6e81,USER01052,USER01018,16997.53,TRANSFER,1,1
1,2025-03-17 19:02:00,cacad572-95c5-42b5-8a46-a7761cbc7dc4,USER01004,USER01065,18610.21,PAYMENT,0,1
2,2025-03-18 08:57:27,32afb436-693b-485f-97bc-170b056569ed,USER01038,USER01086,17497.43,DEPOSIT,0,1
3,2025-03-18 01:00:07,e091ec2b-893c-4432-a471-a790ee576824,USER01004,USER01094,182.78,TRANSFER,0,0
4,2025-03-17 18:04:08,f144d56b-6bef-4602-8e30-3d3adb567f4d,USER01015,USER01069,3855.4,TRANSFER,0,0


In [34]:
# --- Overall volume and labelled-fraud share -------------------------------
fraud_flags = df_clean['is_fraud']
total_txns = len(df_clean)
total_fraud = fraud_flags.sum()
fraud_rate = (total_fraud / total_txns) * 100

print(f"📊 Total Transactions: {total_txns}")
print(f"⚠️ Fraudulent Transactions: {total_fraud} ({fraud_rate:.2f}%)")

# --- Five senders with the largest total amount sent -----------------------
sent_per_user = df_clean.groupby('from_user')['amount'].sum()
ranked_senders = sent_per_user.sort_values(ascending=False)
top_users = ranked_senders.head(5)
print("\n🏆 Top 5 Users by Amount Sent:")
print(top_users)

# --- Labelled fraud count per hour of day ----------------------------------
hour_of_day = df_clean['timestamp'].dt.hour
fraud_over_time = fraud_flags.groupby(hour_of_day).sum()
print("\n🕒 Fraud per Hour:")
print(fraud_over_time)


📊 Total Transactions: 10000
⚠️ Fraudulent Transactions: 1280 (12.80%)

🏆 Top 5 Users by Amount Sent:
from_user
USER01063    1365956.18
USER01086    1298043.74
USER01047    1246682.88
USER01074    1236161.58
USER01026    1235738.61
Name: amount, dtype: float64

🕒 Fraud per Hour:
timestamp
0     54
1     59
2     47
3     38
4     61
5     46
6     71
7     52
8     35
9     51
10    48
11    66
12    54
13    58
14    50
15    66
16    35
17    59
18    42
19    43
20    63
21    58
22    64
23    60
Name: is_fraud, dtype: int64


In [35]:
import plotly.express as px

# Bar chart: one bar per hour of day, height = labelled fraud count
fraud_hours = fraud_over_time.index
fraud_counts = fraud_over_time.values
fraud_axis_labels = {'x': 'Hour of Day', 'y': 'Fraud Count'}

fig = px.bar(
    x=fraud_hours,
    y=fraud_counts,
    labels=fraud_axis_labels,
    title='Fraudulent Transactions by Hour',
)
fig.show()


In [36]:
# Bar chart of the five senders with the largest total amount sent
top_user_ids = top_users.index
top_user_totals = top_users.values
top_user_axis_labels = {'x': 'User', 'y': 'Total Sent'}

fig2 = px.bar(
    x=top_user_ids,
    y=top_user_totals,
    labels=top_user_axis_labels,
    title='Top 5 Users by Total Amount Sent',
)
fig2.show()


In [37]:
from sklearn.ensemble import IsolationForest
import numpy as np


In [38]:
# Work on a separate frame for modelling and add an integer code for each
# transaction type (codes come from pandas' categorical encoding).
df_ml = df_clean.assign(
    transaction_type_encoded=lambda frame: frame['transaction_type'].astype('category').cat.codes
)

# Feature matrix for the model: amount plus the encoded transaction type
feature_columns = ['amount', 'transaction_type_encoded']
features = df_ml[feature_columns].to_numpy()


In [39]:
# Fit the isolation forest on the feature matrix and label every row.
# fit_predict returns -1 for anomalies and 1 for normal points.
model = IsolationForest(n_estimators=100, contamination=0.01, random_state=42)
anomaly_labels = model.fit_predict(features)

# Store predictions as 1 = anomaly (treated as fraud), 0 = normal
df_ml['fraud_predicted'] = (anomaly_labels == -1).astype(int)


In [40]:
# Compare ML prediction vs. actual
from sklearn.metrics import classification_report, confusion_matrix

# Ground-truth labels vs. the model's anomaly flags
y_true = df_ml['is_fraud']
y_pred = df_ml['fraud_predicted']

report_text = classification_report(y_true, y_pred)
print("📊 Classification Report:")
print(report_text)

# Confusion Matrix (rows = actual, columns = predicted)
cm = confusion_matrix(y_true, y_pred)
print("🧩 Confusion Matrix:\n", cm)


📊 Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.99      0.93      8720
           1       0.33      0.03      0.05      1280

    accuracy                           0.87     10000
   macro avg       0.60      0.51      0.49     10000
weighted avg       0.80      0.87      0.82     10000

🧩 Confusion Matrix:
 [[8653   67]
 [1247   33]]


In [41]:
import plotly.express as px

# Hour-of-day bucket for every transaction
df_ml['hour'] = df_ml['timestamp'].dt.hour

# Per-hour totals of labelled fraud and of model-flagged fraud
grouped_by_hour = df_ml.groupby('hour')
actual_fraud_hour = grouped_by_hour['is_fraud'].sum()
predicted_fraud_hour = grouped_by_hour['fraud_predicted'].sum()

# Side-by-side table: one row per hour, with 'Hour' as an ordinary column
fraud_compare = (
    pd.concat(
        [actual_fraud_hour, predicted_fraud_hour],
        axis=1,
        keys=['Actual_Fraud', 'Predicted_Fraud'],
    )
    .rename_axis('Hour')
    .reset_index()
)

series_to_plot = ['Actual_Fraud', 'Predicted_Fraud']
fig = px.line(
    fraud_compare,
    x='Hour',
    y=series_to_plot,
    title='Actual vs Predicted Fraud per Hour',
)
fig.show()


In [42]:
# Set your fraud threshold (e.g., 5%)
fraud_threshold = 5.0  # 5%


def check_fraud_alert(df_batch, threshold=None):
    """Print a batch's predicted-fraud rate and whether it exceeds the alert threshold.

    Args:
        df_batch: DataFrame with a ``fraud_predicted`` column; the column is
            summed to obtain the number of flagged transactions.
        threshold: Alert threshold in percent. When omitted, the module-level
            ``fraud_threshold`` is used, so existing one-argument calls behave
            as before.

    Returns:
        None. The summary line and the alert/normal verdict are printed.
    """
    if threshold is None:
        threshold = fraud_threshold

    total_txns = len(df_batch)
    fraud_count = df_batch['fraud_predicted'].sum()
    # An empty batch has no fraud; guard the division so the rate is reported
    # as 0.00% instead of nan/inf from dividing by zero.
    fraud_rate = (fraud_count / total_txns) * 100 if total_txns else 0.0

    print(f"📊 Batch Transactions: {total_txns} | Predicted Fraud: {fraud_count} ({fraud_rate:.2f}%)")

    if fraud_rate > threshold:
        print("🚨 ALERT: High Fraud Detected! Take Action!")
    else:
        print("✅ Fraud levels normal.")

# Simulate checking on a sample batch.
# The sample is seeded so the printed result is reproducible on re-run, and
# the size is capped at the frame length so a small frame does not raise.
batch_sample = df_ml.sample(min(500, len(df_ml)), random_state=42)  # Simulate a new batch
check_fraud_alert(batch_sample)


📊 Batch Transactions: 500 | Predicted Fraud: 3 (0.60%)
✅ Fraud levels normal.


In [44]:
import time
# Simulate real-time monitoring: walk the frame in consecutive fixed-size
# slices, run the alert check on each, and pause between slices.
batch_size = 500
batch_starts = range(0, len(df_ml), batch_size)

for batch_number, start in enumerate(batch_starts, start=1):
    stop = start + batch_size
    batch = df_ml.iloc[start:stop]
    print(f"\n🟢 Checking Batch {batch_number}")
    check_fraud_alert(batch)
    time.sleep(2)  # Simulate time between batches



🟢 Checking Batch 1
📊 Batch Transactions: 500 | Predicted Fraud: 3 (0.60%)
✅ Fraud levels normal.

🟢 Checking Batch 2
📊 Batch Transactions: 500 | Predicted Fraud: 5 (1.00%)
✅ Fraud levels normal.

🟢 Checking Batch 3
📊 Batch Transactions: 500 | Predicted Fraud: 7 (1.40%)
✅ Fraud levels normal.

🟢 Checking Batch 4
📊 Batch Transactions: 500 | Predicted Fraud: 4 (0.80%)
✅ Fraud levels normal.

🟢 Checking Batch 5
📊 Batch Transactions: 500 | Predicted Fraud: 5 (1.00%)
✅ Fraud levels normal.

🟢 Checking Batch 6
📊 Batch Transactions: 500 | Predicted Fraud: 3 (0.60%)
✅ Fraud levels normal.

🟢 Checking Batch 7
📊 Batch Transactions: 500 | Predicted Fraud: 8 (1.60%)
✅ Fraud levels normal.

🟢 Checking Batch 8
📊 Batch Transactions: 500 | Predicted Fraud: 2 (0.40%)
✅ Fraud levels normal.

🟢 Checking Batch 9
📊 Batch Transactions: 500 | Predicted Fraud: 4 (0.80%)
✅ Fraud levels normal.

🟢 Checking Batch 10
📊 Batch Transactions: 500 | Predicted Fraud: 9 (1.80%)
✅ Fraud levels normal.

🟢 Checking Batch 1

In [49]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def send_fraud_alert_email(fraud_rate, total_txns, fraud_count):
    """Email a fraud alert summarising one analysed batch via Gmail SMTP.

    Credentials are read from the environment rather than stored in the
    notebook:

    * ``FRAUD_ALERT_APP_PASSWORD`` (required): Gmail app password.
    * ``FRAUD_ALERT_SENDER`` (optional): sender address.
    * ``FRAUD_ALERT_RECEIVER`` (optional): recipient address.

    SECURITY NOTE: an app password was previously hardcoded in this cell.
    It must be treated as compromised and revoked in the Google account.

    Args:
        fraud_rate: Predicted fraud rate in percent (formatted to 2 decimals).
        total_txns: Number of transactions analysed.
        fraud_count: Number of transactions predicted as fraudulent.

    Returns:
        None. Success or failure is reported with a printed message; SMTP and
        network errors are caught so a failed alert does not stop the notebook.
    """
    # Local imports keep this cell self-contained without touching the
    # notebook's existing import lines.
    import os
    import ssl

    sender_email = os.environ.get("FRAUD_ALERT_SENDER", "akshitamishra027@gmail.com")
    receiver_email = os.environ.get("FRAUD_ALERT_RECEIVER", "makshita7844@gmail.com")
    app_password = os.environ.get("FRAUD_ALERT_APP_PASSWORD")

    if not app_password:
        # Fail fast, before any network activity, when no credential is configured.
        print("❌ Error sending email: FRAUD_ALERT_APP_PASSWORD environment variable is not set.")
        return

    subject = "🚨 Fraud Alert: High Fraud Detected in Transactions"
    body = f"""
    ALERT: Your financial agent detected high fraud activity.
    
    📊 Total Transactions Analyzed: {total_txns}
    ⚠️ Predicted Fraudulent Transactions: {fraud_count}
    🔺 Fraud Rate: {fraud_rate:.2f}%

    Please investigate immediately.

    -- AI Financial Agent
    """

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject

    msg.attach(MIMEText(body, 'plain'))

    try:
        # The timeout keeps an unreachable server from hanging the kernel.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            # An explicit default context verifies the server certificate and
            # hostname before the password is sent.
            server.starttls(context=ssl.create_default_context())
            server.login(sender_email, app_password)
            server.send_message(msg)
            print("✅ Email alert sent successfully.")
    except (smtplib.SMTPException, OSError) as e:
        # SMTP protocol errors plus socket/TLS/timeout failures (all OSError).
        print("❌ Error sending email:", e)
