In [22]:
import pandas as pd
import sqlite3

In [23]:
# Load the subset dataset

df = pd.read_csv('Data/creditcard_subset.csv')
print("Columns:", df.columns.tolist())
print("Shape:", df.shape)

Columns: ['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount', 'Class']
Shape: (50000, 31)


In [24]:
# Create SQLite database
conn = sqlite3.connect('paysim.db')
df.to_sql('transactions', conn, if_exists='replace', index=False)
print("Database created and data loaded.")

Database created and data loaded.


Fraud Count by Hour:



In [25]:
query_hour = """
SELECT ROUND(Time / 3600) AS Hour, COUNT(*) AS Fraud_Count
FROM transactions
WHERE Class = 1
GROUP BY ROUND(Time / 3600)
ORDER BY Fraud_Count DESC
"""
fraud_by_hour = pd.read_sql(query_hour, conn)
fraud_by_hour.to_csv('fraud_by_hour.csv', index=False)
print(fraud_by_hour.head())

   Hour  Fraud_Count
0  16.0            6
1  11.0            6
2  21.0            5
3  15.0            5
4  43.0            4


Average Amount by Fraud Status:



In [26]:
query_amount = """
SELECT Class, AVG(Amount) AS Avg_Amount
FROM transactions
GROUP BY Class
"""
amount_by_fraud = pd.read_sql(query_amount, conn)
amount_by_fraud.to_csv('amount_by_fraud.csv', index=False)
print(amount_by_fraud)

   Class  Avg_Amount
0      0   87.249921
1      1  164.233855


Top 10 Fraud Transactions by Amount:

In [27]:
query_top = """
SELECT Time, Amount
From transactions
WHERE Class = 1
ORDER BY Amount DESC
LIMIT 10
"""
top_fraud = pd.read_sql(query_top, conn)
top_fraud.to_csv('top_fraud.csv', index=False)
print(top_fraud)

       Time   Amount
0  154278.0  1504.93
1   62467.0  1402.16
2  154309.0  1096.99
3  147501.0   996.27
4   87883.0   829.41
5   39729.0   776.83
6   64785.0   720.38
7  140308.0   592.90
8   52814.0   519.90
9   60353.0   454.82


In [28]:
conn.close()
print("Database connection closed.")

Database connection closed.
