Importing Libraries

In [7]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

Dataset Generation

In [8]:
# Synthetic credit-card transaction generator.
#
# Seed BOTH random sources: the stdlib `random` module drives dates, times,
# merchants and transaction types, while NumPy drives amounts and card numbers.
# Seeding only NumPy leaves most columns different on every run.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Number of records to generate
num_records = 400

# Generate Transaction IDs (1..num_records)
transaction_ids = range(1, num_records + 1)

# Generate random dates within [start_date, end_date], both ends inclusive.
# random.randint is inclusive on both bounds, so the largest offset is derived
# from the range itself; a fixed 365 would allow 2024-01-01, past end_date.
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)
max_day_offset = (end_date - start_date).days
date_list = [start_date + timedelta(days=random.randint(0, max_day_offset)) for _ in range(num_records)]
dates = [d.date() for d in date_list]

# Generate random times of day by drawing a uniform second-of-day
# (every HH:MM:SS combination is equally likely).
SECONDS_PER_DAY = 24 * 60 * 60
times = [
    (datetime.min + timedelta(seconds=random.randrange(SECONDS_PER_DAY))).time()
    for _ in range(num_records)
]

# Generate random amounts (uniform between 1 and 1000 dollars)
amounts = np.random.uniform(1, 1000, num_records)

# Generate random merchants (Merchant_1 .. Merchant_50)
merchants = ['Merchant_' + str(random.randint(1, 50)) for _ in range(num_records)]

# Generate random 16-digit card numbers.
# NOTE: these are independent draws; uniqueness is NOT enforced, so the same
# number may in principle appear more than once.
card_numbers = np.random.randint(1000000000000000, 9999999999999999, num_records, dtype=np.int64)

# Generate random transaction types. The choices get their own name so that
# `transaction_types` only ever refers to the generated column.
TRANSACTION_TYPE_CHOICES = ['purchase', 'withdrawal', 'transfer']
transaction_types = [random.choice(TRANSACTION_TYPE_CHOICES) for _ in range(num_records)]

# Create DataFrame
data = {
    'Transaction ID': transaction_ids,
    'Date': dates,
    'Time': times,
    'Amount': amounts,
    'Merchant': merchants,
    'Card Number': card_numbers,
    'Transaction Type': transaction_types
}

df = pd.DataFrame(data)

# Print first few rows to verify
print(df.head())

# Save to Excel file. '/content' is the Colab working directory; fall back to
# the current working directory so the notebook also runs outside Colab.
OUTPUT_DIR = Path('/content') if Path('/content').is_dir() else Path.cwd()
df.to_excel(OUTPUT_DIR / 'credit_card_transactions.xlsx', index=False)

   Transaction ID        Date      Time      Amount     Merchant  \
0               1  2023-07-01  11:18:14  549.264690  Merchant_46   
1               2  2023-04-02  13:50:17  715.474177  Merchant_35   
2               3  2023-01-02  00:20:54  603.160613  Merchant_33   
3               4  2023-10-18  05:16:44  545.338300  Merchant_10   
4               5  2023-08-24  09:53:44  424.231145  Merchant_40   

        Card Number Transaction Type  
0  8020663219505600         transfer  
1  2700694006837080         transfer  
2  1102403226669007         purchase  
3  9804902423091900         transfer  
4  7417772457172645       withdrawal  


Represent Transaction Data as Matrices/Vectors (Linear Algebra)

In [9]:
# Columns that take part in the numeric (linear-algebra) representation
numeric_cols = ['Amount']

# Standardize the numeric columns with StandardScaler.
# NOTE: the scaled values are written back over the same columns, so from this
# point on df['Amount'] holds standardized values rather than dollar amounts.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

# View the (now standardized) 'Amount' column as a 1-D numpy vector
amounts_vector = df['Amount'].to_numpy()

# Show the leading entries of the vector
print("Vector representation of 'Amount':")
print(amounts_vector[:10])

Vector representation of 'Amount':
[ 0.18340141  0.75915904  0.37009947  0.1698002  -0.24972079  0.519357
 -0.20150651  1.37024152  1.61902207 -0.38888222]


Apply Statistical Methods (Anomaly Detection)

In [10]:
# Fit an Isolation Forest on the single 'Amount' feature.
# contamination=0.01 encodes the assumption that about 1% of rows are anomalous.
iso_forest = IsolationForest(contamination=0.01, random_state=0)
amount_features = df[['Amount']]

# NOTE: despite the column name, this stores the labels returned by
# fit_predict (-1 is the outlier label tested below), not a continuous score.
df['Anomaly_Score'] = iso_forest.fit_predict(amount_features)

# Translate the outlier label into a human-readable fraud flag
is_outlier = df['Anomaly_Score'] == -1
df['Potential Fraud'] = np.where(is_outlier, 'Yes', 'No')

# Show the first few flagged transactions
report_columns = ['Transaction ID', 'Date', 'Time', 'Amount', 'Potential Fraud']
flagged = df.loc[df['Potential Fraud'] == 'Yes', report_columns]
print("Flagged potential fraud transactions:")
print(flagged.head())



Flagged potential fraud transactions:
     Transaction ID        Date      Time    Amount Potential Fraud
52               53  2023-05-17  07:02:30  1.704537             Yes
99              100  2023-03-13  07:58:51 -1.699565             Yes
149             150  2023-09-17  11:37:26  1.740780             Yes
210             211  2023-12-15  09:44:49  1.711337             Yes


Save Processed Data to Excel

In [12]:
# Persist the processed frame (scaled amounts plus anomaly columns) to Excel.
# '/content' is the Colab working directory; when it does not exist (any
# non-Colab environment) fall back to the current working directory instead of
# failing on a missing folder.
output_dir = Path('/content') if Path('/content').is_dir() else Path.cwd()
processed_path = output_dir / 'processed_credit_card_transactions.xlsx'
df.to_excel(processed_path, index=False)
print("Processed data saved to 'processed_credit_card_transactions.xlsx'.")

Processed data saved to 'processed_credit_card_transactions.xlsx'.
