In [70]:
import pandas as pd
import numpy as np
import os

def create_dummy_fraud_detection_dataset(num_rows=1000, num_columns=28, seed=None, output_dir=None):
    """Generate a synthetic credit-card transaction table and save it as a CSV file.

    The table has a ``Time`` column (1, 2, ..., num_rows), ``num_columns``
    standard-normal feature columns named ``V1`` .. ``V<num_columns>``, and an
    ``Amount`` column drawn uniformly from [1, 1000). Rows whose amount exceeds
    975 are turned into fraud-like outliers: both their feature values and
    their amount are multiplied by 10.

    Parameters
    ----------
    num_rows : int, default 1000
        Number of transactions (rows) to generate.
    num_columns : int, default 28
        Number of anonymised feature columns (``V1`` .. ``V<num_columns>``).
    seed : int or None, default None
        Seed for a private random generator. When None, values are drawn from
        NumPy's global random state, as the function did before this parameter
        existed.
    output_dir : str or None, default None
        Directory the CSV is written to. When None, the directory
        ``~/Documents/Github/Credit-Card-Fraud-Detection/data`` is used.
        The directory is created if it does not exist.

    Returns
    -------
    None
        The dataset is written to ``credit_card_fraud_data.csv`` inside
        ``output_dir`` and the saved path is printed.
    """
    # Amounts above this threshold are treated as fraud-like and amplified.
    fraud_amount_threshold = 975
    fraud_multiplier = 10

    # Without a seed keep drawing from NumPy's global state (previous behaviour);
    # with a seed use a private generator so the dataset is reproducible.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Random values for the V1..V<num_columns> feature columns
    data = rng.randn(num_rows, num_columns)

    # Time is simply the 1-based row position: 1, 2, 3, ...
    time = np.arange(1, num_rows + 1)

    # Transaction amounts between $1 and $1000
    amount = rng.uniform(1, 1000, num_rows)

    # Compute the mask once, BEFORE scaling the amounts, so that the features
    # and the amounts of exactly the same rows are amplified.
    suspicious = amount > fraud_amount_threshold
    data[suspicious] *= fraud_multiplier
    amount[suspicious] *= fraud_multiplier

    # Derive the feature names from num_columns so names and data always agree.
    feature_names = [f'V{i}' for i in range(1, num_columns + 1)]

    # column_stack builds one float array, so Time is stored as float as well.
    df = pd.DataFrame(np.column_stack((time, data, amount)),
                      columns=['Time'] + feature_names + ['Amount'])

    # Resolve the output directory (project data folder under the user's home by default)
    if output_dir is None:
        output_dir = os.path.join(os.path.expanduser('~'), 'Documents/Github/Credit-Card-Fraud-Detection/data')

    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, 'credit_card_fraud_data.csv')

    # Save the DataFrame as a CSV file (without the pandas index)
    df.to_csv(file_path, index=False)

    print(f"CSV file saved at: {file_path}")

# Generate the synthetic dataset and write it to disk (the function prints the saved CSV path)
create_dummy_fraud_detection_dataset()


CSV file saved at: /Users/alperefesahin/Documents/Github/Credit-Card-Fraud-Detection/data/credit_card_fraud_data.csv


In [71]:
# Read the generated CSV back from disk and preview its first ten rows
file = os.path.join(
    os.path.expanduser('~'),
    'Documents/Github/Credit-Card-Fraud-Detection/data',
    'credit_card_fraud_data.csv',
)
df = pd.read_csv(file)

df.head(n=10)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,1.0,2.360711,-1.340329,-0.266155,-0.146773,-0.113791,-0.498714,-0.081863,0.775274,-0.589141,...,-1.264664,0.099289,1.353339,0.038316,-1.494783,-0.521578,-0.63075,-0.684547,0.938002,48.035223
1,2.0,0.887436,0.929977,-1.30496,-0.874665,-0.338074,0.330893,-1.108401,-0.008758,-0.435931,...,1.248951,-0.209464,0.288581,-1.202556,-0.445778,1.305949,-0.877375,1.617897,0.369638,749.867624
2,3.0,-0.969446,-0.906956,1.429651,0.692321,-0.099224,-0.163254,-0.38246,-0.209153,1.778792,...,-0.22525,0.968748,-1.034439,0.030322,1.407748,0.868051,-0.183395,0.505949,0.609561,443.118499
3,4.0,-0.63053,-0.733362,-0.535964,-0.725874,-0.098462,0.073949,-1.377229,-0.625888,0.284584,...,0.012126,-0.929484,0.450202,-0.882617,0.079028,0.151087,-1.390825,-0.726259,-1.445278,504.099822
4,5.0,0.00496,-1.891662,0.444574,1.320108,0.49841,0.06133,2.048006,-1.157922,-1.028235,...,-0.306783,-0.123106,0.552765,2.088462,0.139472,-1.113159,0.724724,0.656567,1.869401,884.2648
5,6.0,-0.360801,0.777536,1.44555,0.449746,0.484396,-1.51834,-0.010763,-0.698544,1.074912,...,0.028377,0.754188,0.971981,1.374796,-1.554154,0.154864,1.426371,0.713028,-0.740253,468.120283
6,7.0,-0.217422,-1.326081,-0.321282,-0.416312,-0.379864,-0.435204,0.284501,0.876658,1.174746,...,-0.14646,-1.137859,0.319883,-0.043923,0.115732,1.836466,-0.391068,2.021687,0.631886,346.678091
7,8.0,0.622201,-2.277141,-2.836874,-0.010287,0.133106,-0.921682,-0.633045,-1.941233,-1.47132,...,0.641543,0.626787,2.630579,-0.074751,-0.805147,-1.557494,0.278785,0.745097,0.850332,823.694137
8,9.0,-0.223203,-0.701941,-0.092948,-0.512009,-0.274624,-0.988207,-0.740732,-0.394396,-0.860509,...,0.168115,-0.800408,1.751779,0.815723,0.63972,0.364739,-0.646401,0.500794,0.389879,378.372791
9,10.0,-0.782026,1.075888,0.964041,0.005571,-0.880525,-1.05325,0.130386,-1.54677,-1.576782,...,-0.41683,0.520642,1.409089,0.177418,-1.354241,-0.320309,-0.449473,0.771463,2.414992,163.984427


In [72]:
# Show the amplified (fraud-like) amounts: select the Amount column for rows
# above 9000 in one .loc step, then print the header and the counts together.
print(
    "\nAmounts greater than 9000:",
    df.loc[df['Amount'] > 9000, 'Amount'].value_counts(),
    sep="\n",
)


Amounts greater than 9000:
Amount
9753.015838    1
9754.704957    1
9796.976830    1
9912.591730    1
9804.701156    1
9812.847142    1
9895.904292    1
9952.367014    1
9879.044141    1
9978.026169    1
9923.369183    1
9818.763226    1
9856.821036    1
9782.260082    1
9759.969473    1
9789.600503    1
9977.815615    1
9804.904405    1
9782.906849    1
9796.855987    1
9810.882489    1
Name: count, dtype: int64


In [73]:
# List the dtype of every column in the DataFrame reloaded from the CSV
df.dtypes

Time      float64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
dtype: object