In [1]:
# read data files
import pandas as pd

df = pd.read_csv('../data/cc_data.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%m/%d/%Y %H:%M')

print(df.head())


            timestamp             location  price  last4ccnum
0 2014-01-06 07:28:00  Brew've Been Served  11.34        4795
1 2014-01-06 07:34:00     Hallowed Grounds  52.22        7108
2 2014-01-06 07:35:00  Brew've Been Served   8.33        6816
3 2014-01-06 07:36:00     Hallowed Grounds  16.72        9617
4 2014-01-06 07:37:00  Brew've Been Served   4.24        7384


# Trying to find the same card with purchases at different locations in a short period of time

In [19]:
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%m/%d/%Y %H:%M')

# sort by time
df.sort_values(by=['last4ccnum', 'timestamp'], inplace=True)

# define detection function
def find_suspicious_transactions(data):
    # interval window = 10mins
    time_diff = pd.Timedelta(minutes=10)
    suspicious_transactions = []

    # group by card num
    for card_num, group in data.groupby('last4ccnum'):
        if len(group) > 1:  # only check when more than 1 
            
            for i in range(len(group) - 1):  
                current_transaction = group.iloc[i]
                
                for j in range(i + 1, len(group)):
                    subsequent_transaction = group.iloc[j]
                    if (subsequent_transaction['timestamp'] - current_transaction['timestamp']) <= time_diff:
                        if subsequent_transaction['location'] != current_transaction['location']:
                            suspicious_transactions.append({
                                'Credit Card Ending': card_num,
                                'Initial Transaction Time': current_transaction['timestamp'],
                                'Subsequent Transaction Time': subsequent_transaction['timestamp'],
                                'Initial Location': current_transaction['location'],
                                'Subsequent Location': subsequent_transaction['location']
                            })
                    else:
                        break

    return suspicious_transactions
suspicious = find_suspicious_transactions(df)

for trans in suspicious:
    print(f"Credit Card Ending: {trans['Credit Card Ending']}")
    print(f"Initial Transaction Time: {trans['Initial Transaction Time']}")
    print(f"Subsequent Transaction Time: {trans['Subsequent Transaction Time']}")
    print(f"Initial Location: {trans['Initial Location']}")
    print(f"Subsequent Location: {trans['Subsequent Location']}\n")


Credit Card Ending: 9551
Initial Transaction Time: 2014-01-13 13:18:00
Subsequent Transaction Time: 2014-01-13 13:28:00
Initial Location: U-Pump
Subsequent Location: Hippokampos

Credit Card Ending: 9551
Initial Transaction Time: 2014-01-13 19:20:00
Subsequent Transaction Time: 2014-01-13 19:30:00
Initial Location: Frydos Autosupply n' More
Subsequent Location: Ouzeri Elian



In [1]:
# Calculate descriptive statistics
mean_price = df['price'].mean()
median_price = df['price'].median()
std_dev_price = df['price'].std()
min_price = df['price'].min()
max_price = df['price'].max()
quantiles = df['price'].quantile([0.25, 0.5, 0.75])

# Print the statistics
print(f"Mean price: {mean_price}")
print(f"Median price: {median_price}")
print(f"Standard Deviation: {std_dev_price}")
print(f"Minimum price: {min_price}")
print(f"Maximum price: {max_price}")
print("Quantiles:")
print(quantiles)


Mean price: 207.69653020134234
Median price: 28.240000000000002
Standard Deviation: 740.8554671870584
Minimum price: 2.01
Maximum price: 10000.0
Quantiles:
0.25    15.1275
0.50    28.2400
0.75    67.1775
Name: price, dtype: float64


# Trying to find unusual large transactions

In [6]:
top_transactions = df.sort_values(by='price', ascending=False).head(10)

# Print the top 10 transactions
print(top_transactions)

             timestamp                      location     price  last4ccnum
841   01/13/2014 19:20     Frydos Autosupply n' More  10000.00        9551
309   01/08/2014 13:03         Carlyle Chemical Inc.   4983.52        4530
966   01/14/2014 15:32                 Abila Airport   4918.39        8642
352   01/08/2014 14:26         Carlyle Chemical Inc.   4901.88        7792
220   01/07/2014 14:34         Carlyle Chemical Inc.   4803.13        7792
437   01/09/2014 13:18                 Abila Airport   4792.50        2276
1090  01/15/2014 15:11        Maximum Iron and Steel   4788.22        4530
1160  01/16/2014 11:25           Nationwide Refinery   4742.67        7792
1034  01/15/2014 11:44  Stewart and Sons Fabrication   4740.68        9735
1033  01/15/2014 11:18  Stewart and Sons Fabrication   4545.38        9152


In [5]:
import pandas as pd

# Load the dataset
file_path = 'cc_data.csv'  # Update this with your file path if necessary
df = pd.read_csv(file_path)

# Define a threshold for large transactions
threshold = 2000

# Filter the dataframe for transactions above the threshold
large_transactions = df[df['price'] > threshold]

# Print the large transactions
print(large_transactions)


             timestamp                      location     price  last4ccnum
29    01/06/2014 08:23                 Abila Airport   4540.08        9220
32    01/06/2014 10:18        Maximum Iron and Steel   2859.51        9220
33    01/06/2014 11:33  Stewart and Sons Fabrication   2144.62        9735
34    01/06/2014 11:57         Carlyle Chemical Inc.   3959.66        9220
157   01/07/2014 09:17                 Abila Airport   3840.37        3506
218   01/07/2014 14:16               Abila Scrapyard   2149.28        2276
219   01/07/2014 14:19    Kronos Pipe and Irrigation   3920.82        4530
220   01/07/2014 14:34         Carlyle Chemical Inc.   4803.13        7792
222   01/07/2014 15:20        Maximum Iron and Steel   3207.31        4530
223   01/07/2014 15:41                 Abila Airport   3411.29        8642
289   01/08/2014 09:43         Carlyle Chemical Inc.   3048.31        7792
290   01/08/2014 09:54           Nationwide Refinery   4513.16        9152
291   01/08/2014 10:14   