In [86]:
import pandas as pd
import numpy as np
import datetime

In [115]:
# Load each CSV export into its own DataFrame.
blacklist = pd.read_csv("user_materials/Blacklist.csv")
devices = pd.read_csv("user_materials/Devices.csv")
hosts = pd.read_csv("user_materials/Hosts.csv")
notes = pd.read_csv("user_materials/Notes.csv")
transactions = pd.read_csv("user_materials/Transactions.csv")
users = pd.read_csv("user_materials/Users.csv")

# hosts: HOST_ID is the key used to join against transactions, so both
# tables get the same unsigned 32-bit representation.
hosts["HOST_ID"] = np.uint32(hosts["HOST_ID"])

# notes: parse the date each keyword was added (day comes first).
notes["DATE_ADDED"] = pd.to_datetime(notes["DATE_ADDED"], dayfirst=True)

# transactions: same HOST_ID cast as hosts, plus day-first timestamp parsing.
transactions["HOST_ID"] = np.uint32(transactions["HOST_ID"])
transactions["DATE_TIME"] = pd.to_datetime(transactions["DATE_TIME"], dayfirst=True)

# Suspicious note filtering

In [76]:
def notes_at_date(date: datetime.datetime, notes_df: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Return the keyword rows that were added strictly before ``date``.

    Args:
        date: Moment to evaluate the keyword list at. A keyword whose
            ``DATE_ADDED`` equals ``date`` exactly is not included.
        notes_df: Frame with a ``DATE_ADDED`` column to filter. When omitted,
            the notebook-level ``notes`` frame is used.

    Returns:
        The subset of rows whose ``DATE_ADDED`` is earlier than ``date``.
    """
    source = notes if notes_df is None else notes_df
    return source[source["DATE_ADDED"] < date]


def filter_suspicious_transactions(row, notes_df: "pd.DataFrame | None" = None) -> bool:
    """Tell whether a transaction memo contains a keyword active at its time.

    The match is a case-insensitive substring test of every ``KEYWORD`` that
    was added before the transaction's ``DATE_TIME`` against its ``NOTES``.

    Args:
        row: One transaction, indexable by ``"DATE_TIME"`` and ``"NOTES"``
            (e.g. a row passed by ``DataFrame.apply(..., axis=1)``).
        notes_df: Optional keyword frame with ``KEYWORD`` and ``DATE_ADDED``
            columns; defaults to the notebook-level ``notes`` frame.

    Returns:
        ``True`` if at least one active keyword occurs in the memo, else
        ``False``. A missing / non-string memo never matches.
    """
    memo = row["NOTES"]
    # Empty CSV cells are read as NaN (a float); calling .lower() on that
    # would raise, so treat a missing memo as "nothing to match".
    if not isinstance(memo, str):
        return False

    # Lowercase the memo once instead of once per keyword.
    memo = memo.lower()
    active_keywords = notes_at_date(row["DATE_TIME"], notes_df)["KEYWORD"]
    # Skip non-string (missing) keywords for the same reason as above.
    return any(
        isinstance(keyword, str) and keyword.lower() in memo
        for keyword in active_keywords
    )

In [77]:
# Run the keyword check once per transaction row; the result is a boolean
# value per row, which is then used to show only the matching transactions.
suspicious_transactions = transactions.apply(
    filter_suspicious_transactions,
    axis="columns",
)
transactions[suspicious_transactions]

Unnamed: 0,TXID,DATE_TIME,SENDER_ID,RECIPIENT_ID,HOST_ID,AMOUNT_SENT,AMOUNT_RECEIVED,NOTES,FEE
376,377,15/03/2025 08:14:54,643351,625134,584090,434.65,431.48,Spconcealed shellrts gear,3.17


# Routing issues

In [107]:
# Attach to every transaction the host it was routed through (via HOST_ID)
# and the sending user's record (SENDER_ID -> USER_ID). Column names shared
# by the two sides of the second merge get the "_hosts" / "_users" suffixes.
m = (
    transactions
    .merge(hosts, on="HOST_ID")
    .merge(
        users,
        left_on="SENDER_ID",
        right_on="USER_ID",
        suffixes=("_hosts", "_users"),
    )
)
m.head()

Unnamed: 0,TXID,DATE_TIME,SENDER_ID,RECIPIENT_ID,HOST_ID,AMOUNT_SENT,AMOUNT_RECEIVED,NOTES,FEE,IP_ADDRESS,COUNTRY_hosts,PROVIDER,USER_ID,NAME,DAILY_LIMIT,ESTABLISHED_DATE,ASSIGNED_DEVICE,COUNTRY_users
0,1,14/11/2023 09:38:00,290279,771256,892058,999.42,993.42,TEXTBOOK SHARE,6.0,55.51.77.106,United States,Linode,290279,Isabelle Jackson,49711,21/09/2023,621430,United States
1,76,06/01/2025 15:01:52,290279,962129,892058,800.73,795.73,Thanks for dinner!,5.0,55.51.77.106,United States,Linode,290279,Isabelle Jackson,49711,21/09/2023,621430,United States
2,447,15/03/2024 18:59:32,290279,795275,892058,483.71,480.29,Pet food share,3.42,55.51.77.106,United States,Linode,290279,Isabelle Jackson,49711,21/09/2023,621430,United States
3,746,22/08/2025 15:34:42,290279,244990,892058,60.75,59.45,Tutorial payment,1.3,55.51.77.106,United States,Linode,290279,Isabelle Jackson,49711,21/09/2023,621430,United States
4,807,03/09/2025 20:49:25,290279,251878,892058,797.7,792.71,Cleaning supplies,4.99,55.51.77.106,United States,Linode,290279,Isabelle Jackson,49711,21/09/2023,621430,United States


In [106]:
# Show transactions whose host country differs from the sender's country.
m.loc[m["COUNTRY_hosts"].ne(m["COUNTRY_users"])]

Unnamed: 0,TXID,DATE_TIME,SENDER_ID,RECIPIENT_ID,HOST_ID,AMOUNT_SENT,AMOUNT_RECEIVED,NOTES,FEE,IP_ADDRESS,COUNTRY_hosts,PROVIDER,USER_ID,NAME,DAILY_LIMIT,ESTABLISHED_DATE,ASSIGNED_DEVICE,COUNTRY_users
684,372,01/08/2025 08:11:29,756639,951587,426032,545.96,542.23,Cooking class,3.73,120.37.18.160,Denmark,Linode,756639,Thomas Ferrari,17083,05/07/2025,314283,Norway


# Daily limits

In [None]:
# Add a "day" column holding only the calendar date of each transaction
# (time of day dropped). This writes a new column onto `transactions` in
# place and relies on DATE_TIME already being a datetime column, since the
# `.dt` accessor is used.
transactions["day"] = transactions["DATE_TIME"].dt.date
# Display the new column as a quick sanity check.
transactions["day"]

0      2023-11-14
1      2024-10-10
2      2024-06-21
3      2024-03-17
4      2025-06-14
          ...    
995    2023-10-02
996    2024-11-13
997    2025-06-25
998    2025-04-16
999    2021-06-20
Name: day, Length: 1000, dtype: object

In [153]:
# Lookup table USER_ID -> DAILY_LIMIT, built straight from the two columns.
# (Iterating `users.iloc` only works through Python's fallback of calling
# __getitem__ with 0, 1, 2, ... until IndexError, and materialises a full
# row Series per user.) If a USER_ID occurs more than once, the last row wins.
user_transaction_limits = dict(zip(users["USER_ID"], users["DAILY_LIMIT"]))

# Total AMOUNT_SENT per sender per calendar day; the result is a Series
# indexed by (SENDER_ID, day).
user_sums_by_day = transactions.groupby(["SENDER_ID", "day"])["AMOUNT_SENT"].sum()

In [147]:
# Display the per-sender, per-day AMOUNT_SENT totals for inspection.
user_sums_by_day

SENDER_ID  day       
123752     2023-11-02    636.58
           2023-12-24    981.89
           2024-01-29    344.85
           2024-04-12    810.93
           2024-04-21    141.05
                          ...  
999885     2024-09-02     91.38
           2025-04-18    109.93
           2025-08-19    492.98
           2025-08-25    887.23
           2025-09-02    194.77
Name: AMOUNT_SENT, Length: 975, dtype: float64

In [151]:
# Walk the (SENDER_ID, day) -> total pairs and report every day on which a
# sender's total exceeded that sender's DAILY_LIMIT.
for (user_id, date), val in user_sums_by_day.items():
    if val > user_transaction_limits[user_id]:
        print(user_id, date)

543676 2022-07-16
543676 2024-01-24
543676 2025-08-06
