In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
file = '../raw_data/HI-Small_Trans.csv'
df = pd.read_csv(file, decimal=',')

In [3]:
df.drop("Amount Paid", axis=1, inplace=True)

df = df.rename(columns={"Timestamp": "timestamp",
                   "From Bank": "from_bank",
                   "Account": "from_account",
                   "Account.1": "to_account",
                   "From Bank": "from_bank",
                   "To Bank": "to_bank",
                   "Amount Received": "amount",
                   "Payment Format": "payment_format",
                   "Is Laundering": "is_laundering"})


In [4]:
# dict for currency conversion

currency_dict = {"US Dollar": "USD",
                 "Bitcoin": "XBT",
                 "Euro": "EUR",
                 "Australiean Dollar": "AUD",
                 "Yuan": "CNY",
                 "Rupee": "INR",
                 "Yen": "JPY",
                 "Mexican Peso": "MXN",
                 "UK Pound": "GBP",
                 "Ruble": "RUB",
                 "Canadian Dollar": "CAD",
                 "Swiss Franc": "CHF",
                 "Brazil Real": "BRL",
                 "Saudi Riyal": "SAR",
                 "Shekel": "ILS"}

In [5]:
# put currency pair together and delete obsolete columns

df["Receiving Currency"] = df["Receiving Currency"].map(currency_dict)
df["Payment Currency"] = df["Payment Currency"].map(currency_dict)

df["currency_pair"] = df["Receiving Currency"] + "_" + df["Payment Currency"]
df = df.drop(['Receiving Currency', 'Payment Currency'], axis=1)

In [7]:
# list all unique accounts

from_accounts = df["from_account"].to_numpy()
to_accounts  = df["to_account"].to_numpy()

all_accounts = np.append(from_accounts, to_accounts)
all_accounts = np.unique(all_accounts)

In [9]:
# sort whole dataframe by time

sorted_df = df.sort_values(by='timestamp')

sorted_df = sorted_df.drop("timestamp", axis=1)

In [None]:
# final master df

master_df = pd.DataFrame()


# loop over df to append to master_df

for account in all_accounts:
    mask = (sorted_df["from_account"] == account) | (sorted_df["to_account"] == account)
    temp_df = df[mask]
    

In [10]:
# drop columns depending on purpose

account = all_accounts[0]

print(account)

mask = (sorted_df["from_account"] == account) | (sorted_df["to_account"] == account)
temp_df = sorted_df[mask]

temp_df


100428660


Unnamed: 0,from_bank,from_account,to_bank,to_account,amount,payment_format,is_laundering,currency_pair
122160,70,100428660,10642,8147D3210,1463.34,Cheque,0,USD_USD
120978,70,100428660,15916,8142568B0,66884.37,Cheque,0,USD_USD
16577,70,100428660,21940,801CDF8A0,18411882.89,Cash,0,USD_USD
14317,70,100428660,220,801872B60,7500.23,Cheque,0,USD_USD
18523,70,100428660,21174,802253A00,88.83,Cheque,0,USD_USD
...,...,...,...,...,...,...,...,...
5075230,70,100428660,213580,80BD645C0,156.86,Credit Card,0,USD_USD
5075437,70,100428660,30851,80E5F8990,354.22,Cash,0,USD_USD
5075054,70,100428660,2454,80941CF80,19.77,Cash,0,USD_USD
5074508,70,100428660,13037,8018531C0,928.08,Credit Card,0,USD_USD


In [None]:
# Idos idea

temp_df.set_index('Account', inplace=True)

temp_df

In [11]:
df_transposed = temp_df.transpose()
df_transposed

Unnamed: 0,122160,120978,16577,14317,18523,111363,104813,103759,19192,19494,...,5075045,5074410,5075598,5074732,5074731,5075230,5075437,5075054,5074508,5074472
from_bank,70,70,70,70,70,70,70,70,70,70,...,70,70,70,70,70,70,70,70,70,70
from_account,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660,...,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660,100428660
to_bank,10642,15916,21940,220,21174,44345,236046,35684,4766,220,...,28771,1420,4726,2991,2991,213580,30851,2454,13037,11318
to_account,8147D3210,8142568B0,801CDF8A0,801872B60,802253A00,812BA6A80,811729500,8113BBD50,802017A10,802442CD0,...,80923E5E0,8006199D0,8115F2160,80385B520,80385B520,80BD645C0,80E5F8990,80941CF80,8018531C0,8011F26B0
amount,1463.34,66884.37,18411882.89,7500.23,88.83,22984258.64,11230.32,21314.05,141.17,6506.14,...,135.89,199.61,1804.01,14.42,198.63,156.86,354.22,19.77,928.08,19.78
payment_format,Cheque,Cheque,Cash,Cheque,Cheque,Cash,Credit Card,Cash,Credit Card,Cheque,...,Cheque,Cash,Cash,Cash,Cheque,Credit Card,Cash,Cash,Credit Card,Credit Card
is_laundering,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
currency_pair,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,...,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD,USD_USD


In [None]:
df.Account.value_counts().hist(bins=)