In [47]:
import pandas as pd
from edge_list_builder import EdgeListBuilder
from wallet_builder import WalletBuilder
from wallet import Wallet
import csv

In [8]:
# edge-list-builder example:
## within edge_list_builder.py, you can change the directory that the edgecsv is exported to
## the EdgeListBuilder class takes in a mapped-block-csv file, i.e. a file in which string addresses have already been mapped to integer addresses
## a mapped-block-csv file can contain multiple block heights (you can concatenate the files yourself before building the edgecsv)
## here is a mapped-block-csv file that the EdgeListBuilder can read (file name: block546634_546636_mapped.csv; this file contains block heights 546634-546636)

In [5]:
## load the mapped-block-csv into a DataFrame and display it
mappedcsv="../mapped/block546634_546636_mapped.csv"
mapped_df=pd.read_csv(mappedcsv)
mapped_df

Unnamed: 0,tran_id,value,tran_type,block_id,addr
0,0,12.567255,output,546634,1
1,1,0.001637,input,546634,2
2,1,0.001062,input,546634,3
3,1,0.082945,input,546634,4
4,1,0.002452,input,546634,5
...,...,...,...,...,...
32939,3552,0.000003,input,546636,25267
32940,3552,0.000003,output,546636,25209
32941,3552,0.000003,output,546636,25266
32942,3552,0.000003,output,546636,25267


In [10]:
## edge-list-builder constructors
## there are two constructors in this class: you can either take all data from the block file or take partial data from it
## take all data from the block file: EdgeListBuilder.fromWhole(mappedcsv)
## take partial data from the block file: EdgeListBuilder.fromPartial(mappedcsv, 546634, 546635)
##   NOTE(review): the two integers look like the first and last block height to include (the edgecsv produced for eb2 is named block546634_546635_edge.csv) -- confirm in edge_list_builder.py
## scenario: I usually use EdgeListBuilder.fromPartial for testing or when the computer does not have enough memory.
## If you run the script on peach, .fromWhole should work fine even for a mappedcsv that contains lots of block heights (e.g. block546634_547756_mapped.csv).
## Your mappedcsv file can contain only one block height as well
mappedcsv2="../mapped/block546634_mapped.csv"
## case1: build from whole data
eb = EdgeListBuilder.fromWhole(mappedcsv2)
## case2: build from partial data
eb2 = EdgeListBuilder.fromPartial(mappedcsv,546634,546635)



In [11]:
## if all your files' headers look the same as mapped_df's, you only need to call .constructEdgeListCSV() to construct the edgelist csv
builders = (eb, eb2)
for builder in builders:
    builder.constructEdgeListCSV()
## if you're not sure where the files are, ask each builder for its output file name:
for builder in builders:
    print(builder.getCSVFileName())


../edgecsv/block546634_546634_edge.csv
../edgecsv/block546634_546635_edge.csv


In [None]:
## to build edgecsv for multiple files (you can put the code in driver.py or create your own script), for example:

# mappedcsv_ls=["block570001_571667_mapped.csv","block571668_573333_mapped.csv","block573334_575000_mapped.csv"]
# directory="../mapped/fivesth_batch/"
# for m in mappedcsv_ls:
#     mappedcsv=directory+m
#     eb=EdgeListBuilder.fromWhole(mappedcsv)
#     eb.constructEdgeListCSV()
#     print("yielded edgecsv for  %s"%m)

In [None]:
# wallet builder example:
## this wallet builder can incrementally read the edgecsv files.
## It implements the union-find algorithm to group addrs into wallets. It records each addr's parent, and the number of children each parent has.
## This class has three constructors:
## (1) .new: use this when you build the wallet structure for the very first time. It takes the wallet-structure-file name you would like, and the number of addrs you want to initialize. I tested with as many as 600,000,000, so it should be able to accommodate all addrs in the bitcoin ledger.
## (2) .edit: use this when you want to add edges to the wallet structure but do not need to add vertices. E.g. if the first time I initialize the wallet structure to be as large as 600,000,000 addrs, then in the future I just need to open the existing wallet-structure-file and pass in edgecsv files to add edges. It takes an existing wallet-structure-file name.
## (3) .edit_and_append: use this when you need to add more edges and add more addrs to the wallet structure. It takes an existing wallet-structure-file name, and the max addr up to which you would like the wallet structure to extend.

In [18]:
## (1) constructor case 1: .new, output wallet-structure file is walletstr_test.csv, and initialize address number to 30 (this will include address 0-29)
wb=WalletBuilder.new("walletstr_test.csv",30)
## initially, each address is its own parent; you can inspect the .par variable to see the parent id array
wb.par

create a new wallet structure: walletstr_test.csv with 30 addrs


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

In [22]:
## here is a peek at an xxxedge.csv that the wallet builder can make use of:
pd.read_csv("./testdata/test_edge.csv", header=None)
# so, the edges are (2,3); (2,4); ...

Unnamed: 0,0,1
0,2,3
1,2,4
2,2,5
3,2,6
4,2,7
5,2,8
6,2,9
7,2,10
8,2,11
9,2,12


In [26]:
## mainly, we insert each edge pair in the edgecsv into the wallet builder
## the method to add an edge is .unite
## these testdata are available in the ./testdata/ directory
directory="./testdata/"
edgecsvs=["test_edge.csv","test_edge2.csv","test_edge3.csv"]

for ec in edgecsvs:
    ecpath=directory+ec
    # newline="" is what the csv module documentation asks for when a file is handed to csv.reader
    with open(ecpath, newline="") as f:
        for row in csv.reader(f, delimiter=','):
            # csv.reader yields [] for a blank line (e.g. a trailing newline); skip it instead of failing on row[0]
            if not row:
                continue
            # the first two columns hold the two addr ids of the edge
            i, j = int(row[0]), int(row[1])
            wb.unite(i, j)

In [27]:
## if you need to take a break from reading edgecsv files, you can save the process. By saving the process, we save the wallet structure, including the parent id (wb.par) list and each parent's size (wb.sz) list.
## In this example, the parent id list is saved as walletstr_test.csv; the size list is saved as walletstr_test_size.csv
## Next time, when you want to resume wallet building, you can just read in the wallet structure
## To save the process:
wb.saveProcess()
## if you run the example above, after calling .saveProcess you should now see two csv files: 'walletstr_test.csv' and 'walletstr_test_size.csv'.
## p.s. the size.csv file is named automatically according to the name you gave to the wallet builder constructor

In [28]:
## you can verify that after reading "test_edge.csv","test_edge2.csv","test_edge3.csv", the 'walletstr_test.csv' now looks different
## initially, each address was its own parent; now, addrs 0-25 and 28 have 2 as their parent; 26's parent is 26; 27 and 29 have 27 as their parent
pd.read_csv("walletstr_test.csv", header=None)

Unnamed: 0,0,1
0,0,2
1,1,2
2,2,2
3,3,2
4,4,2
5,5,2
6,6,2
7,7,2
8,8,2
9,9,2


In [29]:
## you can also verify the size of each parent (i.e. the number of children each parent has)
pd.read_csv("walletstr_test_size.csv", header=None)

Unnamed: 0,0,1
0,0,1
1,1,1
2,2,27
3,3,1
4,4,1
5,5,1
6,6,1
7,7,1
8,8,1
9,9,1


In [38]:
## (2) constructor case 2: edit an existing wallet structure
wb2=WalletBuilder.edit("walletstr_test.csv")
edgecsvs=["test_edge4.csv","test_edge5.csv"]

for ec in edgecsvs:
    # `directory` ("./testdata/") was set in the earlier cell that read test_edge.csv
    ecpath=directory+ec
    # newline="" is what the csv module documentation asks for when a file is handed to csv.reader
    with open(ecpath, newline="") as f:
        for row in csv.reader(f, delimiter=','):
            # csv.reader yields [] for a blank line (e.g. a trailing newline); skip it instead of failing on row[0]
            if not row:
                continue
            # the first two columns hold the two addr ids of the edge
            i, j = int(row[0]), int(row[1])
            wb2.unite(i, j)
# persist the updated wallet structure once all edge files have been read
wb2.saveProcess()


edit exist wallet structure file: walletstr_test.csv


In [39]:
## verify that walletstr_test.csv has changed
pd.read_csv("walletstr_test.csv", header=None)

Unnamed: 0,0,1
0,0,2
1,1,2
2,2,2
3,3,2
4,4,2
5,5,2
6,6,2
7,7,2
8,8,2
9,9,2


In [40]:
## verify that walletstr_test_size.csv has also changed
pd.read_csv("walletstr_test_size.csv", header=None)

Unnamed: 0,0,1
0,0,1
1,1,1
2,2,30
3,3,1
4,4,1
5,5,1
6,6,1
7,7,1
8,8,1
9,9,1


In [42]:
## (3) constructor case 3: edit an existing wallet structure and append more addrs. Here is an example where we edit the existing "walletstr_test.csv" and add addrs until 32
wb3=WalletBuilder.edit_and_append("walletstr_test.csv",33)
## also, add an edge to test
wb3.unite(24,32)
wb3.saveProcess()

edit exist wallet structure file: walletstr_test.csv
extend to addr 33


In [45]:
## verify that walletstr_test.csv and walletstr_test_size.csv have changed
pd.read_csv("walletstr_test.csv", header=None)

Unnamed: 0,0,1
0,0,2
1,1,2
2,2,2
3,3,2
4,4,2
5,5,2
6,6,2
7,7,2
8,8,2
9,9,2


In [44]:
## size list after edit_and_append and unite(24,32)
pd.read_csv("walletstr_test_size.csv", header=None)

Unnamed: 0,0,1
0,0,1
1,1,1
2,2,31
3,3,1
4,4,1
5,5,1
6,6,1
7,7,1
8,8,1
9,9,1


In [48]:
# wallet example: when you finish feeding all edges into the wallet builder, you can then export the wallet csv
## the wallet object takes the wallet structure result from the wallet builder; it only needs the file that stores the parent ids.
## mainly, it will find the root for all vertices, and we'll use these roots as the wallet ids
## input file name:
walletstrcsv="walletstr_test.csv"
## desired output file name:
walletcsv="wallet_test.csv"

w=Wallet(walletstrcsv,walletcsv)
w.constructWalletCSV()

find walletID from exist wallet structure file: walletstr_test.csv


In [49]:
## verify the wallet_test.csv is correct
pd.read_csv("wallet_test.csv",header=None)
## first column is the addr id, second column is the wallet id
## compared to "walletstr_test.csv", addr 29 has changed: its parent is 27, and 27's parent is 2. Thus, addr 29's root is 2, i.e. its wallet id is 2

Unnamed: 0,0,1
0,0,2
1,1,2
2,2,2
3,3,2
4,4,2
5,5,2
6,6,2
7,7,2
8,8,2
9,9,2
