/
scraper.go
123 lines (99 loc) · 2.73 KB
/
scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package scraper
import (
"sort"
"strconv"
"sync"
"time"
"github.com/alethio/web3-go/ethrpc/provider/httprpc"
"github.com/pkg/errors"
"github.com/Alethio/memento/data"
"github.com/alethio/web3-go/ethrpc"
"github.com/sirupsen/logrus"
)
// log is the package-level logger; its entries carry the field module=scraper.
var log = logrus.WithField("module", "scraper")
// Config holds the settings used to construct a Scraper.
type Config struct {
// NodeURL is the node endpoint handed to the HTTP JSON-RPC provider in New.
NodeURL string
// EnableUncles makes Exec also fetch the block's uncles when true.
EnableUncles bool
}
// Scraper fetches raw block data from a node through an ethrpc connection.
type Scraper struct {
// config is the configuration the Scraper was created with.
config Config
// conn is the ethrpc client used for the JSON-RPC calls made by Exec.
conn *ethrpc.ETH
}
// New builds a Scraper for the node at config.NodeURL.
//
// It wires an HTTP JSON-RPC provider (backed by a batch loader and using a
// 5 second HTTP timeout) into an ethrpc client. A wrapped error is returned if
// the batch loader, the provider or the ethrpc client cannot be initialised.
func New(config Config) (*Scraper, error) {
	loader, err := httprpc.NewBatchLoader(0, 4*time.Millisecond)
	if err != nil {
		return nil, errors.Wrap(err, "could not init batch loader")
	}

	rpcProvider, err := httprpc.NewWithLoader(config.NodeURL, loader)
	if err != nil {
		return nil, errors.Wrap(err, "could not init httprpc provider")
	}
	rpcProvider.SetHTTPTimeout(5 * time.Second)

	eth, err := ethrpc.New(rpcProvider)
	if err != nil {
		return nil, errors.Wrap(err, "could not init ethrpc")
	}

	scraper := &Scraper{config: config, conn: eth}
	return scraper, nil
}
// Exec does the JSON-RPC calls necessary for scraping a given block and returns the raw data.
//
// It:
//   - scrapes the block using eth_getBlockByNumber
//   - for each transaction in the block, scrapes the receipt using
//     eth_getTransactionReceipt (one goroutine per transaction); the receipts
//     are sorted once all of them have been collected
//   - if Config.EnableUncles is set, scrapes each uncle of the block using
//     eth_getUncleByBlockHashAndIndex
//
// If any call fails, Exec returns a nil block together with an error. When
// several receipt calls fail, the first error that was recorded is returned.
func (s *Scraper) Exec(block int64) (*data.FullBlock, error) {
	// Use a call-local logger. Reassigning the package-level logger would stack
	// another "block" field on it with every call and would race between
	// concurrent Exec calls.
	logger := log.WithField("block", block)

	b := &data.FullBlock{}

	logger.Debug("getting block")
	start := time.Now()

	dataBlock, err := s.conn.GetBlockByNumber("0x" + strconv.FormatInt(block, 16))
	if err != nil {
		logger.Error(err)
		return nil, err
	}
	b.Block = dataBlock
	logger.WithField("duration", time.Since(start)).Debug("got block")

	logger.Debug("getting receipts")
	start = time.Now()

	var (
		wg   sync.WaitGroup
		mu   sync.Mutex // guards b.Receipts and errs
		errs []error
	)
	for _, tx := range dataBlock.Transactions {
		wg.Add(1)
		// Per-iteration copy so every goroutine sees its own transaction.
		txCopy := tx

		go func() {
			defer wg.Done()

			dataReceipt, err := s.conn.GetTransactionReceipt(txCopy.Hash)

			// Both shared slices are appended to from many goroutines, so
			// every append has to happen under the mutex.
			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				errs = append(errs, err)
				return
			}
			b.Receipts = append(b.Receipts, dataReceipt)
		}()
	}
	wg.Wait()

	// Goroutines finish in arbitrary order; sort to get a stable ordering.
	sort.Sort(b.Receipts)
	logger.WithField("duration", time.Since(start)).Debugf("got %d receipts", len(b.Receipts))

	if len(errs) > 0 {
		return nil, errs[0]
	}

	if s.config.EnableUncles {
		logger.Debug("getting uncles")
		start = time.Now()

		for idx := range dataBlock.Uncles {
			dataUncle, err := s.conn.GetUncleByBlockHashAndIndex(b.Block.Hash, "0x"+strconv.FormatInt(int64(idx), 16))
			if err != nil {
				logger.Error(err)
				return nil, err
			}
			b.Uncles = append(b.Uncles, dataUncle)
		}
		logger.WithField("duration", time.Since(start)).Debugf("got %d uncles", len(b.Uncles))
	}

	logger.Debug("done scraping block")
	return b, nil
}