This repository has been archived by the owner on May 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
40 lines (33 loc) · 1.4 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import re
def extract_address(sentence):
    """Return every ``.eth`` name found in *sentence*, in order of appearance.

    The text is cut into tokens at each run of characters other than ASCII
    letters, digits, ``.`` and ``-``; whitespace, other punctuation, emoji
    and URL separators such as ``/`` and ``:`` therefore all act as
    delimiters.  A token is reported when, after removing dots from both of
    its ends, it ends with ``.eth`` and has at least one character in front
    of that suffix.  Matching is case-sensitive.

    Args:
        sentence: Text to scan.  ``None`` or an empty string yields ``[]``.

    Returns:
        A list of the matching names.  Duplicates are kept.
    """
    if not sentence:
        return []

    suffix = ".eth"
    # Cheap early exit: most inputs never mention the suffix at all.
    if suffix not in sentence:
        return []

    addresses = []
    # Splitting on the delimiter class directly is equivalent to replacing
    # each delimiter run with a marker and splitting on the marker, and the
    # class already covers every non-ASCII character.
    for token in re.split(r"[^a-zA-Z0-9.-]+", sentence):
        # "." is kept inside tokens because it is part of a name, so a
        # sentence-ending period ("I'm vitalik.eth.") or an ellipsis sticks
        # to the token; trim dots at the edges before testing the suffix.
        name = token.strip(".")
        if name.endswith(suffix) and len(name) > len(suffix):
            addresses.append(name)
    return addresses
def extract_address_from_item(item):
    """Collect the distinct ``.eth`` names mentioned by one profile item.

    Three places are scanned with ``extract_address``: the plain text of
    ``item["name"]``, the plain text of ``item["description"]`` (the bio),
    and the ``expanded_url`` of every link listed under
    ``item["entities"]["description"]["urls"]``.

    The links under ``item["entities"]["url"]`` are deliberately not
    scanned: URL/location entries do not have to be checked for ``.eth``
    names.

    Args:
        item: Mapping describing a profile.  Every key read here is treated
            as optional; a missing or ``None`` value contributes nothing.
            NOTE(review): the key layout looks like a Twitter user object;
            confirm against the caller.

    Returns:
        A set of the names found (empty when there are none).
    """
    found = set()

    # Plain text in the name and the description (bio).
    for field in ("name", "description"):
        found.update(extract_address(item.get(field) or ""))

    # Links embedded in the description (bio).
    description_entities = (item.get("entities") or {}).get("description") or {}
    for url in description_entities.get("urls") or ():
        expanded = url.get("expanded_url")
        if expanded:
            found.update(extract_address(expanded))

    return found