In [1]:
import collections
import csv
import itertools
import matplotlib.pyplot as plt
import json
import os
import pathlib
import re

import pandas as pd


def extract_words_numbers(s):
    return re.findall(r"[a-zA-Z]+|[0-9]+|\$+(?![a-zA-Z0-9])", s)


def split_at_capital(s):
    return re.sub(r"([a-z])([A-Z])", r"\1 \2", s).split()


def tokenize(s):
    return [x.lower() for y in extract_words_numbers(s) for x in split_at_capital(y)]


def slug_to_url(slug):
    return f"https://opensea.io/collection/{slug}"

In [2]:
print(extract_words_numbers("aa12 Bc! #Net$"))
print(split_at_capital("SuperCoolNFT"))
print(tokenize("moneyTake123 #BadApple$$ $N$"))

['aa', '12', 'Bc', 'Net', '$']
['Super', 'Cool', 'NFT']
['money', 'take', '123', 'bad', 'apple', '$$', 'n', '$']


In [3]:
import snowflake.connector as sf_conn

_SNOWFLAKE_CONTEXT = None


def snowflake_run_query(sql):
    global _SNOWFLAKE_CONTEXT
    if _SNOWFLAKE_CONTEXT is None:
        _SNOWFLAKE_CONTEXT = sf_conn.connect(
            user="xiang_wu",
            password=os.getenv("SNOWFLAKE_PASSWORD"),
            account="opensea.us-east-1",
        )

    cs = _SNOWFLAKE_CONTEXT.cursor()
    cs.execute(sql)
    return cs.fetch_pandas_all()

In [4]:
MIN_30D_SALES = 1

ALL_COLLECTION_SQL = f"""
SELECT
  a.ID,
  a.NAME,
  a.DEFAULT_CHAIN_ID,
  a.SLUG,
  a.DESCRIPTION
FROM
  PC_FIVETRAN_DB.PUBLIC.API_COLLECTION a
  LEFT JOIN PC_FIVETRAN_DB.PUBLIC.API_COLLECTIONSTATS b
  ON a.ID = b.COLLECTION_ID
WHERE
  a.IS_DELISTED = FALSE
  AND a.HIDDEN = FALSE
  AND a.IS_NSFW = FALSE
  AND a.BLACKLISTED = FALSE
  AND b.THIRTY_DAY_SALES >= {MIN_30D_SALES}
"""

print(ALL_COLLECTION_SQL)


SELECT
  a.ID,
  a.NAME,
  a.DEFAULT_CHAIN_ID,
  a.SLUG,
  a.DESCRIPTION
FROM
  PC_FIVETRAN_DB.PUBLIC.API_COLLECTION a
  LEFT JOIN PC_FIVETRAN_DB.PUBLIC.API_COLLECTIONSTATS b
  ON a.ID = b.COLLECTION_ID
WHERE
  a.IS_DELISTED = FALSE
  AND a.HIDDEN = FALSE
  AND a.IS_NSFW = FALSE
  AND a.BLACKLISTED = FALSE
  AND b.THIRTY_DAY_SALES >= 1



In [5]:
RUN_SQL = False
ALL_COLLECTION_DF_FILE = pathlib.Path(f"/Users/shinewu/tmp/all_collection_min_30d_sales_{MIN_30D_SALES}.pickle")

if RUN_SQL:
    ALL_COLLECTION_DF = snowflake_run_query(ALL_COLLECTION_SQL)
    ALL_COLLECTION_DF.to_pickle(ALL_COLLECTION_DF_FILE)
else:
    ALL_COLLECTION_DF = pd.read_pickle(ALL_COLLECTION_DF_FILE)

In [6]:
ALL_COLLECTION_DF.iloc[100:120]

Unnamed: 0,ID,NAME,DEFAULT_CHAIN_ID,SLUG,DESCRIPTION
100,1924641,NiftyCookies,1,niftycookies,Is this art?
101,14579724,Hawaii Shot on Film,1,hawaii-shot-on-film,"The ""Hawaii Shot on Film"" is a 1/1 collection ..."
102,1290331,Lemaitre Series,1,lemaitre,"Evolution of an artist, this is an ever evolvi..."
103,4134603,"The Starry Night, 1889 by Vincent Van Gogh",3,the-starry-night-1889-by-vincent-van-gogh,"The Starry Night, 1889 by Vincent Van Gogh\nWi..."
104,675375,My Brother Is A Jerk.....,1,my-brother-is-a-jerk,Since I was a kid... my brother has been a jer...
105,14495850,Official Youtopia,1,official-youtopia,Youtopia is based on the concept of Web 3 Meta...
106,8466410,Crypto octupus,1,cryptooctupus,"Welcome to my ""Pulpitante"" collection"
107,14221798,PI - g9DkUvMr9B,3,pi-g9dkuvmr9b,
108,1516023,KlayChicken V1,2,klaychicken,### The Very First KLAYPROJECT - KlayChicken\n...
109,6298791,JiggyVerse,1,jiggyverse,"The JiggyVerse is an alternate universe of 9,4..."


In [7]:
KEYWORDS = """
APR
APY
Backed
Bond
Buyback
Casino
Coin
Currency
CUSIP
Debt
DeFi
Disbursement
Distribution
Dividend
Entitled
Exchange
Finance
Financial
Forex
Fraction
Gains
ICO
IDO
IEO
Insurance
Interest
Interest Bearing
Invest
Liquidity
Mining
Monetize
Monthly
Offering
Ownership
Passive Income
Payout
Profit
Profit Share
Redistribution
Returns
Rev Share
Revenue Share
ROI
Royalties
Royalty
Securities
Share
STO
Stock
USD
Weekly
Yield
""".lower().split(
    "\n"
)[
    1:-1
]

KEYWORDS = [x.split(" ") for x in KEYWORDS]
SINGLE_KEYWORDS = {x[0] for x in KEYWORDS if len(x) == 1}
DOUBLE_KEYWORDS = {tuple(x) for x in KEYWORDS if len(x) == 2}

print(KEYWORDS)

[['apr'], ['apy'], ['backed'], ['bond'], ['buyback'], ['casino'], ['coin'], ['currency'], ['cusip'], ['debt'], ['defi'], ['disbursement'], ['distribution'], ['dividend'], ['entitled'], ['exchange'], ['finance'], ['financial'], ['forex'], ['fraction'], ['gains'], ['ico'], ['ido'], ['ieo'], ['insurance'], ['interest'], ['interest', 'bearing'], ['invest'], ['liquidity'], ['mining'], ['monetize'], ['monthly'], ['offering'], ['ownership'], ['passive', 'income'], ['payout'], ['profit'], ['profit', 'share'], ['redistribution'], ['returns'], ['rev', 'share'], ['revenue', 'share'], ['roi'], ['royalties'], ['royalty'], ['securities'], ['share'], ['sto'], ['stock'], ['usd'], ['weekly'], ['yield']]


In [8]:
def model_v0(tokens):
    for kw in SINGLE_KEYWORDS:
        if any(token == kw for token in tokens):
            return kw

    for kw_pair in DOUBLE_KEYWORDS:
        if any(x == kw_pair for x in zip(tokens[:-1], tokens[1:])):
            return " ".join(kw_pair)

    return ""


def model_v0_label(row):
    for field in ["NAME", "DESCRIPTION"]:
        tokens = tokenize(str(row[field]))
        flag = model_v0(tokens)
        if flag:
            return flag
    return "SAFE"

In [9]:
desc = """
If you buy all these collections for $200k I will buy for double in the near future. It's a rarest collection of my NFT's
"""
tokens = tokenize(desc)
print(tokens)
model_v0(tokens)

['if', 'you', 'buy', 'all', 'these', 'collections', 'for', '200', 'k', 'i', 'will', 'buy', 'for', 'double', 'in', 'the', 'near', 'future', 'it', 's', 'a', 'rarest', 'collection', 'of', 'my', 'nft', 's']


''

In [10]:
ALL_COLLECTION_DF["label"] = ALL_COLLECTION_DF.apply(model_v0_label, axis=1)

In [11]:
def print_full(x):
    pd.set_option("display.max_rows", None)
    pd.set_option("display.max_columns", None)
    pd.set_option("display.width", 2000)
    pd.set_option("display.float_format", "{:20,.2f}".format)
    pd.set_option("display.max_colwidth", None)
    print(x)
    pd.reset_option("display.max_rows")
    pd.reset_option("display.max_columns")
    pd.reset_option("display.width")
    pd.reset_option("display.float_format")
    pd.reset_option("display.max_colwidth")

In [12]:
POSITIVES = ALL_COLLECTION_DF.query("label == 'buyback'")
print(
    f"Positive rate = {len(POSITIVES)} / {len(ALL_COLLECTION_DF)} = {len(POSITIVES) / len(ALL_COLLECTION_DF)}"
)
for row in itertools.islice(POSITIVES.itertuples(), 5):
    print(f"Link: https://opensea.io/collection/{row.SLUG}")
    print("Description: {", row.DESCRIPTION, "}")
    print("Label:", row.label)
    print("\n====================\n")

Positive rate = 5 / 38941 = 0.00012839937341105776
Link: https://opensea.io/collection/bumblebee0
Description: { Stake Bumblebee NFTs now on Bumblebee app. Download Bumblebee App at: 
https://urlzs.com/31qZQ

Unique set of NFTs build around the polygon network using the Bee meme to give back to the community. Bubblebee project main priority is to give back 20% of holding value after 7days using buyback clause. Bubblebee project will make investing simple, straightforward and most importantly,profitable. 

Note: 1. % increase in the value Creator sold it.
          2. Buy directly from Creator to be eligible for the 
               Creator's buyback clause.
           3.Only one NFT card can be sold back by each 
               account per day.
           4.Only accounts holding any one of our other NFTs 
              (NFTs that are not eligible for buyback) will be able 
               to trigger the buyback clause on Bumblebee.


Bumblebee ∆ is is not eligible for compulsory buyback 

In [13]:
# Cases don't matter, tokenizer will lower everything.
# Occurrences of terms on the same line are tallies in a single counter.
# "-" at the beginning merges current line with previous one.
TERMS_MONEY = """
Dollar, Dollars, Income, Incomes
Entitled, Entitlement, Entitlements
Gain, Gains, Payout, Profit, Profits, Return, Returns, Upside, Upsides
Financial Freedom, Financially Free
Make Money, Makes Money, Making Money, Money Maker, Money Making, Monetize
Never Lose, Never Loses, Never Losing, No Loss
Rev Share, Revenue Share, Revenue Sharing
Retire Early, Retiring Early, Early Retirement
"""

TERMS_TIME = """
Hourly, Every Hour
Daily, Every Day
Every Week, Weekly
Every Month, Monthly
Every Quarter, Quarterly
Annual, Anually, Every Year, Yearly
You Buy, I Will Buy, Buy This
"""

TERMS_HYPE = """
Chance, Chances, Opportunity, Opportunities
Enormous, Significant, Substantial
Good, Better, Best, Big, Bigger, Biggest, Great, Greater, Greatest, Large, Larger, Largest
Absolutely, Certainly, Guaranteed, No Doubt, Surely 
To The Moon
Alltime, All time, In A Lifetime, In Ages, Of The Year 
"""

TERMS_GAMBLE = """
Always Win, Always Winning
Bet, Betting, Bets
Casino
Odds
"""

TERMS_CRYPTO = """
Coin
DeFi
ICO, IDO, IEO
Liquidity Pool
Mine, Mining
Stake, Staking
Yield
"""

TERMS_FIN = """
Approved, Certified, Registered
APY, ROI
Bond, Bonds, Collateral, Debt, Debts, Finance, Financed, Financial, Financing
- Loan, Loans, Securities, Stock, Stocks
Buy back, Buyback
Currency, Forex, USD
CUSIP
Disbursement, Distribution, Dividend, Interest, Interest Bearing, Redistribution
Exchange, Invest, Investing, Investment, Investor
Fraction
Insurance
Liquidity Provider, Provide Liquidity, Providing Liquidity, Provides Liquidity
- Liquidity Taker, Take Liquidity, Taking Liquidity, Takes Liquidity
- Make Market, Making Market, Makes Market, Market Maker, Market Making
Offering
Ownership
Royalties, Royalty
"""

TERMS_ALL = "\n".join([TERMS_MONEY, TERMS_TIME, TERMS_GAMBLE, TERMS_CRYPTO, TERMS_FIN])

TERMS_CONFIG = {
    "MONEY": TERMS_MONEY,
    "TIME": TERMS_TIME,
    "HYPE": TERMS_HYPE,
    "GAMBLE": TERMS_GAMBLE,
    "CRYPTO": TERMS_CRYPTO,
    "FIN": TERMS_FIN
}

def parse_terms(terms):
    lines = terms.split("\n")
    forward = {}
    backward = collections.defaultdict(set)
    cur_id = -1
    line_i = 0
    for line in lines:
        if not line:
            continue

        if line_i == 0:
            assert line[0] != "-"
        if line[0] != "-":
            cur_id += 1
        if line[0] == "-":
            line = line[1:]

        items = [x.strip() for x in line.lower().split(",")]
        for item in items:
            if not item:
                continue
            seq = tuple(item.split(" "))
            forward[seq] = cur_id
            backward[cur_id].add(seq)
            
        line_i += 1

    return forward, backward

def build_terms():
    ret = {}
    for k, terms in TERMS_CONFIG.items():
        forward = parse_terms(terms)[0]
        max_i = max(forward.values())
        ret[k] = forward, (max_i + 1)
    return ret

TERMS_PARSER = build_terms()


def backward_to_list(backward):
    backward_list = [None] * len(backward)
    for k, v in backward.items():
        backward_list[k] = list(v)
    return backward_list

In [14]:
forward, backward = parse_terms(TERMS_ALL)
print(forward)
print(backward)
for k, v in backward.items():
    print(k, '->', v)

print(json.dumps(backward_to_list(backward)))

{('dollar',): 0, ('dollars',): 0, ('income',): 0, ('incomes',): 0, ('entitled',): 1, ('entitlement',): 1, ('entitlements',): 1, ('gain',): 2, ('gains',): 2, ('payout',): 2, ('profit',): 2, ('profits',): 2, ('return',): 2, ('returns',): 2, ('upside',): 2, ('upsides',): 2, ('financial', 'freedom'): 3, ('financially', 'free'): 3, ('make', 'money'): 4, ('makes', 'money'): 4, ('making', 'money'): 4, ('money', 'maker'): 4, ('money', 'making'): 4, ('monetize',): 4, ('never', 'lose'): 5, ('never', 'loses'): 5, ('never', 'losing'): 5, ('no', 'loss'): 5, ('rev', 'share'): 6, ('revenue', 'share'): 6, ('revenue', 'sharing'): 6, ('retire', 'early'): 7, ('retiring', 'early'): 7, ('early', 'retirement'): 7, ('hourly',): 8, ('every', 'hour'): 8, ('daily',): 9, ('every', 'day'): 9, ('every', 'week'): 10, ('weekly',): 10, ('every', 'month'): 11, ('monthly',): 11, ('every', 'quarter'): 12, ('quarterly',): 12, ('annual',): 13, ('anually',): 13, ('every', 'year'): 13, ('yearly',): 13, ('you', 'buy'): 14, (

In [15]:
print(TERMS_PARSER)

{'MONEY': ({('dollar',): 0, ('dollars',): 0, ('income',): 0, ('incomes',): 0, ('entitled',): 1, ('entitlement',): 1, ('entitlements',): 1, ('gain',): 2, ('gains',): 2, ('payout',): 2, ('profit',): 2, ('profits',): 2, ('return',): 2, ('returns',): 2, ('upside',): 2, ('upsides',): 2, ('financial', 'freedom'): 3, ('financially', 'free'): 3, ('make', 'money'): 4, ('makes', 'money'): 4, ('making', 'money'): 4, ('money', 'maker'): 4, ('money', 'making'): 4, ('monetize',): 4, ('never', 'lose'): 5, ('never', 'loses'): 5, ('never', 'losing'): 5, ('no', 'loss'): 5, ('rev', 'share'): 6, ('revenue', 'share'): 6, ('revenue', 'sharing'): 6, ('retire', 'early'): 7, ('retiring', 'early'): 7, ('early', 'retirement'): 7}, 8), 'TIME': ({('hourly',): 0, ('every', 'hour'): 0, ('daily',): 1, ('every', 'day'): 1, ('every', 'week'): 2, ('weekly',): 2, ('every', 'month'): 3, ('monthly',): 3, ('every', 'quarter'): 4, ('quarterly',): 4, ('annual',): 5, ('anually',): 5, ('every', 'year'): 5, ('yearly',): 5, ('you

In [16]:
def featureize(tokens):
    ret = {}
    for k, (parser, width) in TERMS_PARSER.items():
        counter = [0] * width
        for i in range(len(tokens)):
            for term_tuple, counter_i in parser.items():
                if tuple(tokens[i:(i + len(term_tuple))]) == term_tuple:
                    counter[counter_i] += len(term_tuple)
        ret[k] = [float(x) / len(tokens) for x in counter]
    return ret     

In [17]:
featureize(tokenize("Get rich every day with interest bearing"))

{'MONEY': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'TIME': [0.0, 0.2857142857142857, 0.0, 0.0, 0.0, 0.0, 0.0],
 'HYPE': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'GAMBLE': [0.0, 0.0, 0.0, 0.0],
 'CRYPTO': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'FIN': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.42857142857142855,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0]}

In [18]:
def featurize_df(row):
    all_text = str(row['NAME']) + " 0 " + str(row["DESCRIPTION"])
    return row['SLUG'], featureize(tokenize(all_text))


ALL_FEATURES = ALL_COLLECTION_DF.apply(featurize_df, axis=1)

In [19]:
ALL_FEATURES_JOINED = dict(zip(ALL_COLLECTION_DF.ID, ALL_FEATURES))

In [20]:
def positive_heur(features):
    money = features['MONEY']
    time = features['TIME']
    hype = features['HYPE']
    gamble = features['GAMBLE']
    crypto = features['CRYPTO']
    fin = features['FIN']
    
    if sum(money) > 0.1 or sum(gamble) > 0.1 or sum(fin) > 0.1:
        return True
    if sum(time) > 0.05 or sum(hype) > 0.05:
        if sum(money) > 0.05 or sum(gamble) > 0.05 or sum(fin) > 0.05:
            return True
        
    return False

In [21]:
postive_flags = {k: positive_heur(v[1]) for k, v in ALL_FEATURES_JOINED.items()}
print(sum(postive_flags.values()))

55


In [22]:
for k, v in ALL_FEATURES_JOINED.items():
    if postive_flags[k]:
        print(slug_to_url(v[0]))
        # print(v)
        print()

https://opensea.io/collection/healing-coral-royalties-test

https://opensea.io/collection/winged-turtles

https://opensea.io/collection/invest-hacking-genesis-cards

https://opensea.io/collection/buy-back-my-fore-coffee

https://opensea.io/collection/giraffe-tower

https://opensea.io/collection/limited-free-distribution

https://opensea.io/collection/fegex

https://opensea.io/collection/return-by-aaron-penne

https://opensea.io/collection/bondgraphics

https://opensea.io/collection/passive-income-nft-33

https://opensea.io/collection/okay-goats

https://opensea.io/collection/investman-justforfun

https://opensea.io/collection/f5doitmusic

https://opensea.io/collection/warp-finance

https://opensea.io/collection/multinodechain

https://opensea.io/collection/arc-finance-twitter-party

https://opensea.io/collection/pickles

https://opensea.io/collection/gambling-gnomes-official

https://opensea.io/collection/attack-on-titan-2022

https://opensea.io/collection/dollar-land

https://opensea.

In [39]:
class BaseMatcher:
    def match(self, i, tokens):
        if i < 0 or not tokens or i >= len(tokens):
            return None
        else:
            return self._match_impl(i, tokens)


class SingleWordMatcher(BaseMatcher):
    def __init__(self, word):
        assert isinstance(word, str)
        self._word = word

    def _match_impl(self, i, tokens):
        return (i, i + 1) if tokens[i] == self._word else None


def cast_to_matcher(matcher):
    return SingleWordMatcher(matcher) if isinstance(matcher, str) else matcher


class MultiWordMatcher(BaseMatcher):
    def __init__(self, words):
        if not isinstance(words, set):
            words = set(words)
        self._words = words

    def _match_impl(self, i, tokens):
        return (i, i + 1) if tokens[i] in self._words else None

    def extend(self, new_words):
        if not isinstance(new_words, set):
            new_words = set(new_words)
        return MultiWordMatcher(self._words | new_words)


class SeqMatcher(BaseMatcher):
    def __init__(self, matcher_list):
        self._matcher_list = [cast_to_matcher(matcher) for matcher in matcher_list]

    def _match_impl(self, i, tokens):
        start = i
        j = 0
        while i < len(tokens):
            match = self._matcher_list[j].match(i, tokens)
            if match is None:
                return None
            i = match[1]
            j += 1
            if j == len(self._matcher_list):
                return start, match[1]
        return None


class DistanceMatcher(BaseMatcher):
    def __init__(self, distances, matcher1, matcher2):
        assert all(x < y for x, y in zip(distances[:-1], distances[1:]))
        self._distances = distances
        self._matcher1 = cast_to_matcher(matcher1)
        self._matcher2 = cast_to_matcher(matcher2)

    def _match_impl(self, i, tokens):
        match1 = self._matcher1.match(i, tokens)
        if match1 is None:
            return None
        for j in self._distances:
            k = match1[1] + j
            if k >= len(tokens):
                break
            match2 = self._matcher2.match(k, tokens)
            if match2 is not None:
                return i, match2[1]
        return None


def make_distance_matcher(max_distance, matcher1, matcher2, min_distance=0):
    return DistanceMatcher(
        list(range(min_distance, max_distance + 1)), matcher1, matcher2
    )


class FirstMatcher(BaseMatcher):
    def __init__(self, matchers):
        self._matchers = [cast_to_matcher(matcher) for matcher in matchers]

    def _match_impl(self, i, tokens):
        for matcher in self._matchers:
            match = matcher.match(i, tokens)
            if match is not None:
                return match
        return None


def match_all(matcher, tokens):
    i = 0
    while i < len(tokens):
        match = matcher.match(i, tokens)
        if match is not None:
            yield match
            i = match[1]
        else:
            i += 1


MONEY_MATCHER = MultiWordMatcher(
    {
        "$",
        "$$",
        "$$$",
        "$s",
        "bitcoin",
        "btc",
        "btcs",
        "cash",
        "dollar",
        "dollars",
        "money",
        "moneys",
        "sol",
        "sols",
        "usd",
        "usds",
    }
)

INTEREST_MATCHER = MultiWordMatcher(
    {
        "coupon",
        "coupons",
        "disimbursement",
        "disimbursements",
        "distribution",
        "distributions",
        "dividend",
        "dividends",
        "gain",
        "gains",
        "income",
        "incomes",
        "interest",
        "interests",
        "payment",
        "payments",
        "payout",
        "payouts",
        "profit",
        "profits",
        "royalties",
        "royalty",
        "upside",
        "upsides",
        "yield",
        "yields",
    }
)

PAYMENT_MATCHER = FirstMatcher([MONEY_MATCHER, INTEREST_MATCHER])

GET_MATCHER = MultiWordMatcher({"get", "gets", "getting", "got"})
GUARANTEE_MATCHER = MultiWordMatcher(
    {"guarantee", "guarantees", "guaranteeing", "guaranteed"}
)
MAKE_MATCHER = MultiWordMatcher({"make", "makes", "making", "made"})
PAY_MATCHER = MultiWordMatcher({"pay", "pays", "paying", "paid"})
RECEIVE_MATCHER = MultiWordMatcher({"receive", "receives", "receiving", "received"})
SEND_MATCHER = MultiWordMatcher({"send", "sends", "sending", "sent"})
TAKE_MATCHER = MultiWordMatcher({"take", "takes", "taking", "took", "taken"})

ACTIVE_GET_MATCHER = FirstMatcher(
    [
        GET_MATCHER,
        GUARANTEE_MATCHER,
        MAKE_MATCHER,
        PAY_MATCHER,
        RECEIVE_MATCHER,
        SEND_MATCHER,
        TAKE_MATCHER,
    ]
)
ACTIVE_GET_MONEY_MATCHER = make_distance_matcher(
    4,
    ACTIVE_GET_MATCHER,
    PAYMENT_MATCHER,
)

PASSIVE_GET_MATCHER = MultiWordMatcher(
    {"guaranteed", "made", "paid", "received", "sent", "taken"}
)
PASSIVE_GET_MONEY_MATCHER = make_distance_matcher(
    2,
    PAYMENT_MATCHER,
    PASSIVE_GET_MATCHER,
)

FINANCIAL_FREEDOM_MATCHER = make_distance_matcher(
    2,
    MultiWordMatcher({"finance", "financial"}),
    "freedom",
)

RETIRE_MATCHER = MultiWordMatcher({"retire", "retires", "retiring", "retired"})
RETIRE_EARLY_MATCHER = make_distance_matcher(
    2,
    RETIRE_MATCHER,
    "early",
)

SECURITY_MATCHER = MultiWordMatcher(
    {
        "bond",
        "bonds",
        "commodities",
        "commodity",
        "contract",
        "contracts",
        "currencies",
        "currency",
        "derivative",
        "derivatives",
        "option",
        "options",
        "securities",
        "security",
        "stock",
        "stocks",
    }
)

BE_MATCHER = MultiWordMatcher({"be", "is", "are", "was", "were"})
INCREASE_MATCHER = MultiWordMatcher(
    {
        "double",
        "doubling",
        "grow",
        "growing",
        "increase",
        "increasing",
        "multiply",
        "multiplying",
        "triple",
        "tripling",
    }
)
FINANCIAL_INSTRUMENTS_MATCHER = FirstMatcher(
    [
        MultiWordMatcher(
            {
                "cusip",
                "forex",
                "ico",
                "ido",
                "ieo",
                "ipo",
                "roi",
            }
        ),
        make_distance_matcher(
            4,
            BE_MATCHER,
            FirstMatcher(
                [
                    MultiWordMatcher(
                        {
                            "investing"
                            "investment",
                            "investments",
                            "lucrative",
                            "profitable",
                        }
                    ),
                    SeqMatcher(
                        [
                            MultiWordMatcher({"money", "profit"}),
                            "making",
                        ]
                    ),
                ]
            ),
        ),
        make_distance_matcher(
            3,
            INCREASE_MATCHER,
            MultiWordMatcher({"investment", "investments", "money", "moneys"}),
        ),
        make_distance_matcher(
            2,
            SECURITY_MATCHER.extend({"coin", "crypto"}),
            "offering",
        ),
        SeqMatcher(
            [
                MultiWordMatcher({"profit", "rev", "revenue"}),
                MultiWordMatcher({"share", "sharing"}),
            ]
        ),
    ]
)

ACTIVE_TRADE_MATCHER = MultiWordMatcher(
    {
        "buy",
        "buying",
        "bought",
        "sell",
        "selling",
        "sold",
        "trade",
        "trading",
        "traded",
        "transact",
        "transacting",
        "transacted",
    }
)
PASSIVE_TRADE_MATCHER = MultiWordMatcher({"bought", "sold", "traded", "transacted"})

ACTIVE_TRADE_SECURITY_MATCHER = make_distance_matcher(
    4,
    ACTIVE_TRADE_MATCHER,
    SECURITY_MATCHER,
)
PASSIVE_TRADE_SECURITY_MATCHER = make_distance_matcher(
    2,
    SECURITY_MATCHER,
    PASSIVE_TRADE_MATCHER,
)

In [40]:
list(match_all(ACTIVE_GET_MONEY_MATCHER, tokenize("paying you top dollar and you get paid $$$ and you receive monthly payment")))

[(0, 4), (6, 9), (11, 14)]

In [41]:
FINANCIAL_FREEDOM_MATCHER.match(0, tokenize("financial total freedom!"))

(0, 3)

In [42]:
RETIRE_EARLY_MATCHER.match(0, tokenize("Retires pretty sure early!"))

(0, 4)

In [43]:
PASSIVE_TRADE_SECURITY_MATCHER.match(0, tokenize("stocks will be sold to him"))

(0, 4)

In [44]:
def label_with_matcher(row):
    all_text = str(row["NAME"]) + " " + str(row["DESCRIPTION"])
    all_tokens = tokenize(all_text)


    for label, matcher in [
        ("ACTIVE_GET", ACTIVE_GET_MONEY_MATCHER),
        ("PASSIVE_GET", PASSIVE_GET_MONEY_MATCHER),
        ("FINANCIAL_FREEDOM", FINANCIAL_FREEDOM_MATCHER),
        ("RETIRE_EARLY", RETIRE_EARLY_MATCHER),
        ("FINANCIAL_INSTRUMENTS", FINANCIAL_INSTRUMENTS_MATCHER),
        ("ACTIVE_TRADE", ACTIVE_TRADE_SECURITY_MATCHER),
        ("PASSIVE_TRADE", PASSIVE_TRADE_SECURITY_MATCHER),
    ]:
        matches = list(match_all(matcher, all_tokens))
        if matches:
            return label, all_tokens[matches[0][0]:matches[0][1]]
    
    return "SAFE", []


LABELS_WITH_MATCHER = ALL_COLLECTION_DF.apply(label_with_matcher, axis=1)

In [45]:
desc = '''


Aiyah Verse Stickman-X Genesis 465 out of 666 Genesis Stickman-X have awakened on the Ethereum ERC721 blockchain. Nobody know when the rest will do so. These Stickman-X are the origins of the vast Aiyah Verse and will allow their masters to travel between the worlds within. New worlds are uncovered as time moves forward... to explore the new or to dominate the old, their fate in the Aiyah Verse is yours to decide.

Holders of the Stickman-X Genesis will be entitled to profit-sharing in our upcoming mobile game - Crypto Warframe as well as secondary sale revenue-sharing in our upcoming ID NFTs to be used in the game


'''

print(list(match_all(FINANCIAL_INSTRUMENTS_MATCHER, tokenize(desc))))

[(87, 89), (101, 103)]


In [46]:
num_printed = 0
for row, (label, matched) in zip(ALL_COLLECTION_DF.itertuples(), LABELS_WITH_MATCHER):
    if label != "SAFE":
        if num_printed < 10:
            print(label, matched, slug_to_url(row.SLUG))
            print(row.NAME, row.DESCRIPTION)
            print("==========")
        num_printed += 1

print("total =", num_printed)

ACTIVE_GET ['take', 'some', 'of', 'our', 'profit'] https://opensea.io/collection/ordinary-goldfish
Ordinary Goldfish Exclusive hand-drawn Goldfish Collections!

All the goldfish are absolutely unique and Exclusive,
Exclusively Drawn with love and passion!

Utility : We sell and we buy !
We sell our NFT and we take some of our profit to buy and support other small NFT artist
ACTIVE_GET ['get', 'merch', 'royalties'] https://opensea.io/collection/3d-pixel-bearcats
3D Pixel Bearcats #The OFFICIAL 3D Pixel Bearcats NFT Collection
*Soon to be playable characters within the Sandbox Metaverse!*

Purchase a 3D Pixel Bearcat on the Ethereum blockchain to reserve your spot in the Pixel Bearcats Metaverse! - *by TwistedColorTheory*

*CHECK OUT OUR NFT ROADMAP!*

____

CLASSIC Pixel Bearcats NFT Collection - https://opensea.io/collection/pixel-bearcats

____

#UTILITY

HOLDERS GET MERCH ROYALTIES!

ONLY PAY GAS FEE TO MINT YOUR 3D PLAYABLE AVATAR WHEN RELEASED ON OPENSEA!

YOU WILL BE SENT THE CLAS