In [47]:
# data manipulation
import pandas as pd
import numpy as np

import nltk

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn import preprocessing

from nltk.corpus import stopwords
from nltk import SnowballStemmer
import string

from pprint import pprint  # PRETTY PRINT long dictionaries

import re # REGEX remove non-alphanumeric

In [None]:
df = pd.read_csv("../raw/Onboard_Survey.csv")

df.head()

In [None]:
# skip first seven columns df.iloc[:, 0:7].head()

# selecting only open-ended responses 
df.iloc[:, 6:14].head()

open_ended = df.iloc[:, 6:14]

open_ended.head()

In [29]:
# rename columns to better manage columns
open_ended.columns = ['walletwhat_walletwhy', 'wallet_pain', 'defi_when', 'defiwhat_defiwhy', 'defi_pain', 'defi_outcome', 'defi_interest', 'defi_endgame']


open_ended.head()

Unnamed: 0,walletwhat_walletwhy,wallet_pain,defi_when,defiwhat_defiwhy,defi_pain,defi_outcome,defi_interest,defi_endgame
0,Trezor - needed cold storage.,keeping up with all the security parameters,Within the last year,uniswap - seems to have a stellar reputation.,Learning how to navigate web3 websites.,Discovered new financial products and revenue ...,Alchemix,Passive income through DeFi
1,"Trustwallet, was not knowing much,","still not coming to terms, which wallet to use...",I have never used DeFi,,,,,
2,"Coinbase, ease of transactions",,Within the last year,,,,AAVE,Move my traditional investments over
3,"trezor, it just works and its secure","setting up is painful, and dealing with the se...",Within the last year,"Uniswap, i had to trade between assets",Gas fees are fluctuating each second,lost money from weird protocols,Options,Become a DeFi native and have more DeFi assets...
4,Coinbase bc it was a whileee ago,"Feees, centralization etc",3-5 years ago,,,,,


In [None]:
# open_ended['defi_interest']

# Split strings in each row into individual words

defi_interest_list = []

for row in open_ended['defi_interest']:
    try:
        defi_interest_list.append(row.split())
    except:
        continue
        
defi_interest_list # nested list

In [31]:
# flatten nested list

defi_interest_list_flat = [item for sublist in defi_interest_list for item in sublist]

defi_interest_list_flat

['Alchemix',
 'AAVE',
 'Options',
 'Pancakebunny,',
 'Pancake,',
 'Uniswap',
 'barnbridge',
 'looks',
 'cool.',
 'waiting',
 'for',
 'it',
 'to',
 'get',
 'to',
 'polygon',
 'Anything',
 'from',
 'Bankless',
 'DAO.',
 'Alchemix',
 'Lending',
 '(Aave,',
 'Compound,',
 'Cream)',
 'Alchemix',
 'Alchemix,',
 'specifically',
 'alETH.',
 'And',
 'Curve',
 'once',
 'it',
 'makes',
 'more',
 'sense',
 'for',
 'me',
 'to',
 'hold',
 'DAI',
 'instead',
 'of',
 'USD.',
 'any,',
 'all,',
 'still',
 'learning',
 'the',
 'differences',
 'Alchemix',
 'Zerion;',
 'Sushi',
 'Poly',
 'farms;',
 'liquidity',
 'Uniswap',
 'v3',
 'Alchemix',
 'Hmmmm....The',
 'one',
 'that',
 "haven't",
 'found',
 'yet?',
 'Free',
 'money',
 'printer?',
 'L2',
 'apps',
 'like',
 'Immutable',
 'X,',
 'Loopring,',
 'zkSync',
 'sushiswap',
 'with',
 'all',
 'the',
 'new',
 'features',
 'Rocket',
 'Pool,',
 'the',
 'decentralized',
 'staking',
 'service',
 'that',
 'I',
 'see',
 'as',
 'the/a',
 'future',
 'pillar',
 'of',
 't

In [44]:
# lower case
final_defi_interest_list = []


for word in defi_interest_list_flat:
    l = word.lower()
    final_defi_interest_list.append(l)
    
final_defi_interest_list

['alchemix',
 'aave',
 'options',
 'pancakebunny,',
 'pancake,',
 'uniswap',
 'barnbridge',
 'looks',
 'cool.',
 'waiting',
 'for',
 'it',
 'to',
 'get',
 'to',
 'polygon',
 'anything',
 'from',
 'bankless',
 'dao.',
 'alchemix',
 'lending',
 '(aave,',
 'compound,',
 'cream)',
 'alchemix',
 'alchemix,',
 'specifically',
 'aleth.',
 'and',
 'curve',
 'once',
 'it',
 'makes',
 'more',
 'sense',
 'for',
 'me',
 'to',
 'hold',
 'dai',
 'instead',
 'of',
 'usd.',
 'any,',
 'all,',
 'still',
 'learning',
 'the',
 'differences',
 'alchemix',
 'zerion;',
 'sushi',
 'poly',
 'farms;',
 'liquidity',
 'uniswap',
 'v3',
 'alchemix',
 'hmmmm....the',
 'one',
 'that',
 "haven't",
 'found',
 'yet?',
 'free',
 'money',
 'printer?',
 'l2',
 'apps',
 'like',
 'immutable',
 'x,',
 'loopring,',
 'zksync',
 'sushiswap',
 'with',
 'all',
 'the',
 'new',
 'features',
 'rocket',
 'pool,',
 'the',
 'decentralized',
 'staking',
 'service',
 'that',
 'i',
 'see',
 'as',
 'the/a',
 'future',
 'pillar',
 'of',
 't

In [48]:
# remove non-alpha numeric
final_defi_interest_list2 = []

for word in final_defi_interest_list:
    a = re.sub("[^a-zA-Z0-9]+", "", word)   # Regex
    final_defi_interest_list2.append(a)
    
final_defi_interest_list2

['alchemix',
 'aave',
 'options',
 'pancakebunny',
 'pancake',
 'uniswap',
 'barnbridge',
 'looks',
 'cool',
 'waiting',
 'for',
 'it',
 'to',
 'get',
 'to',
 'polygon',
 'anything',
 'from',
 'bankless',
 'dao',
 'alchemix',
 'lending',
 'aave',
 'compound',
 'cream',
 'alchemix',
 'alchemix',
 'specifically',
 'aleth',
 'and',
 'curve',
 'once',
 'it',
 'makes',
 'more',
 'sense',
 'for',
 'me',
 'to',
 'hold',
 'dai',
 'instead',
 'of',
 'usd',
 'any',
 'all',
 'still',
 'learning',
 'the',
 'differences',
 'alchemix',
 'zerion',
 'sushi',
 'poly',
 'farms',
 'liquidity',
 'uniswap',
 'v3',
 'alchemix',
 'hmmmmthe',
 'one',
 'that',
 'havent',
 'found',
 'yet',
 'free',
 'money',
 'printer',
 'l2',
 'apps',
 'like',
 'immutable',
 'x',
 'loopring',
 'zksync',
 'sushiswap',
 'with',
 'all',
 'the',
 'new',
 'features',
 'rocket',
 'pool',
 'the',
 'decentralized',
 'staking',
 'service',
 'that',
 'i',
 'see',
 'as',
 'thea',
 'future',
 'pillar',
 'of',
 'the',
 'defi',
 'zapperfi',

In [51]:
tally = pd.Series(final_defi_interest_list2).value_counts()

type(tally)

pandas.core.series.Series

In [52]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(tally)

alchemix              34
aave                  29
uniswap               26
to                    22
the                   21
and                   14
that                  13
like                  12
i                     11
on                     9
of                     8
polygon                8
is                     7
                       7
v3                     7
with                   6
in                     6
a                      6
defi                   5
sushi                  5
are                    5
zapper                 5
sushiswap              5
curve                  5
one                    5
yearn                  5
yield                  4
it                     4
id                     4
for                    4
dydx                   4
zapperfi               4
all                    4
things                 4
would                  4
liquity                4
aleth                  4
have                   4
anything               4
staking                4
