This notebook demonstrates `unique_keyword_search()`, defined below, which returns the most common keywords in a column of comma-separated strings.

In [1]:
# import library
import pandas as pd
from collections import Counter

In [2]:
# Read organizations_cleaned.csv into df_org; the path is relative to the current working directory.
# NOTE(review): the preview of df_org shows an "Unnamed: 0" column, which suggests the CSV was
# written with its index included; passing index_col=0 would likely drop it (confirm before changing).
df_org = pd.read_csv("../bulk_export_processed/cleaned_csv/organizations_cleaned.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
df_org.head()

Unnamed: 0,uuid,country_code,region,city,status,category_list,category_groups_list,num_funding_rounds,total_funding_usd,founded_on,last_funding_on,closed_on,employee_count,primary_role,num_exits
0,cf3a40e6-920e-edfd-569e-371f84e0a4e4,USA,California,San Francisco,operating,"Finance,Financial Services,Health Care,Venture...","Financial Services,Health Care,Lending and Inv...",,,2015-01-01,,,101-250,investor,24.0
1,fa65a572-1621-dd22-57a8-92bb49217ac5,GBR,England,London,operating,"Financial Services,FinTech,Venture Capital","Financial Services,Lending and Investments",,,2018-01-01,,,11-50,investor,10.0
2,74a20af3-f4dd-6188-de60-c4ee6cd0ca4a,CHN,Zhejiang,Hangzhou,operating,"Banking,E-Commerce,Financial Services,FinTech,...","Commerce and Shopping,Financial Services,Inter...",4.0,22000000000.0,2014-10-01,2018-06-08,,5001-10000,company,12.0
3,f33a3674-ec6b-14ca-16dc-437f280dc10b,USA,Virginia,Reston,operating,"E-Commerce,Financial Services,Information Tech...","Commerce and Shopping,Financial Services,Infor...",,,2015-01-01,,,1-10,investor,19.0
4,8fa7fd0d-d5cc-425d-52cc-a2019e7d42a3,USA,California,Santa Monica,operating,"Cyber Security,Developer APIs,FinTech,Software","Financial Services,Information Technology,Priv...",1.0,,2013-08-20,2014-01-23,,1-10,company,


In [4]:
# Count how many rows share each exact category_list string.
# Every distinct comma-joined combination is tallied as a single value here;
# individual keywords are not split out at this step.
org_series = df_org['category_list'].value_counts()
print(org_series)

Financial Services                                      1882
Finance,Financial Services                              1492
Financial Services,FinTech                               748
Finance,Financial Services,Venture Capital               692
Finance                                                  616
                                                        ... 
Financial Services,FinTech,Search Engine                   1
Blockchain,Cryptocurrency,Freelance                        1
Big Data,Blockchain,Internet,Machine Learning,Mobile       1
Blockchain,Marketplace,Sharing Economy                     1
Payments,QR Codes                                          1
Name: category_list, Length: 17450, dtype: int64


In [5]:
# Stardust ver unique keyword search
def unique_keyword_search(df_col: pd.Series, num_selected: int) -> list:
    '''
    Return the most common keywords found in a column of comma-separated strings.

    Each string entry is split on ',' and every resulting piece counts as one
    occurrence of that keyword. Entries that are not strings (for example the
    NaN that pandas uses for missing values) are skipped instead of raising.

    :param pd.Series df_col: column of a pd.DataFrame (e.g. df['col']) whose
        entries are comma-separated keyword strings
    :param int num_selected: maximum number of keywords to return; if fewer
        distinct keywords exist, all of them are returned, and a value of
        zero or less yields an empty list
    :return: list of keywords in decreasing occurrence
    :rtype: list
    '''
    keyword_counts = Counter()
    for entry in df_col:
        # Missing values arrive as float NaN / None and have no .split().
        if isinstance(entry, str):
            keyword_counts.update(entry.split(','))
    if num_selected <= 0:
        return []
    # most_common(n) yields at most n pairs, so requesting more keywords than
    # exist no longer raises IndexError.
    return [keyword for keyword, _ in keyword_counts.most_common(num_selected)]

In [6]:
# Extract the 30 most frequent individual keywords from category_list and display them.
fintech_keywords = unique_keyword_search(df_org['category_list'], 30)
fintech_keywords

['Financial Services',
 'FinTech',
 'Finance',
 'Blockchain',
 'Information Technology',
 'Software',
 'Payments',
 'Cryptocurrency',
 'Venture Capital',
 'Internet',
 'Banking',
 'Consulting',
 'Mobile Payments',
 'E-Commerce',
 'Insurance',
 'Bitcoin',
 'Artificial Intelligence',
 'Mobile',
 'Crowdfunding',
 'SaaS',
 'Real Estate',
 'Apps',
 'Personal Finance',
 'Accounting',
 'Mobile Apps',
 'Asset Management',
 'Marketplace',
 'Lending',
 'Big Data',
 'Machine Learning']