# Filtering fem/men tech companies based on keywords (U.S.)

In [21]:
# import pandas and numpy libraries
import pandas as pd
import numpy as np
import csv

# Load the trimmed database (U.S., no target or target-ID duplicates, with the
# added fem/men tech column) in one chained expression:
#   1. parse the CSV,
#   2. keep only the columns whose name does not start with "Unnamed",
#   3. swap every NaN for an empty string.
fulldf = (
    pd.read_csv('fem_men_tech_db.csv')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    .replace(np.nan, '', regex = True)
)
# fulldf stays as loaded; the cells below modify this working copy instead
df = fulldf.copy()
df.head(10)

Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
0,Medigistics,1/21/14,1/21/14,,,Provides multi-specialty medical billing and a...
1,Relatient,1/21/14,1/21/14,,Patient Communications,Provider of an online patient engagement platf...
2,API Healthcare Corporation,1/21/14,2/13/14,,Infrastructure Technology,Provides healthcare specific workforce managem...
3,Kareo,1/21/14,1/21/14,,PM/EMR,Provides cloud-based medical office software a...
4,Change Collective,1/21/14,,,Provider - NC,"Platform offers videos, articles, and podcasts..."
5,Quantum Health,1/22/14,1/22/14,,Content,Operates as a consumer navigation and care coo...
6,MDLive,1/22/14,1/22/14,,Telemed,A network of Board certified physicians and li...
7,Eyelation,1/22/14,1/22/14,,Employer,A provider of kiosks and web technology to pro...
8,Daymarck,1/23/14,,,,Provides home health care coding and auditing ...
9,Fitmob,1/23/14,1/23/14,,,Owns and operates an online community for fitn...


In [22]:
# find number of companies (kept as a notebook-level variable for later cells)
count = len(df['TargetDescription'])
# change all descriptions to lowercase to match keywords; the vectorized .str
# accessor handles the whole column at once, so this step does not depend on
# the row labels being exactly 0..count-1
df['TargetDescription'] = df['TargetDescription'].str.lower()
df.head(10)

Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
0,Medigistics,1/21/14,1/21/14,,,provides multi-specialty medical billing and a...
1,Relatient,1/21/14,1/21/14,,Patient Communications,provider of an online patient engagement platf...
2,API Healthcare Corporation,1/21/14,2/13/14,,Infrastructure Technology,provides healthcare specific workforce managem...
3,Kareo,1/21/14,1/21/14,,PM/EMR,provides cloud-based medical office software a...
4,Change Collective,1/21/14,,,Provider - NC,"platform offers videos, articles, and podcasts..."
5,Quantum Health,1/22/14,1/22/14,,Content,operates as a consumer navigation and care coo...
6,MDLive,1/22/14,1/22/14,,Telemed,a network of board certified physicians and li...
7,Eyelation,1/22/14,1/22/14,,Employer,a provider of kiosks and web technology to pro...
8,Daymarck,1/23/14,,,,provides home health care coding and auditing ...
9,Fitmob,1/23/14,1/23/14,,,owns and operates an online community for fitn...


In [23]:
# Load the three keyword lists from their text files.
# fem-keys and both-keys are split on whitespace (genfromtxt's default);
# men-keys is split on newlines only.
# np.atleast_1d guards against a file holding a single keyword: genfromtxt then
# returns a 0-d array, which cannot be iterated. .tolist() converts the entries
# to plain Python str.
femKeys = np.atleast_1d(np.genfromtxt('fem-keys.txt', dtype='str')).tolist()
menKeys = np.atleast_1d(np.genfromtxt('men-keys.txt', dtype='str', delimiter = '\n')).tolist()
# 'men' and 'hair' are padded with spaces so the later substring search does not
# fire inside longer words such as 'women' or 'chair'.
# NOTE(review): the padded form also misses these words at the very start/end of
# a description or next to punctuation (e.g. "men's") -- confirm that is acceptable.
menKeys = [' ' + key + ' ' if key in ('men', 'hair') else key for key in menKeys]
bothKeys = np.atleast_1d(np.genfromtxt('both-keys.txt', dtype='str')).tolist()
print('Only fem: ',femKeys)
print('Only men: ', menKeys)
print('Can be both: ', bothKeys)

Only fem:  ['women', 'woman', 'feminine', 'female', 'femtech', 'pregnancy', 'birth', 'fetus', 'vitro', 'maternity', 'maternal', 'egg', 'breast', 'feeding', 'embryo', 'ovulation', 'menstrual', 'menstruation', 'tampon', 'contraceptive', 'contraception']
Only men:  [' men ', ' hair ', 'male', 'sperm', 'erectile', 'paternal', 'paternity']
Can be both:  ['fertile', 'fertility', 'baby', 'babies', 'std', 'sex', 'sexual']


In [24]:
# label femtech companies: a row gets 'F' when its description contains at
# least one fem-only keyword (plain substring match, so 'egg' also hits 'eggs').
# A boolean mask replaces the nested loops, so the cell works on whatever index
# df currently has instead of assuming labels 0..count-1.
hasFemKey = df['TargetDescription'].apply(
    lambda text: any(key in text for key in femKeys)
).astype(bool)
df.loc[hasFemKey, 'Fem/Men'] = 'F'
df.tail(10)

Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
3255,Ride Health,1/7/20,1/7/20,,Other,provider and coordinator of transportation for...
3256,Ripple Science,1/7/20,1/7/20,,Clinical Trial Management,provider of saas software that is designed to ...
3257,MD-Reports,1/7/20,1/7/20,,PM/EMR,"provider of electronic medical record (emr), p..."
3258,GHR RevCycle Workforce,12/31/19,12/31/19,,RCM Services,"the company provides coding, clinical document..."
3259,WiserCare,11/22/19,11/22/19,,Decision Support,developer of an interactive patient decision s...
3260,Advantia Health,1/8/20,1/8/20,F,Telemed,operator of an integrated healthcare delivery ...
3261,Yaro,1/8/20,1/8/20,,Benefits Management,provider of benefits navigation and care guida...
3262,RecoveryOne,12/17/19,12/17/19,,Population Health - Chronic,developer of a medical software designed to tr...
3263,Humm,12/19/19,12/19/19,,Fitness,developer of brain stimulating wearable patch....
3264,Blue Mesa,1/9/20,1/9/20,,Care Management,developer of diabetes prevention digital platf...


In [25]:
# pull the rows labeled 'F' into their own dataframe and report how many there are
isFem = df['Fem/Men'].eq('F')
dfFem = df[isFem]
countFem = dfFem.shape[0]
print('Number of fem-only companies in database: ', countFem)
dfFem.head(10)

Number of fem-only companies in database:  43


Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
203,Counsyl,5/8/14,5/8/14,F,Genomics,operates as a medical genomics company. the co...
227,BabyWatch (aka Bellabeat),5/23/14,5/23/14,F,Fitness,developer of heath tracking devices designed t...
393,Kindara,8/31/14,8/31/14,F,Fitness,developer of a fertility tracking application ...
396,WiserTogether,9/2/14,,F,,provides an interactive platform for health ca...
538,Celmatix,12/3/14,7/17/15,F,,a personalized medicine company that focuses o...
724,Maven Clinic,6/5/14,4/9/15,F,Telemed,provides a telemedicine platform for women.
838,Wildflower Health,6/25/15,6/25/15,F,Telemed,operates an online health engagement platform ...
961,Joylux,9/22/15,9/22/15,F,,"offers led-based therapeutic products, includi..."
1142,Mammosphere,1/26/16,1/1/16,F,,provides a medical image sharing network that ...
1498,Bloomlife,8/17/16,8/17/16,F,Fitness,developer of wearable pregnancy monitors.


In [26]:
# show the row labels of the fem-only rows (carried over from df by the filter above)
dfFem.index

Int64Index([ 203,  227,  393,  396,  538,  724,  838,  961, 1142, 1498, 1541,
            1562, 1699, 1705, 1898, 1967, 1974, 2031, 2122, 2231, 2258, 2282,
            2358, 2412, 2448, 2463, 2466, 2508, 2615, 2632, 2799, 2816, 2835,
            2837, 2841, 2856, 2862, 2976, 3034, 3072, 3087, 3142, 3260],
           dtype='int64')

In [27]:
# export fem data to new csv; the dataframe index (the row labels from df) is
# written as well, since to_csv is called without index=False
dfFem.to_csv('filtered_fem_db.csv')

In [28]:
# do the same for men-only companies: label, extract, export

In [29]:
# label them with M: only rows that are still unlabeled ('') and whose
# description contains at least one men-only keyword (plain substring match).
# Rows already marked 'F' are left alone, so fem labels take precedence.
# A boolean mask replaces the nested loops, so the cell works on whatever index
# df currently has instead of assuming labels 0..count-1.
isUnlabeled = df['Fem/Men'] == ''
hasMenKey = df['TargetDescription'].apply(
    lambda text: any(key in text for key in menKeys)
).astype(bool)
df.loc[isUnlabeled & hasMenKey, 'Fem/Men'] = 'M'

In [30]:
# pull the rows labeled 'M' (mentech) into their own dataframe and report how many there are
isMen = df['Fem/Men'].eq('M')
dfMen = df[isMen]
countMen = dfMen.shape[0]
print('Number of mentech companies in database: ', countMen)
dfMen

Number of mentech companies in database:  4


Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
2078,Roman Health Medical (dba Roman),10/31/17,10/31/17,M,Telemed,"a cloud pharmacy for erectile dysfunction, han..."
2290,Hims,3/2/18,3/2/18,M,Fitness,provider of wellness products intended to offe...
2635,ThirtyMadison,10/16/18,10/16/18,M,Telemed,an online virtual men's health platform focuse...
3037,Dadi,8/8/19,8/8/19,M,Fitness,provider of health services intended for male ...


In [31]:
# export the men-only rows to a new csv; the dataframe index is written as well,
# since to_csv is called without index=False
dfMen.to_csv('filtered_men_db.csv')

In [32]:
# do the same for companies that could serve both genders: label, extract, export

In [33]:
# label with B: only rows that are still unlabeled ('') and whose description
# contains at least one of the "can be both" keywords (plain substring match).
# Rows already marked 'F' or 'M' keep their label.
# A boolean mask replaces the nested loops, so the cell works on whatever index
# df currently has instead of assuming labels 0..count-1.
isUnlabeled = df['Fem/Men'] == ''
hasBothKey = df['TargetDescription'].apply(
    lambda text: any(key in text for key in bothKeys)
).astype(bool)
df.loc[isUnlabeled & hasBothKey, 'Fem/Men'] = 'B'

In [34]:
# pull the rows labeled 'B' into their own dataframe and report how many there are
isBoth = df['Fem/Men'].eq('B')
dfBoth = df[isBoth]
countBoth = dfBoth.shape[0]
print('Number of companies likely for general parenting in database: ', countBoth)
dfBoth

Number of companies likely for general parenting in database:  12


Unnamed: 0,Target,AnnounceDate,CloseDate,Fem/Men,Sector,TargetDescription
169,Owlet Baby Care,4/22/14,4/22/14,B,Fitness,maker of a baby monitor that includes heart ra...
647,Sproutling,2/16/15,,B,,offers sensor system designed to wrap around a...
685,FertilityAuthority.com,3/12/15,3/12/15,B,,operates as a fertility web portal
999,Hatch Baby,10/15/15,10/15/15,B,Fitness,developer of smart wireless parenting devices ...
1462,Progyny,6/15/16,5/2/17,B,Benefits Management,provider of fertility benefits management serv...
1596,Renew.com,11/4/16,11/17/16,B,,operates a cloud-based senior benefits platfor...
1891,Wearless Tech (dba Cocoon Cam),6/28/17,6/28/17,B,Fitness,developer of an intelligent baby breathing vid...
2004,Carrot Fertility,9/14/17,9/14/17,B,Benefits Management,provider of a modern fertility platform intend...
2529,myLAB Box,8/16/18,8/16/18,B,Fitness,provider of at-home std testing kits intended ...
2623,Pr3vent,8/1/18,8/1/18,B,Decision Support,provider of an artificial intelligence based s...


In [35]:
# export the 'B' rows to a new csv; the dataframe index is written as well,
# since to_csv is called without index=False
dfBoth.to_csv('filtered_both_db.csv')

In [36]:
# keep only the companies that received a non-empty Fem/Men label.
# One boolean filter replaces the row-by-row drop loop: it gives the same rows
# with the same index, and re-running the cell is harmless (the loop raised
# KeyError on labels that had already been dropped).
df = df[df['Fem/Men'] != ''].copy()
# export every labeled company to one csv (index included)
df.to_csv('filtered_all_db.csv')
# preview goes last so the notebook actually renders it
df.head(10)