In [1]:
import pandas as pd
import numpy as np
import requests
import inflection as inf
from collections import OrderedDict

In [2]:
from nltk.corpus import wordnet as wn

In [3]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/dcomfort/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
pd.__version__

'0.25.3'

In [5]:
import warnings
warnings.filterwarnings('ignore')

# 1. Read raw data

In [6]:
df = pd.read_csv('data/ISS mandate - Raw data.csv')

In [7]:
# Work on the first 100 rows of the TI, DE, ID and DI columns only.
# .copy() detaches the sample from `df`: the following cells assign new values
# into columns of `test`, and doing that on a slice of another frame triggers
# pandas' SettingWithCopyWarning (currently hidden because all warnings are
# silenced near the top of this notebook).
test = df[['TI', 'DE', 'ID', 'DI']].head(100).copy()

In [8]:
test

Unnamed: 0,TI,DE,ID,DI
0,Two regimes of a single n-heptane droplet comb...,Combustion; Cool flame; Droplet; Mathematical ...,DIFFUSION COMBUSTION; COOL-FLAMES; MICROGRAVITY,10.1016/j.actaastro.2019.01.045
1,Desert cyanobacteria under space and planetary...,Chroococcidiopsis; desiccation tolerance; dese...,RADIATION-RESISTANCE; MARTIAN ATMOSPHERE; SYNT...,10.1017/S147355041800037X
2,Photolysis of Cometary Organic Dust Analogs on...,Astrobiology; Astrochemistry; Comets; Internat...,ULTRACARBONACEOUS ANTARCTIC MICROMETEORITES; I...,10.1089/ast.2018.1853
3,Diffusive Motion in a 3-D Cluster in PK-4,Diffusion; dusty plasma; microgravity; Plasmak...,ANOMALOUS DIFFUSION; DUST PARTICLES; DYNAMICS;...,10.1109/TPS.2019.2893155
4,Negative Effects of Long-duration Spaceflight ...,aerospace medicine; atrophy; back pain; comput...,LOW-BACK-PAIN; FAT INFILTRATION; MECHANICAL-PR...,10.1097/BRS.0000000000002959
...,...,...,...,...
95,CONTRIBUTION OF DIFFERENT PARTICLES MEASURED W...,,RADIATION; DOSIMETRY,10.1093/rpd/ncx189
96,Dust density waves in a dc flowing complex pla...,,ACOUSTIC-WAVES,10.1063/1.5040417
97,Capturing the Urban Divide in Nighttime Light ...,Deprived areas; Global South; International Sp...,SATELLITE IMAGERY; CHINA; SLUMS; DMSP,10.1109/JSTARS.2018.2828340
98,Space-Based Microgravity Experiments on Flame ...,Flame spread; Droplet cloud; Group combustion;...,COMBUSTION EXPERIMENTS ABOARD; FUEL; ARRAYS; P...,10.1007/s12217-018-9637-2


In [9]:
test['DE'][0]

'Combustion; Cool flame; Droplet; Mathematical modeling; Diffusion'

In [10]:
# Normalise both keyword columns the same way: use ', ' instead of '; ' as the
# separator, turn hyphens into spaces, and put a single space where the value
# is missing (that lone space is what later cells filter out).
for column in ('DE', 'ID'):
    normalised = test[column].str.replace('; ', ', ')
    normalised = normalised.str.replace('-', ' ')
    test[column] = normalised.fillna(' ')

In [11]:
# 2. Combine keyword columns
test['keywords'] = test['DE'] + ', ' + test['ID']

In [12]:
# 3. lowercase keyword case
test['keywords'] = test['keywords'].str.lower()

test = test[['TI','keywords' , 'DI']]

In [13]:
# 4. singularize
# test['keywords'] = test['keywords'].apply(lambda x: ', '.join([inf.singularize(item) for item in x.split(', ')]))

In [14]:
# maybe lowercase afterwards
# POS tags - if noun don't singularize

In [15]:
# 5. Remove duplicates
# Split each cell on ', ', drop repeated keywords while keeping the order of
# first appearance (OrderedDict.fromkeys), then glue the survivors back together.
# https://stackoverflow.com/questions/47316783/python-dataframe-remove-duplicate-words-in-the-same-cell-within-a-column-in-pyt
keyword_lists = test['keywords'].str.split(', ')
test['keywords'] = keyword_lists.apply(
    lambda parts: ', '.join(OrderedDict.fromkeys(parts))
)

In [16]:
# 6. Split strings
test['keywords'] = test['keywords'].str.split(', ', expand=False)

In [17]:
test['keywords'][0]

['combustion',
 'cool flame',
 'droplet',
 'mathematical modeling',
 'diffusion',
 'diffusion combustion',
 'cool flames',
 'microgravity']

In [18]:
test.shape[0]

100

In [19]:
# 7. pandas explode - one row per keyword (the other columns are repeated),
# then drop the ' ' placeholders and renumber the rows from 0.
test_ = (
    test.explode('keywords')
        .loc[lambda frame: frame['keywords'] != ' ']
        .reset_index(drop=True)
)

In [20]:
test_['keywords'].nunique()

842

In [21]:
test_['keywords']

0                       combustion
1                       cool flame
2                          droplet
3            mathematical modeling
4                        diffusion
                   ...            
1121                  microgravity
1122    spherical diffusion flames
1123                   cool flames
1124                    combustion
1125                       heptane
Name: keywords, Length: 1126, dtype: object

___

# Extract from WordNet

In [22]:
from nltk.corpus import wordnet
wn_lemmas = set(wordnet.all_lemma_names())

def wordnet_exits(word):
    """Return True if ``word`` is a known WordNet lemma name, else False.

    Spaces in ``word`` are replaced by underscores before the lookup in the
    module-level ``wn_lemmas`` set.
    """
    return word.replace(' ', '_') in wn_lemmas

In [23]:
wordnet_exits('microgravity')

False

In [24]:
def acquire_definition(keyword):
    """Return the definition of the first synset found for ``keyword``.

    Only the first entry of ``wn.synsets(keyword)`` is used; the ``[0]``
    lookup fails if no synsets come back.
    """
    first_synset = wn.synsets(keyword)[0]
    return first_synset.definition()

In [25]:
def acquire_hypernym(keyword):
    """Return the name of the first hypernym of ``keyword``'s first synset.

    Both ``[0]`` lookups fail when the corresponding list is empty (no synset
    for the keyword, or no hypernym for that synset).
    """
    first_synset = wn.synsets(keyword)[0]
    parent = first_synset.hypernyms()[0]
    # Keep only the part of the synset name that precedes the first '.'.
    return parent.name().split(".")[0]

In [26]:
def acquire_wordnet_id(keyword):
    """Return an id of the form '<offset, zero-padded to 8 digits>-<pos>'.

    The id is built from the first synset found for ``keyword``; for example
    this notebook gets '13450206-n' for 'combustion'. The ``[0]`` lookup fails
    if no synsets come back.
    """
    # wn.synsets() already returns Synset objects, so the synset is used
    # directly instead of converting it to its name and looking it up again.
    first_synset = wn.synsets(keyword)[0]
    return str(first_synset.offset()).zfill(8) + '-' + first_synset.pos()

In [27]:
acquire_wordnet_id('combustion')


'13450206-n'

In [28]:
test_.reset_index(drop=True, inplace=True)

In [29]:
test_.head()

Unnamed: 0,TI,keywords,DI
0,Two regimes of a single n-heptane droplet comb...,combustion,10.1016/j.actaastro.2019.01.045
1,Two regimes of a single n-heptane droplet comb...,cool flame,10.1016/j.actaastro.2019.01.045
2,Two regimes of a single n-heptane droplet comb...,droplet,10.1016/j.actaastro.2019.01.045
3,Two regimes of a single n-heptane droplet comb...,mathematical modeling,10.1016/j.actaastro.2019.01.045
4,Two regimes of a single n-heptane droplet comb...,diffusion,10.1016/j.actaastro.2019.01.045


In [30]:
test_.shape[0]

1126

In [31]:
df_test_ = test_.copy()

In [32]:
df_test_.rename(columns={'TI': 'topic_title', 'keywords': 'definition', 
                       'DI': 'reference' }, inplace=True)

In [33]:
df_test_[df_test_['definition'].str.contains('^mar$', regex=True)]

Unnamed: 0,topic_title,definition,reference


In [34]:
df_test_['definition'] = df_test_['definition'].str.replace('^mar$', 'mars', regex=True)

In [35]:
df_test_[df_test_['definition'].str.contains('^mars', regex=True)]

Unnamed: 0,topic_title,definition,reference
13,Desert cyanobacteria under space and planetary...,mars like conditions,10.1017/S147355041800037X
21,Desert cyanobacteria under space and planetary...,mars,10.1017/S147355041800037X
741,"Survival, DNA, and Ultrastructural Integrity o...",mars conditions,10.1089/ast.2017.1728


In [36]:
# df_test_ = df_test_.head(10)

In [37]:
df_test_.shape[0]

1126

In [38]:
# Build `df_topic`: for every keyword row that has a WordNet entry, emit
#   1. a 'topic' row    paper title -> keyword
#   2. a 'topic' row    keyword     -> WordNet definition
#   3. a 'cluster' row  hypernym    -> hypernym definition (only if available)
# Keys are 'T<n>' with a running counter `j`.

# Column order of every record appended to `data` (and of `df_topic`).
RECORD_FIELDS = ('key', 'type', 'topic_title', 'definition', 'reference', 'wordnet_ID')


def _make_record(key, record_type, topic_title, definition, wordnet_id):
    """Build one output row; the 'reference' field is always an empty string."""
    return dict(zip(
        RECORD_FIELDS,
        (key, record_type, topic_title, definition, '', wordnet_id),
    ))


data = []
j = 1  # numeric part of the current 'T<n>' key
for index, row in df_test_.iterrows():
    print('index: ', index, 'keywords: ', row['definition'])
    if not wordnet_exits(row['definition']):
        print('Word DOES NOT exists in WordNet')
        print('*****************')
        continue

    word = row['definition'].replace(' ', '_')
    print('Word exists in WordNet')
    print('*****************')
    print('j: ', j)
    key = 'T' + str(j)
    print('initial key: ', key)

    # Both topic rows carry the same WordNet id, so look it up once.
    word_id = acquire_wordnet_id(word)
    data.append(_make_record(key, 'topic', row['topic_title'], word, word_id))

    j = j + 1
    print('j: ', j)
    key = 'T' + str(j)
    print('key: ', key)
    data.append(_make_record(key, 'topic', word, acquire_definition(word), word_id))

    # The cluster row is best effort. Resolve everything it needs BEFORE
    # advancing `j`, so a failed lookup can no longer burn a key number.
    # NOTE(review): acquire_hypernym returns only a name, and the two calls
    # below re-resolve that name to its *first* synset, which may not be the
    # hypernym synset itself - confirm this is acceptable.
    try:
        hypernym = acquire_hypernym(word)
        hypernym_definition = acquire_definition(hypernym)
        hypernym_id = acquire_wordnet_id(hypernym)
    except Exception:
        # No usable hypernym for this word: skip the cluster row. Unlike a
        # bare `except:`, this lets KeyboardInterrupt / SystemExit through.
        pass
    else:
        j = j + 1
        print('j: ', j)
        key = 'T' + str(j)
        print('key: ', key)
        data.append(_make_record(key, 'cluster', hypernym, hypernym_definition, hypernym_id))

    # Advance to the key the next matching keyword will start with.
    j = j + 1
    print('j: ', j)
    key = 'T' + str(j)
    print('key: ', key)
    # TODO: optionally keep walking up the hypernym chain (further levels) and
    # emit one additional 'cluster' row per level.

# Build the frame once, after the loop. Passing `columns` keeps the schema
# stable even when no keyword matched and `data` is empty.
df_topic = pd.DataFrame(data, columns=list(RECORD_FIELDS))


index:  0 keywords:  combustion
Word exists in WordNet
*****************
j:  1
initial key:  T1
j:  2
key:  T2
j:  3
key:  T3
j:  4
key:  T4
index:  1 keywords:  cool flame
Word DOES NOT exists in WordNet
*****************
index:  2 keywords:  droplet
Word exists in WordNet
*****************
j:  4
initial key:  T4
j:  5
key:  T5
j:  6
key:  T6
j:  7
key:  T7
index:  3 keywords:  mathematical modeling
Word DOES NOT exists in WordNet
*****************
index:  4 keywords:  diffusion
Word exists in WordNet
*****************
j:  7
initial key:  T7
j:  8
key:  T8
j:  9
key:  T9
j:  10
key:  T10
index:  5 keywords:  diffusion combustion
Word DOES NOT exists in WordNet
*****************
index:  6 keywords:  cool flames
Word DOES NOT exists in WordNet
*****************
index:  7 keywords:  microgravity
Word DOES NOT exists in WordNet
*****************
index:  8 keywords:  chroococcidiopsis
Word DOES NOT exists in WordNet
*****************
index:  9 keywords:  desiccation tolerance
Word DOES NOT

j:  100
key:  T100
j:  101
key:  T101
j:  102
key:  T102
index:  100 keywords:  exploration
Word exists in WordNet
*****************
j:  102
initial key:  T102
j:  103
key:  T103
j:  104
key:  T104
j:  105
key:  T105
index:  101 keywords:  lengths and angles of fibers
Word DOES NOT exists in WordNet
*****************
index:  102 keywords:  spaceflight
Word exists in WordNet
*****************
j:  105
initial key:  T105
j:  106
key:  T106
j:  107
key:  T107
j:  108
key:  T108
index:  103 keywords:  ultrasonography
Word exists in WordNet
*****************
j:  108
initial key:  T108
j:  109
key:  T109
j:  110
key:  T110
j:  111
key:  T111
index:  104 keywords:  voluntary and evoked contractions
Word DOES NOT exists in WordNet
*****************
index:  105 keywords:  human skeletal muscle
Word DOES NOT exists in WordNet
*****************
index:  106 keywords:  cross sectional area
Word DOES NOT exists in WordNet
*****************
index:  107 keywords:  bed rest
Word exists in WordNet
******

index:  175 keywords:  spaceflight
Word exists in WordNet
*****************
j:  179
initial key:  T179
j:  180
key:  T180
j:  181
key:  T181
j:  182
key:  T182
index:  176 keywords:  microgravity
Word DOES NOT exists in WordNet
*****************
index:  177 keywords:  arabidopsis
Word exists in WordNet
*****************
j:  182
initial key:  T182
j:  183
key:  T183
j:  184
key:  T184
j:  185
key:  T185
index:  178 keywords:  transcriptome
Word DOES NOT exists in WordNet
*****************
index:  179 keywords:  dna methylation
Word DOES NOT exists in WordNet
*****************
index:  180 keywords:  epigenome
Word DOES NOT exists in WordNet
*****************
index:  181 keywords:  epigenetic
Word DOES NOT exists in WordNet
*****************
index:  182 keywords:  veggie
Word exists in WordNet
*****************
j:  185
initial key:  T185
j:  186
key:  T186
j:  187
key:  T187
j:  188
key:  T188
index:  183 keywords:  iss
Word DOES NOT exists in WordNet
*****************
index:  184 keyword

j:  335
key:  T335
j:  336
key:  T336
j:  337
key:  T337
index:  337 keywords:  degradation
Word exists in WordNet
*****************
j:  337
initial key:  T337
j:  338
key:  T338
j:  339
key:  T339
j:  340
key:  T340
index:  338 keywords:  expression
Word exists in WordNet
*****************
j:  340
initial key:  T340
j:  341
key:  T341
j:  342
key:  T342
j:  343
key:  T343
index:  339 keywords:  regulator
Word exists in WordNet
*****************
j:  343
initial key:  T343
j:  344
key:  T344
j:  345
key:  T345
j:  346
key:  T346
index:  340 keywords:  alpha magnetic spectrometer
Word DOES NOT exists in WordNet
*****************
index:  341 keywords:  geomagnetic reference field
Word DOES NOT exists in WordNet
*****************
index:  342 keywords:  dark matter
Word exists in WordNet
*****************
j:  346
initial key:  T346
j:  347
key:  T347
j:  348
key:  T348
j:  349
key:  T349
index:  343 keywords:  energy spectra
Word DOES NOT exists in WordNet
*****************
index:  344 keyw

j:  442
key:  T442
index:  425 keywords:  plasma membrane
Word exists in WordNet
*****************
j:  442
initial key:  T442
j:  443
key:  T443
j:  444
key:  T444
j:  445
key:  T445
index:  426 keywords:  binding protein
Word DOES NOT exists in WordNet
*****************
index:  427 keywords:  gamma enolase
Word DOES NOT exists in WordNet
*****************
index:  428 keywords:  acyl coenzyme
Word DOES NOT exists in WordNet
*****************
index:  429 keywords:  flame spread limit
Word DOES NOT exists in WordNet
*****************
index:  430 keywords:  flame spread
Word DOES NOT exists in WordNet
*****************
index:  431 keywords:  interactive burning droplets
Word DOES NOT exists in WordNet
*****************
index:  432 keywords:  microgravity
Word DOES NOT exists in WordNet
*****************
index:  433 keywords:  combustion
Word exists in WordNet
*****************
j:  445
initial key:  T445
j:  446
key:  T446
j:  447
key:  T447
j:  448
key:  T448
index:  434 keywords:  array


j:  590
key:  T590
j:  591
key:  T591
j:  592
key:  T592
index:  543 keywords:  equations
Word DOES NOT exists in WordNet
*****************
index:  544 keywords:  gas
Word exists in WordNet
*****************
j:  592
initial key:  T592
j:  593
key:  T593
j:  594
key:  T594
j:  595
key:  T595
index:  545 keywords:  complex (dusty) plasmas
Word DOES NOT exists in WordNet
*****************
index:  546 keywords:  microgravity research
Word DOES NOT exists in WordNet
*****************
index:  547 keywords:  low temperature plasmas
Word DOES NOT exists in WordNet
*****************
index:  548 keywords:  ion drag force
Word DOES NOT exists in WordNet
*****************
index:  549 keywords:  falling dust particles
Word DOES NOT exists in WordNet
*****************
index:  550 keywords:  void formation
Word DOES NOT exists in WordNet
*****************
index:  551 keywords:  crystal
Word exists in WordNet
*****************
j:  595
initial key:  T595
j:  596
key:  T596
j:  597
key:  T597
j:  598
ke

index:  656 keywords:  x rays: individual (maxi j1820+070)
Word DOES NOT exists in WordNet
*****************
index:  657 keywords:  black hole candidate
Word DOES NOT exists in WordNet
*****************
index:  658 keywords:  radio emission
Word exists in WordNet
*****************
j:  712
initial key:  T712
j:  713
key:  T713
j:  714
key:  T714
j:  715
key:  T715
index:  659 keywords:  compact jet
Word DOES NOT exists in WordNet
*****************
index:  660 keywords:  binaries
Word DOES NOT exists in WordNet
*****************
index:  661 keywords:  mass
Word exists in WordNet
*****************
j:  715
initial key:  T715
j:  716
key:  T716
j:  717
key:  T717
j:  718
key:  T718
index:  662 keywords:  mitsume
Word DOES NOT exists in WordNet
*****************
index:  663 keywords:  disc
Word exists in WordNet
*****************
j:  718
initial key:  T718
j:  719
key:  T719
j:  720
key:  T720
j:  721
key:  T721
index:  664 keywords:  absorption
Word exists in WordNet
*****************
j:  7

j:  805
key:  T805
index:  741 keywords:  mars conditions
Word DOES NOT exists in WordNet
*****************
index:  742 keywords:  space conditions
Word DOES NOT exists in WordNet
*****************
index:  743 keywords:  simulated space
Word DOES NOT exists in WordNet
*****************
index:  744 keywords:  black fungi
Word DOES NOT exists in WordNet
*****************
index:  745 keywords:  rapd assay
Word DOES NOT exists in WordNet
*****************
index:  746 keywords:  resistance
Word exists in WordNet
*****************
j:  805
initial key:  T805
j:  806
key:  T806
j:  807
key:  T807
j:  808
key:  T808
index:  747 keywords:  radiation
Word exists in WordNet
*****************
j:  808
initial key:  T808
j:  809
key:  T809
j:  810
key:  T810
j:  811
key:  T811
index:  748 keywords:  lichens
Word DOES NOT exists in WordNet
*****************
index:  749 keywords:  biomex
Word DOES NOT exists in WordNet
*****************
index:  750 keywords:  damage
Word exists in WordNet
*************

j:  880
initial key:  T880
j:  881
key:  T881
j:  882
key:  T882
j:  883
key:  T883
index:  840 keywords:  liquid alumina
Word DOES NOT exists in WordNet
*****************
index:  841 keywords:  density
Word exists in WordNet
*****************
j:  883
initial key:  T883
j:  884
key:  T884
j:  885
key:  T885
j:  886
key:  T886
index:  842 keywords:  temperature
Word exists in WordNet
*****************
j:  886
initial key:  T886
j:  887
key:  T887
j:  888
key:  T888
j:  889
key:  T889
index:  843 keywords:  solidification
Word exists in WordNet
*****************
j:  889
initial key:  T889
j:  890
key:  T890
j:  891
key:  T891
j:  892
key:  T892
index:  844 keywords:  microgravity
Word DOES NOT exists in WordNet
*****************
index:  845 keywords:  oscillations
Word DOES NOT exists in WordNet
*****************
index:  846 keywords:  oxides
Word DOES NOT exists in WordNet
*****************
index:  847 keywords:  al2o3
Word DOES NOT exists in WordNet
*****************
index:  848 keywor

index:  937 keywords:  gamma rays: general
Word DOES NOT exists in WordNet
*****************
index:  938 keywords:  instrumentation: detectors
Word DOES NOT exists in WordNet
*****************
index:  939 keywords:  methods: data analysis
Word DOES NOT exists in WordNet
*****************
index:  940 keywords:  fermi lat observations
Word DOES NOT exists in WordNet
*****************
index:  941 keywords:  aspergillus niger
Word DOES NOT exists in WordNet
*****************
index:  942 keywords:  international space station
Word DOES NOT exists in WordNet
*****************
index:  943 keywords:  phylogenetic analysis
Word DOES NOT exists in WordNet
*****************
index:  944 keywords:  proteomics
Word exists in WordNet
*****************
j:  958
initial key:  T958
j:  959
key:  T959
j:  960
key:  T960
j:  961
key:  T961
index:  945 keywords:  transcriptional activator xlnr
Word DOES NOT exists in WordNet
*****************
index:  946 keywords:  ionizing radiation
Word exists in WordNet


j:  1064
key:  T1064
j:  1065
key:  T1065
index:  1010 keywords:  countermeasure
Word exists in WordNet
*****************
j:  1065
initial key:  T1065
j:  1066
key:  T1066
j:  1067
key:  T1067
j:  1068
key:  T1068
index:  1011 keywords:  duration space flight
Word DOES NOT exists in WordNet
*****************
index:  1012 keywords:  spatial orientation
Word DOES NOT exists in WordNet
*****************
index:  1013 keywords:  treadmill walking
Word DOES NOT exists in WordNet
*****************
index:  1014 keywords:  recovery
Word exists in WordNet
*****************
j:  1068
initial key:  T1068
j:  1069
key:  T1069
j:  1070
key:  T1070
j:  1071
key:  T1071
index:  1015 keywords:  strategies
Word DOES NOT exists in WordNet
*****************
index:  1016 keywords:  fall
Word exists in WordNet
*****************
j:  1071
initial key:  T1071
j:  1072
key:  T1072
j:  1073
key:  T1073
j:  1074
key:  T1074
index:  1017 keywords:  functional tests
Word DOES NOT exists in WordNet
*****************


In [39]:
df_topic.head(10)

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
0,T1,topic,Two regimes of a single n-heptane droplet comb...,combustion,,13450206-n
1,T2,topic,combustion,a process in which a substance reacts with oxy...,,13450206-n
2,T3,cluster,oxidation,the process of oxidizing; the addition of oxyg...,,13530408-n
3,T4,topic,Two regimes of a single n-heptane droplet comb...,droplet,,13771828-n
4,T5,topic,droplet,a tiny drop,,13771828-n
5,T6,cluster,drop,a shape that is spherical and small,,13901585-n
6,T7,topic,Two regimes of a single n-heptane droplet comb...,diffusion,,13465530-n
7,T8,topic,diffusion,(physics) the process in which there is moveme...,,13465530-n
8,T9,cluster,natural_process,a process existing in or produced by nature (r...,,13518963-n
9,T10,topic,Desert cyanobacteria under space and planetary...,life_support,,03664159-n


In [40]:
df_topic[df_topic['type'] == 'cluster'].shape[0]

379

In [41]:
df_topic[df_topic['key'] == 'T264']

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
263,T264,topic,perception,the representation of what is perceived; basic...,,05930136-n


In [42]:
df_topic[df_topic['definition'].str.contains('^ab', regex=True)]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
35,T36,topic,Photolysis of Cometary Organic Dust Analogs on...,absorption,,13423922-n
190,T191,topic,The 11 yr of low activity of the magnetar XTE ...,absorption,,13423922-n
223,T224,topic,3D Printing in Zero G Technology Demonstration...,abs,,14592028-n
720,T721,topic,"X-Ray, Optical, and Near-infrared Monitoring o...",absorption,,13423922-n


In [43]:
df_topic[df_topic['key'] == 'T204']

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
203,T204,topic,spaceflight,a voyage outside the Earth's atmosphere,,00313502-n


In [44]:
df_topic[df_topic['topic_title'].str.contains('device', regex=True)]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
176,T177,topic,devices,an inclination or desire; used in the plural i...,,06198876-n
359,T360,cluster,device,an instrumentality invented for a particular p...,,03183080-n
782,T783,cluster,device,an instrumentality invented for a particular p...,,03183080-n


In [45]:
df_topic[df_topic['key'].isin(['T176','T177', 'T178'])]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
175,T176,topic,Chemiluminescence-based biosensor for monitori...,devices,,06198876-n
176,T177,topic,devices,an inclination or desire; used in the plural i...,,06198876-n
177,T178,cluster,inclination,an attitude of mind especially one that favors...,,06196584-n


In [46]:
df_topic[df_topic['key'].isin(['T358','T359', 'T360'])]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
357,T358,topic,Towards Understanding the Origin of Cosmic-Ray...,detector,,03180969-n
358,T359,topic,detector,any device that receives a signal or stimulus ...,,03180969-n
359,T360,cluster,device,an instrumentality invented for a particular p...,,03183080-n


In [47]:
df_topic[df_topic['key'].isin(['T781','T782', 'T783'])]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
780,T781,topic,REMOTE ECHOGRAPHY BETWEEN A GROUND CONTROL CEN...,remote_control,,04074963-n
781,T782,topic,remote_control,a device that can be used to control a machine...,,04074963-n
782,T783,cluster,device,an instrumentality invented for a particular p...,,03183080-n


In [48]:
# df_topic.to_csv('20200122_df_topic.csv', index=None)

In [49]:
# Titles of 'cluster' rows whose topic_title already appeared in an earlier
# row of df_topic (DataFrame.duplicated flags every occurrence after the first).
# To narrow this down while debugging, add a further mask such as
# df_topic['topic_title'].str.contains('^device', regex=True).
repeated_title = df_topic.duplicated('topic_title')
cluster_row = df_topic['type'] == 'cluster'
list_of_duplicated_topic_titles = (
    df_topic.loc[repeated_title & cluster_row, 'topic_title'].tolist()
)

In [53]:
# list_of_duplicated_topic_titles

In [51]:
df_topic[(df_topic['topic_title'].str.contains('device'))]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
176,T177,topic,devices,an inclination or desire; used in the plural i...,,06198876-n
359,T360,cluster,device,an instrumentality invented for a particular p...,,03183080-n
782,T783,cluster,device,an instrumentality invented for a particular p...,,03183080-n


In [52]:
df_topic[df_topic['topic_title'].str.contains('measurement')]

Unnamed: 0,key,type,topic_title,definition,reference,wordnet_ID
1019,T1020,cluster,measurement,the act or process of assigning numbers to phe...,,00996969-n
1134,T1135,cluster,measurement,the act or process of assigning numbers to phe...,,00996969-n
