In [1]:
import numpy as np
import scrapePM as spm
from bs4 import BeautifulSoup
import urllib
import pandas
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import csv
def save_pairs(filename, terms, pairs):
    """Write pair counts to ``filename`` as CSV.

    The first row written is ``terms`` (the header row); every element of
    ``pairs`` is then written as one CSV row.

    Parameters
    ----------
    filename : str
        Path of the CSV file to create. An existing file is overwritten.
    terms : sequence
        Values for the header row.
    pairs : iterable of sequences
        Rows to write below the header.
    """
    import sys  # local import: only needed to choose the file mode below

    # The csv module expects a binary file on Python 2 but a text file opened
    # with newline='' on Python 3. Using 'wb' on Python 3 makes writerow()
    # raise TypeError, so pick the mode that matches the interpreter.
    if sys.version_info[0] < 3:
        csvfile = open(filename, 'wb')
    else:
        csvfile = open(filename, 'w', newline='')

    with csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(terms)
        writer.writerows(pairs)

In [3]:
# Per-term hit counts, one column per search category. Judging by how
# threshed_terms uses it, the first row holds the category totals rather
# than a term.
counts = pandas.read_csv(filepath_or_buffer='../data/term_counts_pubmed.csv')
counts

Unnamed: 0,term,all,cog,neu,neu_methods
0,,0,290890,248883,262314
1,abductive reasoning,32,2,3,0
2,abstract analogy,2,0,0,0
3,abstract knowledge,88,23,10,4
4,acoustic coding,11,3,2,1
5,acoustic encoding,5,0,0,0
6,acoustic phonetic processing,10,0,3,6
7,acoustic processing,104,10,22,29
8,action,605111,9802,13305,8180
9,action initiation,64,14,13,4


In [4]:
def threshed_terms(counts, cat, thresh=0.001):
    """Return the terms whose share of the ``cat`` total is at least ``thresh``.

    Parameters
    ----------
    counts : pandas.DataFrame
        Table with a 'term' column and a numeric ``cat`` column. The first row
        is treated as the category total, not as a term.
    cat : str
        Name of the count column to threshold on.
    thresh : float
        Minimum count/total ratio a term needs in order to be kept.

    Returns
    -------
    list
        Values of the 'term' column for the rows that pass, in table order.

    Raises
    ------
    ValueError
        If the total in the first row of ``cat`` is not positive, because the
        ratio is then undefined.
    """
    total = counts[cat].iloc[0]
    if not total > 0:
        raise ValueError(
            "total count for category %r must be positive, got %r" % (cat, total))

    # Split the totals row off *before* filtering. Filtering first and then
    # dropping the first surviving row silently discards a real term whenever
    # the totals row itself fails the threshold (e.g. thresh > 1).
    term_rows = counts.iloc[1:]
    keep = term_rows[cat] / total >= thresh
    pruned_terms = list(term_rows.loc[keep, 'term'])

    # Single-argument print() behaves the same on Python 2 and 3; the text
    # matches what the original print statement produced.
    print('%d  terms remaining at threshold %% of  %s'
          % (len(pruned_terms), thresh * 100.))
    return pruned_terms

In [12]:
# Pair counts in PubMed for terms that pass the 'neu_methods' threshold.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'NoneType' object has no attribute 'text'" part-way through
# the term list, so save_pairs was never reached in that run.
neumet_list = threshed_terms(counts, cat='neu_methods')

# One parenthesised clause per recording/imaging method (abbreviation plus
# spelled-out synonyms), OR-ed together and wrapped in a single AND(...).
# Presumably scrape_pairs appends this to every query - confirm in scrapePM.
base_phrase = 'AND(' + 'OR'.join([
    '("fmri"OR"neuroimaging")',
    '("pet"OR"positron emission tomography")',
    '("eeg"OR"electroencephalography")',
    '("meg"OR"magnetoencephalography")',
    '("ecog"OR"electrocorticography")',
    '("lfp"OR"local field potential")',
    '("erp"OR"event related potential")',
    '("single unit"OR"single-unit"OR"single neuron")',
    '("calcium imaging")',
]) + ')'

neumet_pairs = spm.scrape_pairs(neumet_list, fieldkey='TIAB', base_phrase=base_phrase)
save_pairs(filename='../data/paircount_neumet.csv', terms=neumet_list, pairs=neumet_pairs)

175  terms remaining at threshold % of  0.1
action 1 / 175
activation 2 / 175
acuity 3 / 175
adaptation 4 / 175
addiction 5 / 175
agency 6 / 175
alveolar 7 / 175
anticipation 8 / 175
anxiety 9 / 175
appetite 10 / 175
arousal 11 / 175
association 12 / 175
attachment 13 / 175
attention 14 / 175
attentional resources 15 / 175
balance 16 / 175
behavioral inhibition (cognitive) 17 / 175
belief 18 / 175
categorization 19 / 175
cognitive control 20 / 175
cognitive development 21 / 175
communication 22 / 175
competition 23 / 175
concept 24 / 175
consciousness 25 / 175
consolidation 26 / 175
context 27 / 175
coordination 28 / 175
cueing 29 / 175
decision 30 / 175
decision making 31 / 175
declarative memory 32 / 175
desire 33 / 175
detection 34 / 175
discrimination 35 / 175
distraction 36 / 175
dyslexia 37 / 175
eating 38 / 175
efficiency 39 / 175
effort 40 / 175
emotion 41 / 175
emotion regulation 42 / 175
empathy 43 / 175
encoding 44 / 175
episodic memory 45 / 175
excitation 46 / 175
expectanc

AttributeError: 'NoneType' object has no attribute 'text'

In [6]:
# Pair counts in PubMed for terms that pass the 'neu' threshold.
neu_list = threshed_terms(counts, cat='neu')

# Extra query clause handed to scrape_pairs; presumably it limits hits to
# general neuroscience papers - confirm in scrapePM.
base_phrase = 'AND("neural"OR"neuroscience")'

neu_pairs = spm.scrape_pairs(neu_list, fieldkey='TIAB', base_phrase=base_phrase)
save_pairs(filename='../data/paircount_neu.csv', terms=neu_list, pairs=neu_pairs)

173  terms remaining at threshold % of  0.1
action 1 / 173
activation 2 / 173
acuity 3 / 173
adaptation 4 / 173
addiction 5 / 173
agency 6 / 173
alveolar 7 / 173
analogy 8 / 173
anhedonia 9 / 173
anticipation 10 / 173
anxiety 11 / 173
appetite 12 / 173
arousal 13 / 173
association 14 / 173
attachment 15 / 173
attention 16 / 173
auditory perception 17 / 173
balance 18 / 173
behavioral inhibition (cognitive) 19 / 173
belief 20 / 173
categorization 21 / 173
circadian rhythm 22 / 173
cognitive control 23 / 173
cognitive development 24 / 173
communication 25 / 173
competition 26 / 173
concept 27 / 173
consciousness 28 / 173
consolidation 29 / 173
context 30 / 173
context dependent 31 / 173
coordination 32 / 173
critical period 33 / 173
crosstalk 34 / 173
cueing 35 / 173
decision 36 / 173
decision making 37 / 173
declarative memory 38 / 173
desire 39 / 173
detection 40 / 173
discrimination 41 / 173
distraction 42 / 173
dyslexia 43 / 173
eating 44 / 173
efficiency 45 / 173
effort 46 / 173
emo

In [7]:
# Pair counts in PubMed for terms that pass the 'cog' threshold.
cog_list = threshed_terms(counts, cat='cog')

# Extra query clause handed to scrape_pairs; presumably it limits hits to
# cognition-related papers - confirm in scrapePM.
base_phrase = 'AND("cognitive"OR"cognition")'

cog_pairs = spm.scrape_pairs(cog_list, fieldkey='TIAB', base_phrase=base_phrase)
save_pairs(filename='../data/paircount_cog.csv', terms=cog_list, pairs=cog_pairs)

228  terms remaining at threshold % of  0.1
action 1 / 228
activation 2 / 228
acuity 3 / 228
adaptation 4 / 228
addiction 5 / 228
agency 6 / 228
analogy 7 / 228
anhedonia 8 / 228
anticipation 9 / 228
anxiety 10 / 228
appetite 11 / 228
arousal 12 / 228
articulation 13 / 228
association 14 / 228
attachment 15 / 228
attention 16 / 228
attentional bias 17 / 228
attentional resources 18 / 228
attitude 19 / 228
autobiographical memory 20 / 228
automaticity 21 / 228
balance 22 / 228
behavioral inhibition (cognitive) 23 / 228
belief 24 / 228
categorization 25 / 228
central executive 26 / 228
circadian rhythm 27 / 228
cognitive control 28 / 228
cognitive development 29 / 228
cognitive dissonance 30 / 228
cognitive effort 31 / 228
cognitive load 32 / 228
cognitive map 33 / 228
cognitive training 34 / 228
communication 35 / 228
competition 36 / 228
concept 37 / 228
conceptualization 38 / 228
conduct disorder 39 / 228
consciousness 40 / 228
consolidation 41 / 228
context 42 / 228
context dependent

In [9]:
# All term names, skipping the first row (its 'term' cell is empty in the
# table shown earlier).
counts['term'].iloc[1:]

1                abductive reasoning
2                   abstract analogy
3                 abstract knowledge
4                    acoustic coding
5                  acoustic encoding
6       acoustic phonetic processing
7                acoustic processing
8                             action
9                  action initiation
10                 action perception
11                        activation
12                  activation level
13                active maintenance
14                     active recall
15                  active retrieval
16                            acuity
17                        adaptation
18                  adaptive control
19                         addiction
20                 affect perception
21                affect recognition
22                            agency
23                     agreeableness
24                          altruism
25             altruistic motivation
26                          alveolar
27             amodal representation
2