In [1]:
import os
import spacy

# Load the spaCy pipeline named "en_core_web_sm"; it is used below to parse the article text.
nlp = spacy.load("en_core_web_sm")

# Folder that holds article.txt. Set the NLP_NYT_LECTURE_DIR environment
# variable to point at your own copy; the original hard-coded location stays
# as the default so existing setups behave exactly as before.
path = os.environ.get('NLP_NYT_LECTURE_DIR', r'c:\users\jeff levy\documents\github\nlp_nyt_lecture')
# Article URL (presumably the source of article.txt):
# https://www.nytimes.com/2019/10/29/upshot/economic-conditions-2020-race.html

In [2]:
# Read the raw article text from disk, then run it through the spaCy pipeline.
article_file = os.path.join(path, 'article.txt')
with open(article_file, mode='r', encoding='latin_1') as ifile:
    text = ifile.read()

doc = nlp(text)

FileNotFoundError: [Errno 2] No such file or directory: 'c:\\users\\jeff levy\\documents\\github\\nlp_nyt_lecture/article.txt'

In [3]:
# Echo the parsed document to confirm the article text was read in.
doc

If you want to know what the economic backdrop to the 2020 elections is likely to be, pay close attention to some of this week’s headlines.

Over the next few days, we are going to get readings that tell us a lot about how the economy is holding up after a summer recession scare. If the numbers and policy announcements play out as forecasters are expecting, we’ll also see some of the contradictions of the Trump-era economy exposed.

Specifically, the economy in late 2019 appears to feature a combination of economic growth that is slowing but not falling into a recession, a booming stock market, low interest rates, a tight labor market and a weak manufacturing sector. There is always the possibility of surprises, but this could be the mix that shapes the discussion on the campaign trail in the year before Election Day.

In particular, if recent trends hold up, Democrats will have a lane to attack President Trump, pointing to weak growth in employee wages and a struggling factory sector.

In [4]:
# Terms that mark a token as being about the economy. Matching below is by
# substring, so 'economic' already covers 'economics' as well.
econ_terms = ['economic', 'economics', 'economy']

# Keep every token whose text contains one of the terms. `Token.text` is used
# instead of the legacy `Token.string` attribute (removed in spaCy v3); the two
# differ only by trailing whitespace, which cannot change a substring test
# against whitespace-free terms. The generator lets any() stop at the first hit.
econ_tokens = [t for t in doc if any(e in t.text for e in econ_terms)]
econ_tokens

[economic,
 economy,
 economy,
 economy,
 economic,
 economy,
 economic,
 economy,
 economy,
 economy,
 economy,
 economic,
 economy,
 economic,
 economy,
 economy,
 economy,
 economy]

Analysis:

In [5]:
# One entry per economy token: that token's ancestors, collected into a list.
econ_ancestors = [
    list(token.ancestors)
    for token in econ_tokens
]
econ_ancestors

[[backdrop, know, want, pay],
 [holding, about, lot, tell, readings, get, going],
 [of, contradictions, of, some, see],
 [appears],
 [growth, of, combination, feature, appears],
 [of, state, about, clarity, offer],
 [growth, of, estimate, release],
 [grew],
 [grown, fallen],
 [appears],
 [protect, trying, reversing, keep, aiming],
 [growth, slowing],
 [is, is],
 [report, help],
 [endanger, rout, into, turning, ’s, case, support],
 [slumping, likely, looks],
 [of, story, remains, be],
 [helping, argue]]

In [7]:
def flatten_list(l):
    """Flatten one level of nesting.

    Takes an iterable of iterables and returns a single new list holding the
    items of each inner iterable, in their original order.
    """
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [8]:
# Pair every ancestor with its part-of-speech tag, keeping the per-token grouping.
econ_ancestors_type = [
    [(node, node.pos_) for node in chain]
    for chain in econ_ancestors
]
econ_ancestors_type

[[(backdrop, 'NOUN'), (know, 'VERB'), (want, 'VERB'), (pay, 'VERB')],
 [(holding, 'VERB'),
  (about, 'ADP'),
  (lot, 'NOUN'),
  (tell, 'VERB'),
  (readings, 'NOUN'),
  (get, 'VERB'),
  (going, 'VERB')],
 [(of, 'ADP'),
  (contradictions, 'NOUN'),
  (of, 'ADP'),
  (some, 'DET'),
  (see, 'VERB')],
 [(appears, 'VERB')],
 [(growth, 'NOUN'),
  (of, 'ADP'),
  (combination, 'NOUN'),
  (feature, 'VERB'),
  (appears, 'VERB')],
 [(of, 'ADP'),
  (state, 'NOUN'),
  (about, 'ADP'),
  (clarity, 'NOUN'),
  (offer, 'VERB')],
 [(growth, 'NOUN'), (of, 'ADP'), (estimate, 'NOUN'), (release, 'VERB')],
 [(grew, 'VERB')],
 [(grown, 'VERB'), (fallen, 'VERB')],
 [(appears, 'VERB')],
 [(protect, 'VERB'),
  (trying, 'VERB'),
  (reversing, 'VERB'),
  (keep, 'VERB'),
  (aiming, 'VERB')],
 [(growth, 'NOUN'), (slowing, 'VERB')],
 [(is, 'VERB'), (is, 'VERB')],
 [(report, 'NOUN'), (help, 'VERB')],
 [(endanger, 'VERB'),
  (rout, 'NOUN'),
  (into, 'ADP'),
  (turning, 'VERB'),
  (’s, 'VERB'),
  (case, 'NOUN'),
  (support,

In [9]:

# Within each token's ancestor list, keep only the ancestors tagged as verbs.
econ_ancestors_verbs = [
    [node for node in chain if node.pos_ == 'VERB']
    for chain in econ_ancestors
]
econ_ancestors_verbs

[[know, want, pay],
 [holding, tell, get, going],
 [see],
 [appears],
 [feature, appears],
 [offer],
 [release],
 [grew],
 [grown, fallen],
 [appears],
 [protect, trying, reversing, keep, aiming],
 [slowing],
 [is, is],
 [help],
 [endanger, turning, ’s, support],
 [slumping, looks],
 [remains, be],
 [helping, argue]]

In [10]:
# Verbs treated as positive / negative statements about the economy.
pos_verbs = ['grew', 'grown']
neg_verbs = ['slumping', 'slowing']

# First attempt: keep the ancestors whose `.string` appears in each verb list.
# NOTE: `.string` can carry trailing whitespace (e.g. 'grew '), so such tokens
# never equal the bare words in the lists. This cell is intentionally left
# as-is; the Debugging and Fixing sections below walk through the problem.
econ_ancestors_pos_verbs = [[a for a in ancestors if a.string in pos_verbs] for ancestors in econ_ancestors]
econ_ancestors_neg_verbs = [[a for a in ancestors if a.string in neg_verbs] for ancestors in econ_ancestors]

In [11]:
# Inspect the positive-verb matches (one sub-list per economy token).
econ_ancestors_pos_verbs

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

In [12]:
# Inspect the negative-verb matches (one sub-list per economy token).
econ_ancestors_neg_verbs

[[], [], [], [], [], [], [], [], [], [], [], [slowing], [], [], [], [], [], []]

Debugging:

In [13]:
# Pull out one entry to inspect: index 7 is the verb list that printed as [grew] above.
anc = econ_ancestors_verbs[7]
anc

[grew]

In [14]:
# Take the first token from that list so its attributes can be examined directly.
t = anc[0]
t

grew

In [15]:
# Look at the raw `.string` value: note the trailing space inside the quotes in the output.
t.string

'grew '

In [16]:
# The trailing space makes the two strings unequal, which is why the list membership test found no match.
'grew' == 'grew '

False

Fixing:

In [20]:
# Verbs treated as positive / negative statements about the economy.
pos_verbs = ['grew', 'grown']
neg_verbs = ['slumping', 'slowing']

# Compare on `Token.text`, which holds the token without its trailing
# whitespace, so 'grew ' vs 'grew' mismatches cannot occur. This replaces the
# legacy `Token.string` attribute plus `.strip()` (`.string` was removed in
# spaCy v3) and selects the same tokens.
econ_ancestors_pos_verbs = [[a for a in ancestors if a.text in pos_verbs] for ancestors in econ_ancestors]
econ_ancestors_neg_verbs = [[a for a in ancestors if a.text in neg_verbs] for ancestors in econ_ancestors]

# Drop the economy tokens that had no matching verb among their ancestors.
econ_ancestors_pos_verbs = [matches for matches in econ_ancestors_pos_verbs if matches]
econ_ancestors_neg_verbs = [matches for matches in econ_ancestors_neg_verbs if matches]

In [21]:
# Positive-verb matches after the fix, with empty sub-lists removed.
econ_ancestors_pos_verbs

[[grew], [grown]]

In [22]:
# Negative-verb matches after the fix, with empty sub-lists removed.
econ_ancestors_neg_verbs

[[slowing], [slumping]]

Check for negation:

In [23]:
# Collapse the per-token sub-lists into flat lists of verb tokens, reusing the
# flatten_list helper defined earlier in the notebook instead of repeating its
# comprehension inline.
# NOTE: this rebinds the same names, so re-running the cell on its own would
# try to flatten lists that are already flat; re-run the filtering cell first.
econ_ancestors_pos_verbs = flatten_list(econ_ancestors_pos_verbs)
econ_ancestors_neg_verbs = flatten_list(econ_ancestors_neg_verbs)

In [24]:
# For every matched verb, gather that verb's own ancestors so they can be
# scanned for negating words.
pos_verb_ancestors = [
    list(verb.ancestors)
    for verb in econ_ancestors_pos_verbs
]
neg_verb_ancestors = [
    list(verb.ancestors)
    for verb in econ_ancestors_neg_verbs
]

In [25]:
# Ancestors of each positive verb, in the same order as econ_ancestors_pos_verbs.
pos_verb_ancestors

[[], [fallen]]

In [26]:
# Ancestors of each negative verb, in the same order as econ_ancestors_neg_verbs.
neg_verb_ancestors

[[], [likely, looks]]

Results:

In [27]:
# Words that, when found among a verb's ancestors, are treated as reversing
# the verb's meaning.
neg_list = ['fallen', 'down', 'not', "n't"]

# Count the verbs that have at least one negating word among their ancestors.
# `Token.text` replaces the legacy `Token.string` + `.strip()` (`.string` was
# removed in spaCy v3); the generators avoid building throwaway lists.
pos_negation = sum(1 for ancestors in pos_verb_ancestors if any(a.text in neg_list for a in ancestors))
neg_negation = sum(1 for ancestors in neg_verb_ancestors if any(a.text in neg_list for a in ancestors))

In [28]:
# Number of positive verbs that have a negating word among their ancestors.
pos_negation

1

In [29]:
# Number of negative verbs that have a negating word among their ancestors.
neg_negation

0

In [34]:
# Net tallies: start from the raw number of matched verbs, then move each
# negated verb from its own side over to the opposite side.
pos_econ = len(econ_ancestors_pos_verbs) - pos_negation + neg_negation
neg_econ = len(econ_ancestors_neg_verbs) - neg_negation + pos_negation

In [35]:
# Net count of positive statements after the negation adjustment.
pos_econ

1

In [36]:
# Net count of negative statements after the negation adjustment.
neg_econ

3

In [37]:
# Report the overall tone: whichever net tally is larger wins, and a tie is
# reported as undecided/mixed.
if pos_econ > neg_econ:
    print('This article seems positive about the economy.')
elif neg_econ > pos_econ:
    print('This article seems negative about the economy.')
else:
    # Fixed the misspelling "artcile" in the user-facing message.
    print('This article seems undecided or mixed about the economy.')

This article seems negative about the economy.
