# Extract constructions from the Penn Treebank

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import torch
from collections import defaultdict
import random
import math
import pickle
import nltk
from nltk.corpus import treebank

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Raise the pandas display limits to 100 columns / 100 rows.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

## Read PTB Data

In [2]:
# Corpus reader over the bracketed parse files in ../data/PTB3; the second
# argument is a regular expression selecting the file ids to read (*.tree).
# It is written as a raw string because "\." is not a valid escape sequence in
# a normal string literal (DeprecationWarning since Python 3.6, SyntaxWarning
# since Python 3.12); the resulting pattern is unchanged.
penn = nltk.corpus.reader.bracket_parse.BracketParseCorpusReader("../data/PTB3", r".*\.tree")

In [3]:
# Number of parsed sentences available through the corpus reader.
len(penn.parsed_sents())

49207

In [4]:
# Show the first parse tree to check that the corpus loaded as expected.
print(penn.parsed_sents()[0])

(S
  (NP-SBJ
    (NP (NNP Pierre) (NNP Vinken))
    (, ,)
    (ADJP (NP (CD 61) (NNS years)) (JJ old))
    (, ,))
  (VP
    (MD will)
    (VP
      (VB join)
      (NP (DT the) (NN board))
      (PP-CLR (IN as) (NP (DT a) (JJ nonexecutive) (NN director)))
      (NP-TMP (NNP Nov.) (CD 29))))
  (. .))


## Find LET-ALONE and WAY constructions

In [5]:
def find_let_alone(tree):
  """Return True if the sentence contains the token "let" directly followed by "alone".

  Args:
    tree: a parse tree exposing ``leaves()``, which returns the sentence's
      tokens in order.

  Returns:
    bool: True when two adjacent leaves are exactly "let" and "alone".

  Matching is done on whole tokens rather than on the space-joined sentence,
  so a token that merely ends in "let" (e.g. "outlet alone") is not reported.
  The comparison is case-sensitive.
  """
  tokens = tree.leaves()
  # Pair every token with its successor; empty and one-token sentences yield no pairs.
  return any(
    first == "let" and second == "alone"
    for first, second in zip(tokens, tokens[1:])
  )

# Walk the whole corpus and print, as a space-joined token string, every
# sentence that find_let_alone flags.
for tree in penn.parsed_sents():
  if not find_let_alone(tree):
    continue
  print(" ".join(tree.leaves()))

Muzzling provisions , which *T* might be called * *PRO* `` blindfold laws '' as well , prevent the executive branch from *PRO* even looking at certain policy options , let alone from *PRO* recommending them to Congress .
For one thing , it *EXP* 's unlikely that many people would receive federal death sentences , let alone be executed * .
Here was *T* the state security appartus poised * to pounce on any words or acts of provocation , let alone revolution .
Those events continue * to move at a rate *RNR* , and in a direction *RNR* , which *T* leave informed commentary -- let alone policy -- far in their wake .
It was the kind of snubbing rarely seen within the Congress , let alone within the same party .


In [6]:
# Find trees containing something like (NP (PRP$ his) (NN way))
def find_way(tree):
  """Return True if the tree has a two-child node of the form (X (PRP* ...) (... way)).

  Args:
    tree: a parse tree exposing ``subtrees()``; its nodes support ``len()``,
      integer indexing, ``label()`` and ``leaves()``.

  Returns:
    bool: True when some subtree has exactly two children, the first child's
    label starts with "PRP", and the second child's leaves are exactly
    ["way"]; False otherwise.

  Subtrees whose children are bare leaves (which have no ``label()`` /
  ``leaves()``) are skipped instead of raising AttributeError.
  """
  for st in tree.subtrees():
    if len(st) != 2:
      continue
    pronoun, head = st[0], st[1]
    # Leaf children are plain tokens, not tree nodes; they cannot match the pattern.
    if not (hasattr(pronoun, "label") and hasattr(head, "leaves")):
      continue
    if pronoun.label().startswith("PRP") and head.leaves() == ["way"]:
      return True
  return False

# Walk the whole corpus and print, as a space-joined token string, every
# sentence that find_way flags.
for tree in penn.parsed_sents():
  if not find_way(tree):
    continue
  print(" ".join(tree.leaves()))

Indeed , analysts say that payouts have sometimes risen most sharply when prices were already on their way down from cyclical peaks *T* .
Program traders ' `` power *PRO* to create total panic is so great that they ca n't be allowed * to have their way , '' says *0* *T* Rep. Edward Markey , a Massachusetts Democrat .
*PRO* Keeping the mood *PRO* light , the two then chanted and chortled their way through some murderous polyrhythms , devised * by Mr. Douglas as an alternative to Hindemith 's dry theory - teaching techniques , and then , with Mr. Gomez , soared and improvised on the composer 's tight `` Bebop Etudes . ''
The curator , 27 - year - old Sherman Krisher of Greenwich , Conn. , had worked his way up from janitor in seven years at the museum .
*PRO* Following a problem - solving formula used * by teams throughout Federal Express , members of the Natick team monitored their morning routine , *PRO* carefully noting where and when the work group 's resources were used * effectivel