In [1]:
import pandas as pd
from collections import Counter 
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.util import ngrams

### Reading in the data

In [2]:
filings = pd.read_csv('data/nc_validation_filings.csv')
filings.head()

Unnamed: 0,ticker,accession_number
0,ACAD,0001564590-18-003526
1,ACC,0001283630-18-000024
2,AFSI,0001365555-18-000052
3,AHL,0001267395-18-000024
4,AKRX,0001628280-18-002518


In [3]:
purchase = pd.read_csv('data/share_repurchase_paragraphs.csv')
aapl = purchase[purchase['data_key_friendly_name'] == 'Amount Spent on Share Repurchases']
aapl

Unnamed: 0,ticker,accession_number,data_key_friendly_name,text,data_value,reported_data_value,reported_units,paragraph_text
4,A,0001090872-17-000018,Amount Spent on Share Repurchases,repurchased,194000000,194.000,millions,"Table of Contents 2016, upon the completion ..."
9,AAL,0000006201-18-000009,Amount Spent on Share Repurchases,repurchased,1600000000,1.600,billions,"During the year ended December 31, 2017, we ..."
14,AAP,0001158449-18-000039,Amount Spent on Share Repurchases,at an aggregate cost of,6500000,6.500,millions,The Company repurchased 57 thousand and 116 th...
17,AAPL,0000320193-17-000070,Amount Spent on Share Repurchases,Total open market common stock repurchases,18001000000,18001.000,millions,Number of Average ...
18,AAPL,0000320193-17-000070,Amount Spent on Share Repurchases,February 2017 ASR,3000000000,3000.000,millions,Number of Average ASR ...
26,AAPL,0000320193-17-000070,Amount Spent on Share Repurchases,May 2017 ASR,3000000000,3000.000,millions,Number of Average ASR ...
27,AAPL,0000320193-17-000070,Amount Spent on Share Repurchases,November 2016 ASR,6000000000,6000.000,millions,Number of Average ASR ...
28,AAPL,0000320193-17-000070,Amount Spent on Share Repurchases,August 2017 ASR,3000000000,3000.000,millions,Number of Average ASR ...
30,ABBV,0001551152-18-000014,Amount Spent on Share Repurchases,repurchased,1000000000,1.000,billions,Stock Repurchase Program The company's stock...
35,ABC,0001140859-17-000047,Amount Spent on Share Repurchases,for a total of,211100000,211.100,millions,"In November 2016, the Company's board of dire..."


In [4]:
aapl.paragraph_text

4       Table of Contents   2016, upon the completion ...
9       During the year ended  December 31, 2017,  we ...
14      The Company repurchased 57 thousand and 116 th...
17      Number of         Average                     ...
18      Number of             Average           ASR   ...
26      Number of             Average           ASR   ...
27      Number of             Average           ASR   ...
28      Number of             Average           ASR   ...
30      Stock Repurchase Program   The company's stock...
35      In November 2016, the  Company's board of dire...
43      Accenture Holdings plc Ordinary Shares and    ...
52      Cash provided by operating  activities was $2....
56      We continued our shareholder friendly actions ...
59      During the years ended December 31, 2017, 2016...
71      Shares Purchased                              ...
83      Financing cash flows consist primarily of borr...
85      Repurchases of shares under the program are ma...
90      Years 

### Exploratory Data Analysis

In [5]:
key_text = purchase["data_key_friendly_name"].unique()
key_text

array(['Share Repurchase Authorization Date',
       'Share Repurchase Authorization', 'Share Repurchase Intention',
       'Share Repurchase Count', 'Amount Spent on Share Repurchases',
       'Share Repurchase Utilization', 'Unknown Share Repurchase Data'], dtype=object)

In [23]:
text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Authorization'].text
text

1       The 2015 share repurchase program authorizes t...
6                   share repurchase programs aggregating
13                                     repurchase program
15                         share repurchase authorization
32                                             authorized
34      the Company's board of directors authorized a ...
38                          authorization was in addition
39                              authorized the repurchase
46                                             repurchase
50      In the aggregate, the Board of Directors has a...
58          total stock repurchase authorization of up to
80              the Company's Board of Directors approved
87      our Board of Directors authorized an additiona...
94      publicly announced share repurchase authorizat...
100                   the Board of Directors authorized a
109     we announced that our Board of Directors had a...
115                   the Board of Directors authorized a
122           

### Processing the Data : Tokenizing, lowercasing and removing stop words for each of the data_key_friendly_name and picking the most common words

In [24]:
tokens =   [word_tokenize(t.lower()) for t in text]
def flatten(lst):
    return sum( ([x] if not isinstance(x, list) else flatten(x)
         for x in lst), [] )
list_tokens = flatten(tokens)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_tokens if t not in english_stops]
repurchase_text = Counter(no_stops)  
repurchase_text.most_common(20)



[('repurchase', 260),
 ('board', 227),
 ('authorized', 202),
 ('directors', 188),
 ('program', 126),
 ('company', 93),
 ('share', 78),
 ("'s", 66),
 ('approved', 63),
 ('stock', 54),
 ('additional', 52),
 ('authorization', 50),
 (',', 42),
 ('new', 37),
 ('announced', 31),
 ('purchase', 29),
 ('common', 25),
 ('aggregate', 19),
 ('authorizing', 18),
 ('total', 16)]

In [25]:
para_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Authorization'].paragraph_text
para_text.head()

1     On May 28, 2015 we  announced that our board  ...
6     4. Share Repurchase Programs and Dividends   S...
13    The Company's stock repurchase program allows ...
15    Share Repurchase Program   In May 2017, the Co...
32    On February 15, 2018, AbbVie's board of direct...
Name: paragraph_text, dtype: object

In [26]:
# alpha_token =  [t for t in list_tokens if t.isalpha()]
# alpha_token

In [27]:
para_tokens =   [word_tokenize(t.lower()) for t in para_text]
def flatten(lst):
    return sum( ([x] if not isinstance(x, list) else flatten(x)
         for x in lst), [] )
list_tokens = flatten(para_tokens)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_tokens if t not in english_stops]
repurchase_para = Counter(no_stops)  
repurchase_para.most_common(20)


[(',', 2207),
 ('$', 1063),
 ('repurchase', 1046),
 ('stock', 788),
 ('program', 759),
 ('.', 728),
 ('2017', 573),
 ('common', 548),
 ('board', 529),
 ('company', 525),
 ('million', 522),
 ('share', 511),
 ('billion', 501),
 ('shares', 491),
 ('directors', 441),
 ('authorized', 412),
 ("'s", 349),
 ('2016', 256),
 ('december', 254),
 ('repurchases', 244)]

In [28]:
intent_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Intention'].text
util_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Utilization'].text
unk_repurchase_text = purchase[purchase["data_key_friendly_name"] == 'Unknown Share Repurchase Data'].text


In [29]:
# bag_repurchase = ['Board', 'Directors','authoriz','purchase','approve','program']

In [30]:
intent_tokens =   [word_tokenize(t.lower()) for t in intent_text]
def flatten(lst):
    return sum( ([x] if not isinstance(x, list) else flatten(x)
         for x in lst), [] )
list_tokens = flatten(intent_tokens)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_tokens if t not in english_stops]
intent_text = Counter(no_stops)  
intent_text.most_common(20)

[('repurchase', 104),
 ('2017', 76),
 (',', 72),
 ('program', 62),
 ('remaining', 58),
 ('december', 49),
 ('authorization', 44),
 ('available', 40),
 ('31', 39),
 ('share', 38),
 ('remained', 34),
 ('board', 34),
 ('authorized', 34),
 ('stock', 29),
 ('total', 26),
 ('company', 22),
 ('directors', 22),
 ('1', 21),
 ('-', 17),
 ('common', 16)]

In [31]:
util_tokens =   [word_tokenize(t.lower()) for t in util_text]
def flatten(lst):
    return sum( ([x] if not isinstance(x, list) else flatten(x)
         for x in lst), [] )
list_tokens = flatten(util_tokens)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_tokens if t not in english_stops]
util_text = Counter(no_stops)  
util_text.most_common(20)

[('repurchased', 14),
 ('total', 12),
 (',', 12),
 ('cost', 11),
 ('aggregate', 9),
 ('2017', 8),
 ('company', 7),
 ('approximately', 5),
 ('program', 5),
 ('repurchase', 5),
 ('december', 4),
 ('purchased', 4),
 ('share', 4),
 ('shares', 3),
 ('31', 3),
 ('authorization', 3),
 ('inception', 2),
 ('amount', 2),
 ('board', 2),
 ('directors', 2)]

In [32]:
unk_tokens =   [word_tokenize(t.lower()) for t in unk_repurchase_text]
def flatten(lst):
    return sum( ([x] if not isinstance(x, list) else flatten(x)
         for x in lst), [] )
list_tokens = flatten(unk_tokens)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_tokens if t not in english_stops]
unk_text = Counter(no_stops)  
unk_text.most_common(20)

[('repurchase', 79),
 ('board', 58),
 ('authorized', 56),
 ('directors', 53),
 ('program', 31),
 ('company', 27),
 ('shares', 25),
 ("'s", 21),
 ('authorization', 18),
 ('share', 15),
 (',', 13),
 ('approved', 12),
 ('available', 12),
 ('purchase', 12),
 ('repurchased', 11),
 ('total', 11),
 ('december', 11),
 ('2017', 9),
 ('stock', 9),
 ('additional', 8)]

### ngrams NLP analysis for data_key_friendly_name text field

In [33]:
    
def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
repurchase_text_tri = Counter(no_stops)  


  after removing the cwd from sys.path.


### Creating a Dictionary

In [34]:
trigram_dict = dict(repurchase_text_tri.most_common(20))
trigram_dict#.keys()

{"'s board of": 54,
 'authorized a new': 24,
 'authorized the repurchase': 42,
 'board of directors': 186,
 "company 's board": 41,
 'directors approved a': 33,
 'directors authorized a': 40,
 'directors authorized the': 36,
 'of directors approved': 46,
 'of directors authorized': 103,
 'of up to': 84,
 'our board of': 67,
 'repurchase of up': 37,
 'repurchase up to': 38,
 'share repurchase program': 53,
 'stock repurchase program': 31,
 'the board of': 47,
 "the company 's": 47,
 'the repurchase of': 56,
 'to repurchase up': 35}

In [17]:
# for filename in os.listdir(path):
#     if filename.endswith('.html'):
#        fname = os.path.join(path,filename)
#        with open(fname, 'r', encoding="utf8") as f:
#            soup = BeautifulSoup(f.read(),'html.parser')
#            soup.head.extract()
#            soup = soup.get_text().strip()
# util_text

In [18]:

util_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Utilization'].text

def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in util_text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
util_text_tri = Counter(no_stops)  
util_text_tri.most_common(20)

  


[('a total of', 4),
 ('for a total', 4),
 (', 2017 ,', 4),
 ('2017 , the', 4),
 (', the company', 4),
 ('the company had', 3),
 ('a total cost', 3),
 ('total cost of', 3),
 ('december 31 ,', 3),
 ('31 , 2017', 3),
 ('aggregate cost of', 3),
 ('company had repurchased', 2),
 ('repurchased a total', 2),
 ('as of december', 2),
 ('of december 31', 2),
 ('the company has', 2),
 (', through december', 2),
 ('an aggregate cost', 2),
 ('board of directors', 2),
 ('share repurchase program', 2)]

In [30]:
trigram_dict_util = dict(util_text_tri.most_common(20))
trigram_dict_util.keys()
# trigram_dict_util.keys()

dict_keys(['a total of', 'for a total', ', 2017 ,', '2017 , the', ', the company', 'the company had', 'a total cost', 'total cost of', 'december 31 ,', '31 , 2017', 'aggregate cost of', 'company had repurchased', 'repurchased a total', 'as of december', 'of december 31', 'the company has', ', through december', 'an aggregate cost', 'board of directors', 'share repurchase program'])

In [20]:
intent_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Intention'].text
def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in intent_text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
intent_text_tri = Counter(no_stops)  
intent_text_tri.most_common(20)

  after removing the cwd from sys.path.


[('31 , 2017', 32),
 ('december 31 ,', 22),
 ('board of directors', 22),
 ('share repurchase program', 16),
 ('stock repurchase program', 14),
 ("the company 's", 13),
 ('remained available for', 11),
 ('our board of', 11),
 ('repurchase up to', 10),
 ('share repurchase authorization', 10),
 ('available under the', 10),
 ('to repurchase up', 9),
 ('repurchase under the', 8),
 ('under the program', 8),
 ('1 , 2017', 8),
 ('as of december', 7),
 ('for repurchase under', 7),
 ('of directors authorized', 7),
 ('30 , 2017', 6),
 ('available for repurchase', 6)]

In [29]:
trigram_dict_intent = dict(intent_text_tri.most_common(20))
trigram_dict_intent.keys()

dict_keys(['31 , 2017', 'december 31 ,', 'board of directors', 'share repurchase program', 'stock repurchase program', "the company 's", 'remained available for', 'our board of', 'repurchase up to', 'share repurchase authorization', 'available under the', 'to repurchase up', 'repurchase under the', 'under the program', '1 , 2017', 'as of december', 'for repurchase under', 'of directors authorized', '30 , 2017', 'available for repurchase'])

In [22]:
unk_repurchase_text = purchase[purchase["data_key_friendly_name"] == 'Unknown Share Repurchase Data'].text
def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in unk_repurchase_text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
unk_text_tri = Counter(no_stops)  
unk_text_tri.most_common(20)

  after removing the cwd from sys.path.


[('board of directors', 53),
 ('of directors authorized', 25),
 ('of up to', 24),
 ("'s board of", 19),
 ('authorized the repurchase', 18),
 ('the repurchase of', 18),
 ("the company 's", 16),
 ('the board of', 15),
 ('directors authorized the', 15),
 ("company 's board", 14),
 ('repurchase up to', 14),
 ('our board of', 14),
 ('repurchase of up', 12),
 ('to repurchase up', 12),
 ('share repurchase program', 12),
 ('of directors approved', 8),
 ('available for repurchase', 8),
 ('the purchase of', 8),
 ('for repurchase under', 7),
 ('repurchase under the', 7)]

In [28]:
trigram_dict_unk = dict(unk_text_tri.most_common(20))
trigram_dict_unk.keys()

dict_keys(['board of directors', 'of directors authorized', 'of up to', "'s board of", 'authorized the repurchase', 'the repurchase of', "the company 's", 'the board of', 'directors authorized the', "company 's board", 'repurchase up to', 'our board of', 'repurchase of up', 'to repurchase up', 'share repurchase program', 'of directors approved', 'available for repurchase', 'the purchase of', 'for repurchase under', 'repurchase under the'])

In [36]:
#  Share Repurchase Authorization Date'

amt_repurchase_text = purchase[purchase["data_key_friendly_name"] == 'Amount Spent on Share Repurchases'].text
def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in date_repurchase_text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
amt_text_tri = Counter(no_stops)  
dict(amt_text_tri.most_common(20)).keys()


  


dict_keys(['a cost of', 'total cost of', 'at a cost', 'of common stock', 'a total cost', 'aggregate cost of', 'for a total', 'at a total', 'a total of', 'cost of shares', 'of shares repurchased', 'at an aggregate', 'an aggregate cost', 'in millions )', '( in millions', 'repurchase of common', 'december 31 ,', '31 , 2017', 'aggregate purchase price', 'cost of repurchases'])

In [41]:
count_repurchase_text = purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Count'].text
def get_ngrams(text, n ):
        n_grams = ngrams(word_tokenize(t.lower()), n)
        return [ ' '.join(grams) for grams in n_grams]
ngrams_text = []    
for t in date_repurchase_text:
        ngrams_text.append(get_ngrams(t, 3 )) 
# bigrams_text
list_ngrams = flatten(ngrams_text)
english_stops = set(stopwords.words('english'))
no_stops = [t for t in list_ngrams if t not in english_stops]
count_text_tri = Counter(no_stops)  
dict(count_text_tri.most_common(20)).keys()

  after removing the cwd from sys.path.


dict_keys(['a cost of', 'total cost of', 'at a cost', 'of common stock', 'a total cost', 'aggregate cost of', 'for a total', 'at a total', 'a total of', 'cost of shares', 'of shares repurchased', 'at an aggregate', 'an aggregate cost', 'in millions )', '( in millions', 'repurchase of common', 'december 31 ,', '31 , 2017', 'aggregate purchase price', 'cost of repurchases'])

In [25]:
share_repurchase_auth = ['board of directors', 'of directors authoriz', 'of up to', 'our board of', 'the repurchase of', "'s board of", 
 'share repurchase program', "the company 's", 'the board of', 'of directors approved', 'authorized the repurchase',
 "company 's board", 'directors authorized a', 'repurchase up to', 'repurchase of up', 'directors authorized the', 
 'to repurchase up', 'directors approved a', 'stock repurchase program', 'authorized a new']

In [None]:
Unknown_Share_Repurchase_Data = ['board of director', 'of directors authoriz', 'of up to', "'s board of", 'authorized the repurchase', 
 'the repurchase of', "the company 's", 'the board of', 'directors authorized the', "company 's board",'repurchase up to', 'our board of', 'repurchase of up', 'to repurchase up', 'share repurchase program', 
 'of directors approv', 'available for repurchase', 'the purchase of', 'for repurchase under', 'repurchase under the']

In [None]:
Share_Repurchase_Intention = ['31 , 2017', 'december 31 ,', 'board of directors', 'share repurchase program', 'stock repurchase program', 
 "the company 's", 'remained available for', 'our board of', 'repurchase up to', 'share repurchase authorization',
 'available under the', 'to repurchase up', 'repurchase under the', 'under the program', '1 , 2017', 
 'as of december', 'for repurchase under', 'of directors authorized', '30 , 2017', 'available for repurchase']


In [None]:
Share_Repurchase_Utilization = ['a total of', 'for a total', ', 2017 ,', '2017 , the', ', the company', 
'the company had', 'a total cost', 'total cost of', 'december 31 ,', '31 , 2017', 'aggregate cost of', 
'company had repurchased', 'repurchased a total', 'as of december', 'of december 31', 'the company has', 
', through december', 'an aggregate cost', 'board of directors', 'share repurchase program']


In [None]:
Amount_Spent_on_Share_Repurchases = ['a cost of', 'total cost of', 'at a cost', 'of common stock', 'a total cost', 'aggregate cost of', 
 'for a total', 'at a total', 'a total of', 'cost of shares', 'of shares repurchased', 'at an aggregate', 'an aggregate cost', 
 'in millions )', '( in millions', 'repurchase of common', 'december 31 ,', '31 , 2017', 'aggregate purchase price', 'cost of repurchases']



In [None]:
Share_Repurchase_Count = ['a cost of', 'total cost of', 'at a cost', 'of common stock', 'a total cost',
'aggregate cost of', 'for a total', 'at a total', 'a total of', 'cost of shares', 'of shares repurchased', 
'at an aggregate', 'an aggregate cost', 'in millions )', '( in millions', 'repurchase of common', 'december 31 ,', 
'31 , 2017', 'aggregate purchase price', 'cost of repurchases']


In [39]:
purchase[purchase["data_key_friendly_name"] == 'Share Repurchase Count'].paragraph_text

3       Table of Contents   2016, upon the completion ...
8       During the year ended  December 31, 2017,  we ...
11      The Company repurchased 57 thousand and 116 th...
19      Number of             Average           ASR   ...
20      Number of             Average           ASR   ...
21      Number of         Average                     ...
22      Number of             Average           ASR   ...
23      Number of             Average           ASR   ...
29      In addition to the ASRs, AbbVie repurchased on...
36      In November 2016, the  Company's board of dire...
44      Accenture Holdings plc Ordinary Shares and    ...
47      The financial institutions agree to deliver sh...
54      The Company currently utilizes treasury stock ...
60      During the years ended December 31, 2017, 2016...
63      Common Stock Repurchase Program   Autodesk has...
73      Shares Purchased                              ...
82      In the year ended December 31, 2017, the Compa...
89      Years 

In [5]:
df_post_ngram = pd.read_csv('data/full_validation_set.csv')
df_post_ngram.head()

Unnamed: 0.1,Unnamed: 0,acc,list,text
0,0,0000007332-18-000016,share_repurchase_auth,"in addition, protective provisions in our amen..."
1,1,0000007332-18-000016,share_repurchase_auth,the types of incentives that may be awarded ar...
2,2,0000007332-18-000016,share_repurchase_auth,the registrant's other certifying officer and ...
3,3,0000007332-18-000016,share_repurchase_auth,the registrant's other certifying officer and ...
4,4,0000007332-18-000016,share_repurchase_auth,we also have guarantee obligations of up to $8...


In [35]:
# amt_repurchase = df_post_ngram[df_post_ngram['list'] == 'Amount_Spent_on_Share_Repurchases']

amt_repurchase = df_post_ngram[(df_post_ngram['list'] == 'Amount_Spent_on_Share_Repurchases') & df_post_ngram['text'].str.contains('repurchase')]

amt_repurchase.head()


Unnamed: 0.1,Unnamed: 0,acc,list,text
381,381,0000028917-18-000159,Amount_Spent_on_Share_Repurchases,the following is a summary of share repurchase...
422,422,0000039899-18-000011,Amount_Spent_on_Share_Repurchases,39 tegna inc. consolidated statements of cash ...
625,625,0000070318-18-000011,Amount_Spent_on_Share_Repurchases,these ownership changes include purchases of c...
626,626,0000070318-18-000011,Amount_Spent_on_Share_Repurchases,these ownership changes include purchases of c...
627,627,0000070318-18-000011,Amount_Spent_on_Share_Repurchases,99 table of contents consolidated statements o...


In [9]:
amt_repurchase.to_csv('data/amt_repurchase.csv')

In [37]:
count_repurchase = df_post_ngram[df_post_ngram['list'] == 'Share_Repurchase_Count']
count_repurchase.to_csv('data/count_repurchase.csv')


In [38]:
count_repurchase = df_post_ngram[(df_post_ngram['list'] == 'Share_Repurchase_Count') & df_post_ngram['text'].str.contains('repurchase')]
count_repurchase


Unnamed: 0.1,Unnamed: 0,acc,list,text
390,390,0000028917-18-000159,Share_Repurchase_Count,the following is a summary of share repurchase...
430,430,0000039899-18-000011,Share_Repurchase_Count,39 tegna inc. consolidated statements of cash ...
643,643,0000070318-18-000011,Share_Repurchase_Count,these ownership changes include purchases of c...
644,644,0000070318-18-000011,Share_Repurchase_Count,these ownership changes include purchases of c...
645,645,0000070318-18-000011,Share_Repurchase_Count,99 table of contents consolidated statements o...
646,646,0000070318-18-000011,Share_Repurchase_Count,100 table of contents consolidated statements ...
650,650,0000070318-18-000011,Share_Repurchase_Count,these ownership changes include purchases of c...
655,655,0000070318-18-000011,Share_Repurchase_Count,"pursuant to the share repurchase program, we p..."
783,783,0000078814-18-000004,Share_Repurchase_Count,see notes to consolidated financial statements...
786,786,0000078814-18-000004,Share_Repurchase_Count,common and treasury stock the following table ...
