In [11]:
import json 
import spacy 
from nltk.tokenize import sent_tokenize

spacy_model = spacy.load("en_core_web_sm")

In [3]:
path_to_dev = './classification-scripts/same-noun-modifications/SAME-NOUN-MODIFICATIONS-DEV.JSON'
with open(path_to_dev, 'r') as json_in: 
     content = json.load(json_in)

In [5]:
content[0].keys()

dict_keys(['Filename', 'All_Versions', 'Revision_Length', 'Base_Sentence', 'Revisions', 'Key', 'Source_Tokenized', 'Target_Tokenized', 'Correction', 'Source_Line_Tagged', 'Target_Line_Tagged', 'Differences', 'Entailment_Rel', 'PPDB_Matches', 'Loc_in_splits', 'Target_Line_Nr', 'Source_Line_Nr', 'Source_Context', 'Target_Context'])

In [9]:
def tokenize_sents_spacy(doc): 
    doc = spacy_model(doc)
    return [sent.text for sent in doc.sents]

In [12]:
def tokenize_sents_nltk(doc): 
    return sent_tokenize(doc)

In [10]:
tokenize_sents_spacy("This is a sentence. And this is another one, but yeah.")

['This is a sentence.', 'And this is another one,', 'but yeah.']

In [13]:
tokenize_sents_nltk("This is a sentence. And this is another one, but yeah.")

['This is a sentence.', 'And this is another one, but yeah.']

In [14]:
tokenize_sents_nltk("1) This is a sentence. 2) And this is another one, but yeah.")

['1) This is a sentence.', '2) And this is another one, but yeah.']

In [19]:
tokenize_sents_nltk("1. This is a sentence. 2. And this is another one, but yeah.")

['1.', 'This is a sentence.', '2.', 'And this is another one, but yeah.']

## Test tokenizer with data 

In [106]:
document = content[0]['Source_Context']
sent = content[0]['Source_Tokenized']

In [109]:
for c, elem in enumerate(document,0):
    print(c, '\t', elem)
    
print(sent)

0 	 ## Section::::Related wikiHows.
1 	 This is a good way to approach women in a public setting without seeming creepy. You can meet women anywhere. You may even meet your future wife standing at a bus stop.
2 	 ## Steps
3 	 1. You have to get out there and seek them out! Most intelligent and classy girls do not go door to door looking for dates.
4 	 2. Enter a public place such as school and check out the girls (Note: if you are too old to be in school, it will look creepy).
5 	 3. Seek eye contact. Maintain eye contact while trying to keep underwear and armpits dry.
6 	 4. Smile. Avoid appearing overconfident as she may find you cocky, but you mustn't be shy either. Some girls do like shy guys. But most of them want a guy who's not afraid to go out on a limb sometimes.
7 	 5. Approach with confidence; don't use cheesy pick-up lines (A pick-up line is a conversation opener with the intent of engaging an unfamiliar person for sex, romance, or dating. ...), they rarely work. A simple "

In [100]:
## Tokenize per 'sub sentence list' instead of joining (to keep markdown headers separated)
unflattened_sentences = (tokenize_sents_nltk(sent_item) for sent_item in document)

## Flatten sentences: (https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists)
sentences = (sentence for sub_sentences in unflattened_sentences for sentence in sub_sentences)

In [101]:
import re
pattern = re.compile(r"^[0-9]+\.$")

merged_item_sents = []

sentence = next(sentences)
while sentence:
    if re.match(pattern, sentence):
        next_sentence = next(sentences)
        merged = f"{sentence} {next_sentence}"
        merged_item_sents.append(merged)
    else:
        merged_item_sents.append(sentence)
    
    try:
        sentence = next(sentences)
    except StopIteration:
        sentence = False

In [102]:
print('\n\n'.join(merged_item_sents))


## Section::::Related wikiHows.

This is a good way to approach women in a public setting without seeming creepy.

You can meet women anywhere.

You may even meet your future wife standing at a bus stop.

## Steps

1. You have to get out there and seek them out!

Most intelligent and classy girls do not go door to door looking for dates.

2. Enter a public place such as school and check out the girls (Note: if you are too old to be in school, it will look creepy).

3. Seek eye contact.

Maintain eye contact while trying to keep underwear and armpits dry.

4. Smile.

Avoid appearing overconfident as she may find you cocky, but you mustn't be shy either.

Some girls do like shy guys.

But most of them want a guy who's not afraid to go out on a limb sometimes.

5. Approach with confidence; don't use cheesy pick-up lines (A pick-up line is a conversation opener with the intent of engaging an unfamiliar person for sex, romance, or dating.

...), they rarely work.

A simple "Hi, how are you 

In [104]:
print(merged_item_sents)

['## Section::::Related wikiHows.', 'This is a good way to approach women in a public setting without seeming creepy.', 'You can meet women anywhere.', 'You may even meet your future wife standing at a bus stop.', '## Steps', '1. You have to get out there and seek them out!', 'Most intelligent and classy girls do not go door to door looking for dates.', '2. Enter a public place such as school and check out the girls (Note: if you are too old to be in school, it will look creepy).', '3. Seek eye contact.', 'Maintain eye contact while trying to keep underwear and armpits dry.', '4. Smile.', "Avoid appearing overconfident as she may find you cocky, but you mustn't be shy either.", 'Some girls do like shy guys.', "But most of them want a guy who's not afraid to go out on a limb sometimes.", "5. Approach with confidence; don't use cheesy pick-up lines (A pick-up line is a conversation opener with the intent of engaging an unfamiliar person for sex, romance, or dating.", '...), they rarely w

## Make function

In [110]:
def sentence_splitter(document): 
    ## Tokenize per 'sub sentence list' instead of joining (to keep markdown headers separated)
    unflattened_sentences = (tokenize_sents_nltk(sent_item) for sent_item in document)
    ## Flatten sentences: (https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists)
    sentences = (sentence for sub_sentences in unflattened_sentences for sentence in sub_sentences)
    pattern = re.compile(r"^[0-9]+\.$")
    merged_item_sents = []

    sentence = next(sentences)
    while sentence:
        if re.match(pattern, sentence):
            next_sentence = next(sentences)
            merged = f"{sentence} {next_sentence}"
            merged_item_sents.append(merged)
        else:
            merged_item_sents.append(sentence)

        try:
            sentence = next(sentences)
        except StopIteration:
            sentence = False
    return merged_item_sents

In [134]:
res = sentence_splitter(content[1]['Source_Context'])
print(content[1]['Source_Tokenized'])
for c, item in enumerate(res, 0):
    print(c, '\t', item)

['If', 'you', 'approach', 'a', 'woman', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']
0 	 * Ask a sister, mother, or female friend for advice.
1 	 They are women, after all.
2 	 They might have some advice for you.
3 	 * Make sure the conversation goes both ways.
4 	 There's nothing worse than being in a conversation where only one person is talking.
5 	 Ask her opinion, and LISTEN to what she says.
6 	 This shows that you care about what she thinks and that you're paying attention.
7 	 * Girls like to be asked questions - that's how you make the girl feel more comfortable and secure with you.
8 	 For an example, say "How was your day?
9 	 ", "What are your interests?
10 	 ", "Do you have any good friends?
11 	 ", or "Do you stay close to your Family?"
12 	 Girls love a guy that is very close to their family.
13 	 * Really important: Don't be annoying because you think they like it - treat a women as you would at a fancy p

In [133]:
res = sentence_splitter(content[1]['Target_Context'])
print(content[1]['Target_Tokenized'])
for c, item in enumerate(res, 0):
    print(c, '\t', item)

['If', 'you', 'approach', 'a', 'girl', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']
0 	 * Ask a sister, aunt, or female friend for advice.
1 	 They are girls, after all.
2 	 They might have some advice for you.
3 	 * Make sure the conversation goes both ways.
4 	 There's nothing worse than being in a conversation where only one person is talking.
5 	 Ask her opinion, and LISTEN to what she says.
6 	 This shows that you care about what she thinks and that you're paying attention.
7 	 * Girls like to be asked questions - that's how you make the girl feel more comfortable and secure with you.
8 	 For an example, say "How was your day?
9 	 ", "What are your interests?
10 	 ", "Do you have any good friends?
11 	 ", or "Do you stay close to your Family?"
12 	 Girls love a guy that is very close to their family.
13 	 * Really important: Don't be annoying because you think they like it - treat a girl as you would at a fancy party

In [186]:
candidate = content[1]['Target_Context']
print(content[1]['Target_Tokenized'])
print("-------------------------")
bleu_scores = []
sents = []
for elem in res: 
    reference = [word_tokenize(elem)]
    candidate = content[1]['Target_Tokenized']
    score = sentence_bleu(reference, candidate)
    print(score, '\t', reference)
    bleu_scores.append(score)
    sents.append(reference)

['If', 'you', 'approach', 'a', 'girl', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']
-------------------------
1.1484186507842885e-231 	 [['*', 'Ask', 'a', 'sister', ',', 'mother', ',', 'or', 'female', 'friend', 'for', 'advice', '.']]
1.0377133938315695e-231 	 [['They', 'are', 'women', ',', 'after', 'all', '.']]
1.0377133938315695e-231 	 [['They', 'might', 'have', 'some', 'advice', 'for', 'you', '.']]
1.0377133938315695e-231 	 [['*', 'Make', 'sure', 'the', 'conversation', 'goes', 'both', 'ways', '.']]
1.1484186507842885e-231 	 [['There', "'s", 'nothing', 'worse', 'than', 'being', 'in', 'a', 'conversation', 'where', 'only', 'one', 'person', 'is', 'talking', '.']]
1.2340561512781763e-231 	 [['Ask', 'her', 'opinion', ',', 'and', 'LISTEN', 'to', 'what', 'she', 'says', '.']]
1.0377133938315695e-231 	 [['This', 'shows', 'that', 'you', 'care', 'about', 'what', 'she', 'thinks', 'and', 'that', 'you', "'re", 'paying', 'attention', '

In [187]:
max_bleu = max(bleu_scores)
print(max_bleu)
index = bleu_scores.index(max_bleu)
print(index)
sents[index]

0.8056920633274978
15


[['*',
  'If',
  'you',
  'approach',
  'a',
  'woman',
  'who',
  'is',
  'with',
  'her',
  'friends',
  ',',
  'be',
  'prepared',
  'to',
  'impress',
  'the',
  'entire',
  'group',
  '.']]

In [184]:
context = content[1]['Target_Context']
sents = content[1]['Target_Tokenized']
print(sents)
print(context)

['If', 'you', 'approach', 'a', 'girl', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']


In [188]:
context = content[1]['Source_Context']
sents = content[1]['Source_Tokenized']
print(sents)
print(context)

['If', 'you', 'approach', 'a', 'woman', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']


In [216]:
def get_matching_sent(context, sent): 
    """
        Use this function to get closest match to a source_line or target_line in a paragraph. 
    """
    sentence_tokenized_document = sentence_splitter(context)
    bleu_scores = []
    sents = []
    for elem in sentence_tokenized_document: 
        reference = [word_tokenize(elem)]
        score = sentence_bleu(reference, sent)
        bleu_scores.append(score)
        sents.append(elem)
    index_of_max_bleu = bleu_scores.index(max(bleu_scores))
    matched_sent = sents[index_of_max_bleu]
    print(matched_sent)

15
* If you approach a girl who is with her friends, be prepared to impress the entire group.


In [217]:
subset = content[0:100]
for wikihow_instance in subset: 
    print(wikihow_instance['Target_Tokenized'])
    get_matching_sent(wikihow_instance['Target_Context'], wikihow_instance['Target_Tokenized'])

['Maintain', 'eye', 'contact', '.']
Maintain eye contact.
['If', 'you', 'approach', 'a', 'girl', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']
* If you approach a girl who is with her friends, be prepared to impress the entire group.
['Weigh', 'the', 'kittens', 'once', 'per', 'day', '.']
5. Weigh the kittens once per day.
['To', 'make', 'sure', 'that', 'the', 'kittens', 'are', 'getting', 'proper', 'nutrition', 'and', 'putting', 'on', 'weight', ',', 'you', 'will', 'need', 'to', 'weigh', 'the', 'kittens', 'once', 'per', 'day', 'and', 'keep', 'a', 'record', 'of', 'their', 'weights', '.']
To make sure that the kittens are getting proper nutrition and putting on weight, you will need to weigh the kittens once per day and keep a record of their weights.
['Attempt', 'to', 'be', 'compassionate', 'and', 'give', 'them', 'the', 'benefit', 'of', 'the', 'doubt', '.']
Attempt to be compassionate and give them the benefit of the doubt.
[

Have you ever noticed while texting your friends that some things come across as sounding mean or nasty, even if they're really not?
['Tap', 'the', '``', 'Block', "''", 'option', 'at', 'the', 'top', 'of', 'the', 'screen', '.']
* Tap the "Block" option at the top of the screen.
['How', 'much', 'weight', 'is', 'considered', '“', 'heavy', '”', 'depends', 'on', 'the', 'person', ',', 'including', 'experience', 'level', 'and', 'sex', '.']
How much weight is considered “heavy” depends on the person, including experience level and sex.
['For', 'even', 'more', 'simplicity', ',', 'just', 'start', 'doing', '10', 'push-ups', 'and', '20', 'crunches', 'every', 'morning', ',', 'and', 'work', 'up', 'to', '25', 'or', 'more', 'push-ups', 'and', '50', 'or', 'more', 'crunches', '.']
For even more simplicity, just start doing 10 push-ups and 20 crunches every morning, and work up to 25 or more push-ups and 50 or more crunches.
['Give', 'children', 'the', 'time', 'and', 'space', 'to', 'try', 'something', 'n

If you encounter terrorists who target people of a particular religion, you could become a victim of terrorism.
['Open', 'the', 'Google', 'Drive', 'app', '.']
* Open the Google Drive app.
['Most', 'of', 'the', 'organizations', 'responsible', 'for', 'governance', 'of', 'snow', 'sports', 'training', 'are', 'members', 'of', 'the', 'International', 'Ski', 'Instructors', 'Association', '(', 'ISIA', ')', ',', 'the', 'worldwide', 'association', 'for', 'ski', 'instructors', ',', 'which', 'determines', 'minimum', 'standards', 'for', 'ski', 'instructor', 'certification', '.']
* Most of the organizations responsible for governance of snow sports training are members of the International Ski Instructors Association (ISIA), the worldwide association for ski instructors, which determines minimum standards for ski instructor certification.
['When', 'attempting', 'in', 'any', 'condition', ',', 'always', 'wear', 'a', 'personal', 'flotation', 'device', '.']
* When attempting in any condition, always wea

In [241]:
def get_matching_sent_context(context, sent, windows=[1,2,3,4,5]): 
    """
        Use this function to get closest match to a source_line or target_line in a paragraph. 
    """
    sentence_tokenized_document = sentence_splitter(context)
    bleu_scores = []
    sents = []
    for elem in sentence_tokenized_document: 
        reference = [word_tokenize(elem)]
        score = sentence_bleu(reference, sent)
        bleu_scores.append(score)
        sents.append(elem)
    index_of_max_bleu = bleu_scores.index(max(bleu_scores))
    matched_sent = sents[index_of_max_bleu]
    previous_sentences = []
    next_sentences = []
    for window in windows:
        # get five sentences after max bleu index 
        next_sent = sents[index_of_max_bleu+window]
        next_sentences.append(next_sent)
        previous_sent = sents[index_of_max_bleu-window]
        previous_sentences.append(previous_sent)
    previous_sentences.reverse()
    context = previous_sentences + [matched_sent] + next_sentences
    print(context)    

In [242]:
get_matching_sent_context(content[1]['Target_Context'], content[1]['Target_Tokenized'])



In [232]:
for elem in content[1]['Target_Context']: 
    print(elem)

* Ask a sister, aunt, or female friend for advice. They are girls, after all. They might have some advice for you.
* Make sure the conversation goes both ways. There's nothing worse than being in a conversation where only one person is talking. Ask her opinion, and LISTEN to what she says. This shows that you care about what she thinks and that you're paying attention.
* Girls like to be asked questions - that's how you make the girl feel more comfortable and secure with you. For an example, say "How was your day?", "What are your interests?", "Do you have any good friends?", or "Do you stay close to your Family?" Girls love a guy that is very close to their family.
* Really important: Don't be annoying because you think they like it - treat a girl as you would at a fancy party, don't try any flirty/poking her elbow tricks until you have established a proper friendship or more.
* If you approach a girl who is with her friends, be prepared to impress the entire group.
* If you get rejec

In [260]:
def get_matching_sent_context(context, sent, windows=[1,2,3,4,5]): 
    """
        Use this function to get closest match to a source_line or target_line in a paragraph. 
    """
    sentence_tokenized_document = sentence_splitter(context)
    bleu_scores = []
    sents = []
    for elem in sentence_tokenized_document: 
        reference = [word_tokenize(elem)]
        score = sentence_bleu(reference, sent)
        bleu_scores.append(score)
        sents.append(elem)
    index_of_max_bleu = bleu_scores.index(max(bleu_scores))
    matched_sent = sents[index_of_max_bleu]
    previous_sentences = []
    next_sentences = []
    for window in windows:
        # get five sentences after max bleu index 
        next_sent = sents[index_of_max_bleu+window]
        next_sentences.append(next_sent)
        previous_sent = sents[index_of_max_bleu-window]
        previous_sentences.append(previous_sent)
    previous_sentences.reverse()
    context = previous_sentences + [matched_sent] + next_sentences
    return context    

In [243]:
subset = content[0:100]
for wikihow_instance in subset: 
    print(wikihow_instance['Target_Tokenized'])
    get_matching_sent_context(wikihow_instance['Target_Context'], wikihow_instance['Target_Tokenized'])

['Maintain', 'eye', 'contact', '.']
['## Steps', '1. You have to get out there and seek them out!', 'Most intelligent and classy girls do not go door to door looking for dates.', '2. Enter a public place such as school and check out the girls (Note: if you are too old to be in school, it will look creepy).', '3. Seek eye contact.', 'Maintain eye contact.', '4. Smile.', "Avoid appearing overconfident as she may find you cocky, but you mustn't be shy either.", 'Some girls do like shy guys.', "But most of them want a guy who's not afraid to go out on a limb sometimes.", "5. Approach with confidence; don't use cheesy pick-up lines (A pick-up line is a conversation opener with the intent of engaging an unfamiliar person for sex, romance, or dating."]
['If', 'you', 'approach', 'a', 'girl', 'who', 'is', 'with', 'her', 'friends', ',', 'be', 'prepared', 'to', 'impress', 'the', 'entire', 'group', '.']
['Weigh', 'the', 'kittens', 'once', 'per', 'day', '.']
['Do not give cow’s milk to the kittens 

['If kept in a suitable environment, Fancy goldfish normally grow to 8 to 12 inches or more!', 'and the normal life expectancy of Fancy goldfish is about 12 years.', '## Steps', '1. Buy an appropriate aquarium.', 'A single adult fancy goldfish needs a 30 gallon tank, with another ten gallons per additional fancy goldfish.', 'This equates to 140 litres for the first fish, and 45 liters per additional fish.', 'The "goldfish bowl" is much too small and doesn\'t let enough oxygen get to the goldfish.', 'Plus, it will absolutely stunt the growth of your fish.', 'Leave bowls for flowers at weddings!<br><br>', '2. Obtain a good filter.', 'Whatever the total amount of gallons is, you multiply it by ten and get the answer for the gallons pumped per hour (GPH).<br><br>']
['A', 'horse', 'for', 'a', 'knight', 'can', 'go', 'well', ',', 'or', 'a', 'dog', 'for', 'any', 'time', 'zone', '.']
['have a general or king to rule the army, give him assistants, then officers and so on.', '5. Get some weapons.

['It will get a unique attraction to you.', '5. Change your style!', 'Be aware that things about you will be changing.', "Don't be frightened.", 'Accept and embrace them.Always dress smart.', 'As it may change the way colleagues are talking to you', '* The transition to puberty is full of changes.', 'Embrace them!', 'It is a rare opportunity to redefine yourself--what you like, , or as a teen can be completely different than what you preferred as a child.', 'Try making drastic changes and explore your options.', '6. Try things that only teens can do.']
['NEVER', 'ALLOW', 'THE', 'CALIPER', 'TO', 'HANG', 'ON', 'THE', 'HOSE', '.']
['NEVER', 'ALLOW', 'THE', 'CALIPER', 'TO', 'HANG', 'ON', 'THE', 'HOSE', '.']
['Pump', 'the', 'brake', 'lever', 'to', 'clamp', 'the', 'pads', 'onto', 'the', 'spacer', '.']
['careful the brake fluid does not over run the master cylinder reservoir.', '3. Use a piece of wire though the caliper mounting holes to allow it to be hung from the handle bars or similar.', 

['Scientific research shows that relaxed people think more positively and are happier.', 'Where that point of balance between rest and activity lies, must be decided for oneself.', 'Everyone needs to experiment a little to find this correct balance.', '6. Think positive.', 'Those who think positively double their chance to realize happiness.', '7. Watch only a little TV.', 'Scientific research states, „ every hour of television lowers the general quality of life by 5%“.', 'The way to positive TV viewing consists of: a) choose your programs carefully.', 'Avoid negative films.', 'b) Find the correct amount of TV.', 'Children should watch a maximum of one hour per day of television.']
['If', 'you', 'buy', 'real', 'estate', ',', 'you', 'must', 'be', 'aware', 'of', 'it', '.']
['It is unlikely that anyone will care or take notice and the general consensus among many expats is that now that the housing boom is over, the Spanish see no use for foreigners in their country.', 'If you wish to buy

['There are USDA offices located all over the country.', 'The Department’s website provides a locator that will help you find the nearest service center.', '2. Get a copy of the Certificate of Good Health of Exportation of Small Animals.', 'The title of this document may vary as issued by your country of origin.', 'In the United States, this is form 7001 issued by the USDA.', "3. Go to your veterinarian's office.", 'Have your them do a health exam and fill out a health certificate, a rabies vaccination certificate, and a certificate of origin.', 'Have all of the forms filled out in ink.', '* Consider having your <a href="Microchip%20Your%20Dog">pet micro-chipped</a> at this point.', 'It is a form of identification that mitigates the risk of your pet being lost and not found again.', 'Panama recommends, but does not require, that you have your pet micro-chipped.']
['Stick', 'to', 'red', ',', 'grays', ',', 'blacks', ',', 'and', 'light', 'colors', '.']
['Stand tall and take long strides.'

In [249]:
for wikihow_instance in subset: 
    source_context = wikihow_instance['Source_Context']
    source_line = wikihow_instance['Source_tagged']
    
    print(source_line)
    print(source_context)
    print("------------------")
    get_matching_sent_context(source_context, source_line, windows=[1,2,3,4,5])
    print('\n')

KeyError: 'Source_tagged'

In [255]:
print(content[120]['Source_Tokenized'])
for c, elem in enumerate(content[120]['Source_Context'], 0): 
    print(c, '\t', elem)

['Be', 'Like', 'a', 'New', 'Yorker']
0 	 ## Things You'll Need
1 	 * Enough money to get quality clothes, makeup, and hair supplies
2 	 * A mirror so you can see yourself while you practise how you act
3 	 * An alarm clock- you need to get up nice and early in order to do your makeup and hair
4 	 ## Related wikiHows
5 	 * <a href="Be%20Like%20a%20New%20Yorker">Be Like a New Yorker</a>
6 	 * <a href="Be%20a%20True%20Rock%20N%20Roller">Be a True Rock N Roller</a>
7 	 * <a href="Be%20a%20Greaser">Be a Greaser</a>
8 	 ## Timestamp::::2018-04-30T16:38:43Z
9 	 ## Section::::Related wikiHows.
10 	 Moe is a style in Japan. It means somebody who has small but adorable and sexy features. There's a certain way to act as well!


In [261]:
res = get_matching_sent_context(content[120]['Source_Context'], content[120]['Source_Tokenized'])
for c, elem in enumerate(res, 0):  
    print(c, elem)

0 ## Things You'll Need
1 * Enough money to get quality clothes, makeup, and hair supplies
2 * A mirror so you can see yourself while you practise how you act
3 * An alarm clock- you need to get up nice and early in order to do your makeup and hair
4 ## Related wikiHows
5 * <a href="Be%20Like%20a%20New%20Yorker">Be Like a New Yorker</a>
6 * <a href="Be%20a%20True%20Rock%20N%20Roller">Be a True Rock N Roller</a>
7 * <a href="Be%20a%20Greaser">Be a Greaser</a>
8 ## Timestamp::::2018-04-30T16:38:43Z
9 ## Section::::Related wikiHows.
10 Moe is a style in Japan.


In [280]:
def get_context(context,sent):
    sentence_tokenized_document = sentence_splitter(context)
    bleu_scores = []
    sents = []
    for elem in sentence_tokenized_document:
        reference = [word_tokenize(elem)]
        score = sentence_bleu(reference, sent)
        bleu_scores.append(score)
        sents.append(elem)
    index_of_max_bleu = bleu_scores.index(max(bleu_scores))
    matched_sent = sents[index_of_max_bleu]
    previous_sentences = []
    next_sentences = [] 
    for window in windows: 
        next_sent = sents[index_of_max_bleu+window]
        next_sentences.append(next_sent)
        previous_sent = sents[index_of_max_bleu-window]
        previous_sentences.append(previous_sent)
    previous_sentences.reverse()
    context = previous_sentences + [matched_sent] + next_sentences
    return context 

In [268]:
print(context)

["## Things You'll Need", '* Enough money to get quality clothes, makeup, and hair supplies', '* A mirror so you can see yourself while you practise how you act', '* An alarm clock- you need to get up nice and early in order to do your makeup and hair', '## Related wikiHows', '* If you approach a girl who is with her friends, be prepared to impress the entire group.', '* <a href="Be%20a%20True%20Rock%20N%20Roller">Be a True Rock N Roller</a>', '* <a href="Be%20a%20Greaser">Be a Greaser</a>', '## Timestamp::::2018-04-30T16:38:43Z', '## Section::::Related wikiHows.', 'Moe is a style in Japan.']


In [282]:
get_context(content[120]['Target_Context'], content[120]['Target_Tokenized'])

IndexError: list index out of range

In [283]:
print(content[120]['Target_Context'])

["## Things You'll Need", '* Enough money to get quality clothes, makeup, and hair supplies', '* A mirror so you can see yourself while you practise how you act', '* An alarm clock- you need to get up nice and early in order to do your makeup and hair', '## Related wikiHows', '* <a href="Act%20Like%20a%20New%20Yorker">Act Like a New Yorker</a>', '* <a href="Be%20a%20True%20Rock%20N%20Roller">Be a True Rock N Roller</a>', '* <a href="Look%20Like%20a%20Greaser">Look Like a Greaser</a>']
