<a href="https://colab.research.google.com/github/OracyC/6560-MachineLearning-ClassNotes/blob/master/NLP_exerice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction to Natural Language Processing

In this workbook, at a high-level we will learn about text tokenization; text normalization such as lowercasing, stemming; part-of-speech tagging; Named entity recognition; Sentiment analysis; Topic modeling; Word embeddings





In [3]:
####PLEASE EXECUTE THESE COMMANDS BEFORE PROCEEDING####

import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
#Tokenization -- Text into word tokens; Paragraphs into sentences;
from nltk.tokenize import sent_tokenize 
  
text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
sent_tokenize(text) 



['Hello everyone.',
 'Welcome to Intro to Machine Learning Applications.',
 'We are now learning important basics of NLP.']

In [5]:
import nltk.data 
  
german_tokenizer = nltk.data.load('tokenizers/punkt/PY3/german.pickle') 
  
text = 'Wie geht es Ihnen? Mir geht es gut.'
german_tokenizer.tokenize(text) 


['Wie geht es Ihnen?', 'Mir geht es gut.']

In [6]:
from nltk.tokenize import word_tokenize 
  
text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
word_tokenize(text) 



['Hello',
 'everyone',
 '.',
 'Welcome',
 'to',
 'Intro',
 'to',
 'Machine',
 'Learning',
 'Applications',
 '.',
 'We',
 'are',
 'now',
 'learning',
 'important',
 'basics',
 'of',
 'NLP',
 '.']

In [7]:
from nltk.tokenize import TreebankWordTokenizer 
  
tokenizer = TreebankWordTokenizer() 
tokenizer.tokenize(text) 


['Hello',
 'everyone.',
 'Welcome',
 'to',
 'Intro',
 'to',
 'Machine',
 'Learning',
 'Applications.',
 'We',
 'are',
 'now',
 'learning',
 'important',
 'basics',
 'of',
 'NLP',
 '.']

###n-grams vs tokens

##### n-grams are contiguous sequences of n-items in a sentence. N can be 1, 2 or any other positive integers, although usually we do not consider very large N because those n-grams rarely appears in many different places.

##### Tokens do not have any conditions on contiguity

In [8]:
#Using pure python

import re

def generate_ngrams(text, n):
    # Convert to lowercases
    text = text.lower()
    
    # Replace all none alphanumeric characters with spaces
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    
    # Break sentence in the token, remove empty tokens
    tokens = [token for token in text.split(" ") if token != ""]
    
    # Use the zip function to help us generate n-grams
    # Concatentate the tokens into ngrams and return
    ngrams = zip(*[tokens[i:] for i in range(n)])
    return [" ".join(ngram) for ngram in ngrams]

text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
print(text)
generate_ngrams(text, n=2)

Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP.


['hello everyone',
 'everyone welcome',
 'welcome to',
 'to intro',
 'intro to',
 'to machine',
 'machine learning',
 'learning applications',
 'applications we',
 'we are',
 'are now',
 'now learning',
 'learning important',
 'important basics',
 'basics of',
 'of nlp']

In [9]:
#Using NLTK import ngrams

import re
from nltk.util import ngrams

text = text.lower()
text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
tokens = [token for token in text.split(" ") if token != ""]
output = list(ngrams(tokens, 3))
print(output)

[('hello', 'everyone', 'welcome'), ('everyone', 'welcome', 'to'), ('welcome', 'to', 'intro'), ('to', 'intro', 'to'), ('intro', 'to', 'machine'), ('to', 'machine', 'learning'), ('machine', 'learning', 'applications'), ('learning', 'applications', 'we'), ('applications', 'we', 'are'), ('we', 'are', 'now'), ('are', 'now', 'learning'), ('now', 'learning', 'important'), ('learning', 'important', 'basics'), ('important', 'basics', 'of'), ('basics', 'of', 'nlp')]


In [10]:
#Text Normalization

#Lowercasing
text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
lowert = text.lower()
uppert = text.upper()

print(lowert)
print(uppert)


hello everyone. welcome to intro to machine learning applications. we are now learning important basics of nlp.
HELLO EVERYONE. WELCOME TO INTRO TO MACHINE LEARNING APPLICATIONS. WE ARE NOW LEARNING IMPORTANT BASICS OF NLP.


In [11]:
#Text Normalization
#stemming
#Porter stemmer is a famous stemming approach

from nltk.stem import PorterStemmer 
from nltk.tokenize import word_tokenize 
   
ps = PorterStemmer() 
  
# choose some words to be stemmed 
words = ["hike", "hikes", "hiked", "hiking", "hikers", "hiker"] 
  
for w in words: 
    print(w, " : ", ps.stem(w)) 



hike  :  hike
hikes  :  hike
hiked  :  hike
hiking  :  hike
hikers  :  hiker
hiker  :  hiker


In [12]:
from nltk.stem import PorterStemmer 
from nltk.tokenize import word_tokenize 
import re
   
ps = PorterStemmer() 
text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
print(text)


#Tokenize and stem the words
text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
tokens = [token for token in text.split(" ") if token != ""]

i=0
while i<len(tokens):
  tokens[i]=ps.stem(tokens[i])
  i=i+1

#merge all the tokens to form a long text sequence 
text2 = ' '.join(tokens) 

print(text2)

Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP.
hello everyon welcom to intro to machin learn applic We are now learn import basic of nlp


In [13]:
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize 
import re
   
ss = SnowballStemmer("english")
text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."
print(text)


#Tokenize and stem the words
text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
tokens = [token for token in text.split(" ") if token != ""]

i=0
while i<len(tokens):
  tokens[i]=ss.stem(tokens[i])
  i=i+1

#merge all the tokens to form a long text sequence 
text2 = ' '.join(tokens) 

print(text2)

Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP.
hello everyon welcom to intro to machin learn applic we are now learn import basic of nlp


In [14]:
#Stopwords removal 

from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 

text = "Hello everyone. Welcome to Intro to Machine Learning Applications. We are now learning important basics of NLP."

stop_words = set(stopwords.words('english')) 
word_tokens = word_tokenize(text) 
  
filtered_sentence = [w for w in word_tokens if not w in stop_words] 
  
filtered_sentence = [] 
  
for w in word_tokens: 
    if w not in stop_words: 
        filtered_sentence.append(w) 
  
print(word_tokens) 
print(filtered_sentence) 

text2 = ' '.join(filtered_sentence)

['Hello', 'everyone', '.', 'Welcome', 'to', 'Intro', 'to', 'Machine', 'Learning', 'Applications', '.', 'We', 'are', 'now', 'learning', 'important', 'basics', 'of', 'NLP', '.']
['Hello', 'everyone', '.', 'Welcome', 'Intro', 'Machine', 'Learning', 'Applications', '.', 'We', 'learning', 'important', 'basics', 'NLP', '.']


In [15]:
#Part-of-Speech tagging

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

text = 'GitHub is a development platform inspired by the way you work. From open source to business, you can host and review code, manage projects, and build software alongside 40 million developers.'

def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

sent = preprocess(text)
print(sent)


[('GitHub', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('development', 'NN'), ('platform', 'NN'), ('inspired', 'VBN'), ('by', 'IN'), ('the', 'DT'), ('way', 'NN'), ('you', 'PRP'), ('work', 'VBP'), ('.', '.'), ('From', 'IN'), ('open', 'JJ'), ('source', 'NN'), ('to', 'TO'), ('business', 'NN'), (',', ','), ('you', 'PRP'), ('can', 'MD'), ('host', 'VB'), ('and', 'CC'), ('review', 'VB'), ('code', 'NN'), (',', ','), ('manage', 'NN'), ('projects', 'NNS'), (',', ','), ('and', 'CC'), ('build', 'VB'), ('software', 'NN'), ('alongside', 'RB'), ('40', 'CD'), ('million', 'CD'), ('developers', 'NNS'), ('.', '.')]


In [16]:
#Named entity recognition

#spaCy is an NLP Framework -- easy to use and having ability to use neural networks

import en_core_web_sm
nlp = en_core_web_sm.load()

text = 'GitHub is a development platform inspired by the way you work. From open source to business, you can host and review code, manage projects, and build software alongside 40 million developers.'

doc = nlp(text)
print(doc.ents)
print([(X.text, X.label_) for X in doc.ents])

(GitHub, 40 million)
[('GitHub', 'ORG'), ('40 million', 'CARDINAL')]


In [0]:
#Sentiment analysis

In [0]:
#Topic modeling

In [0]:
#Word embeddings


#Class exercise

#### 1. Read a file from its URL 
#### 2. Extract the text and tokenize it meaningfully into words. 
#### 3. Print the entire text combined after tokenization. 
#### 4. Perform stemming using both porter and snowball stemmers. Which one works the best? Why? 
#### 5. Remove stopwords
#### 6. Identify the top-10 unigrams based on their frequency.


In [19]:

#Load the file first
!wget https://www.dropbox.com/s/o8lxi6yrezmt5em/reviews.txt


--2019-12-18 01:10:13--  https://www.dropbox.com/s/o8lxi6yrezmt5em/reviews.txt
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.1, 2620:100:6021:1::a27d:4101
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/o8lxi6yrezmt5em/reviews.txt [following]
--2019-12-18 01:10:13--  https://www.dropbox.com/s/raw/o8lxi6yrezmt5em/reviews.txt
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc03d9848d9da6aec7e0ec717f14.dl.dropboxusercontent.com/cd/0/inline/AueoIqaxmzfMPZzpUAqdxkksViO2aJucFlE3jqX0XmuUA0UositTfrEHWnvp16uAe4QgTHcvFLgTO3eBsSM041RMz9AQByiRLx6mwaVbB4RYUoRHNu0D5SoknUhNULtIz-0/file# [following]
--2019-12-18 01:10:14--  https://uc03d9848d9da6aec7e0ec717f14.dl.dropboxusercontent.com/cd/0/inline/AueoIqaxmzfMPZzpUAqdxkksViO2aJucFlE3jqX0XmuUA0UositTfrEHWnvp16uAe4QgTHcvFLgTO3eBsSM041RMz9AQByiRLx6mwaVbB4RYUoRHNu

In [30]:
reviews = open('reviews.txt').read()
reviews

"\nAfter a morning of Thrift Store hunting, a friend and I were thinking of lunch, and he suggested Emil's after he'd seen Chris Sebak do a bit on it and had tried it a time or two before, and I had not. He said they had a decent Reuben, but to be prepared to step back in time.\n\nWell, seeing as how I'm kind of addicted to late 40's and early 50's, and the whole Rat Pack scene, stepping back in time is a welcomed change in da burgh...as long as it doesn't involve 1979, which I can see all around me every day.\n\nAnd yet another shot at finding a decent Reuben in da burgh...well, that's like hunting the Holy Grail. So looking under one more bush certainly wouldn't hurt.\n\nSo off we go right at lunchtime in the middle of...where exactly were we? At first I thought we were lost, driving around a handful of very rather dismal looking blocks in what looked like a neighborhood that had been blighted by the building of a highway. And then...AHA! Here it is! And yep, there it was. This littl

In [21]:
word_tokenize(reviews) 

['After',
 'a',
 'morning',
 'of',
 'Thrift',
 'Store',
 'hunting',
 ',',
 'a',
 'friend',
 'and',
 'I',
 'were',
 'thinking',
 'of',
 'lunch',
 ',',
 'and',
 'he',
 'suggested',
 'Emil',
 "'s",
 'after',
 'he',
 "'d",
 'seen',
 'Chris',
 'Sebak',
 'do',
 'a',
 'bit',
 'on',
 'it',
 'and',
 'had',
 'tried',
 'it',
 'a',
 'time',
 'or',
 'two',
 'before',
 ',',
 'and',
 'I',
 'had',
 'not',
 '.',
 'He',
 'said',
 'they',
 'had',
 'a',
 'decent',
 'Reuben',
 ',',
 'but',
 'to',
 'be',
 'prepared',
 'to',
 'step',
 'back',
 'in',
 'time',
 '.',
 'Well',
 ',',
 'seeing',
 'as',
 'how',
 'I',
 "'m",
 'kind',
 'of',
 'addicted',
 'to',
 'late',
 '40',
 "'s",
 'and',
 'early',
 '50',
 "'s",
 ',',
 'and',
 'the',
 'whole',
 'Rat',
 'Pack',
 'scene',
 ',',
 'stepping',
 'back',
 'in',
 'time',
 'is',
 'a',
 'welcomed',
 'change',
 'in',
 'da',
 'burgh',
 '...',
 'as',
 'long',
 'as',
 'it',
 'does',
 "n't",
 'involve',
 '1979',
 ',',
 'which',
 'I',
 'can',
 'see',
 'all',
 'around',
 'me',
 'e

In [22]:
#Tokenize and stem the words
reviews = re.sub(r'[^a-zA-Z0-9\s]', ' ', reviews)
tokens = [token for token in reviews.split(" ") if token != ""]

i=0
while i<len(tokens):
  tokens[i]=ps.stem(tokens[i])
  i=i+1

#merge all the tokens to form a long text sequence 
reviews2 = ' '.join(tokens) 

print(reviews2)


after a morn of thrift store hunt a friend and I were think of lunch and he suggest emil s after he d seen chri sebak do a bit on it and had tri it a time or two befor and I had not He said they had a decent reuben but to be prepar to step back in time 

well see as how I m kind of addict to late 40 s and earli 50 s and the whole rat pack scene step back in time is a welcom chang in da burgh as long as it doesn t involv 1979 which I can see all around me everi day 

and yet anoth shot at find a decent reuben in da burgh well that s like hunt the holi grail So look under one more bush certainli wouldn t hurt 

so off we go right at lunchtim in the middl of where exactli were we At first I thought we were lost drive around a hand of veri rather dismal look block in what look like a neighborhood that had been blight by the build of a highway and then aha here it is and yep there it wa thi littl unassum build with an add on entranc with what look like a veri old hand paint sign state quit

In [23]:
#Tokenize and stem the words
reviews = re.sub(r'[^a-zA-Z0-9\s]', ' ', reviews)
tokens = [token for token in reviews.split(" ") if token != ""]

i=0
while i<len(tokens):
  tokens[i]=ss.stem(tokens[i])
  i=i+1

#merge all the tokens to form a long text sequence 
reviews2 = ' '.join(tokens) 

print(reviews2)


after a morn of thrift store hunt a friend and i were think of lunch and he suggest emil s after he d seen chris sebak do a bit on it and had tri it a time or two befor and i had not he said they had a decent reuben but to be prepar to step back in time 

well see as how i m kind of addict to late 40 s and earli 50 s and the whole rat pack scene step back in time is a welcom chang in da burgh as long as it doesn t involv 1979 which i can see all around me everi day 

and yet anoth shot at find a decent reuben in da burgh well that s like hunt the holi grail so look under one more bush certain wouldn t hurt 

so off we go right at lunchtim in the middl of where exact were we at first i thought we were lost drive around a hand of veri rather dismal look block in what look like a neighborhood that had been blight by the build of a highway and then aha here it is and yep there it was this littl unassum build with an add on entranc with what look like a veri old hand paint sign state quit 

In [24]:
word_tokens = word_tokenize(reviews) 
  
filtered_sentence = [w for w in word_tokens if not w in stop_words] 
  
filtered_sentence = [] 
  
for w in word_tokens: 
    if w not in stop_words: 
        filtered_sentence.append(w) 
  
print(word_tokens) 
print(filtered_sentence) 

reviews2 = ' '.join(filtered_sentence)

['After', 'a', 'morning', 'of', 'Thrift', 'Store', 'hunting', 'a', 'friend', 'and', 'I', 'were', 'thinking', 'of', 'lunch', 'and', 'he', 'suggested', 'Emil', 's', 'after', 'he', 'd', 'seen', 'Chris', 'Sebak', 'do', 'a', 'bit', 'on', 'it', 'and', 'had', 'tried', 'it', 'a', 'time', 'or', 'two', 'before', 'and', 'I', 'had', 'not', 'He', 'said', 'they', 'had', 'a', 'decent', 'Reuben', 'but', 'to', 'be', 'prepared', 'to', 'step', 'back', 'in', 'time', 'Well', 'seeing', 'as', 'how', 'I', 'm', 'kind', 'of', 'addicted', 'to', 'late', '40', 's', 'and', 'early', '50', 's', 'and', 'the', 'whole', 'Rat', 'Pack', 'scene', 'stepping', 'back', 'in', 'time', 'is', 'a', 'welcomed', 'change', 'in', 'da', 'burgh', 'as', 'long', 'as', 'it', 'doesn', 't', 'involve', '1979', 'which', 'I', 'can', 'see', 'all', 'around', 'me', 'every', 'day', 'And', 'yet', 'another', 'shot', 'at', 'finding', 'a', 'decent', 'Reuben', 'in', 'da', 'burgh', 'well', 'that', 's', 'like', 'hunting', 'the', 'Holy', 'Grail', 'So', 'lo

In [25]:
gram = list(ngrams(tokens, 3))
print(gram)

[('\nafter', 'a', 'morn'), ('a', 'morn', 'of'), ('morn', 'of', 'thrift'), ('of', 'thrift', 'store'), ('thrift', 'store', 'hunt'), ('store', 'hunt', 'a'), ('hunt', 'a', 'friend'), ('a', 'friend', 'and'), ('friend', 'and', 'i'), ('and', 'i', 'were'), ('i', 'were', 'think'), ('were', 'think', 'of'), ('think', 'of', 'lunch'), ('of', 'lunch', 'and'), ('lunch', 'and', 'he'), ('and', 'he', 'suggest'), ('he', 'suggest', 'emil'), ('suggest', 'emil', 's'), ('emil', 's', 'after'), ('s', 'after', 'he'), ('after', 'he', 'd'), ('he', 'd', 'seen'), ('d', 'seen', 'chris'), ('seen', 'chris', 'sebak'), ('chris', 'sebak', 'do'), ('sebak', 'do', 'a'), ('do', 'a', 'bit'), ('a', 'bit', 'on'), ('bit', 'on', 'it'), ('on', 'it', 'and'), ('it', 'and', 'had'), ('and', 'had', 'tri'), ('had', 'tri', 'it'), ('tri', 'it', 'a'), ('it', 'a', 'time'), ('a', 'time', 'or'), ('time', 'or', 'two'), ('or', 'two', 'befor'), ('two', 'befor', 'and'), ('befor', 'and', 'i'), ('and', 'i', 'had'), ('i', 'had', 'not'), ('had', 'not

In [28]:
generate_ngrams(reviews, n=1)

['\nafter',
 'a',
 'morning',
 'of',
 'thrift',
 'store',
 'hunting',
 'a',
 'friend',
 'and',
 'i',
 'were',
 'thinking',
 'of',
 'lunch',
 'and',
 'he',
 'suggested',
 'emil',
 's',
 'after',
 'he',
 'd',
 'seen',
 'chris',
 'sebak',
 'do',
 'a',
 'bit',
 'on',
 'it',
 'and',
 'had',
 'tried',
 'it',
 'a',
 'time',
 'or',
 'two',
 'before',
 'and',
 'i',
 'had',
 'not',
 'he',
 'said',
 'they',
 'had',
 'a',
 'decent',
 'reuben',
 'but',
 'to',
 'be',
 'prepared',
 'to',
 'step',
 'back',
 'in',
 'time',
 '\n\nwell',
 'seeing',
 'as',
 'how',
 'i',
 'm',
 'kind',
 'of',
 'addicted',
 'to',
 'late',
 '40',
 's',
 'and',
 'early',
 '50',
 's',
 'and',
 'the',
 'whole',
 'rat',
 'pack',
 'scene',
 'stepping',
 'back',
 'in',
 'time',
 'is',
 'a',
 'welcomed',
 'change',
 'in',
 'da',
 'burgh',
 'as',
 'long',
 'as',
 'it',
 'doesn',
 't',
 'involve',
 '1979',
 'which',
 'i',
 'can',
 'see',
 'all',
 'around',
 'me',
 'every',
 'day',
 '\n\nand',
 'yet',
 'another',
 'shot',
 'at',
 'fin

In [0]:
rev = reviews.split("delimiter")

In [44]:
bigram_vectorizer = CountVectorizer(ngram_range=(1, 1))
bigram_vectorizer.fit(rev)
bigram_vectorizer.get_feature_names()

['12',
 '1950',
 '1979',
 '2nd',
 '40',
 '50',
 '99',
 'about',
 'actual',
 'add',
 'addendum',
 'addicted',
 'adjoining',
 'afford',
 'after',
 'again',
 'aha',
 'ain',
 'all',
 'also',
 'am',
 'amazing',
 'an',
 'and',
 'another',
 'any',
 'around',
 'as',
 'ask',
 'asked',
 'at',
 'ate',
 'awesome',
 'back',
 'bar',
 'batter',
 'be',
 'been',
 'before',
 'behind',
 'best',
 'big',
 'bit',
 'blighted',
 'blocks',
 'blue',
 'bread',
 'breaded',
 'brewed',
 'building',
 'bun',
 'burgers',
 'burgh',
 'burrito',
 'bush',
 'busy',
 'but',
 'by',
 'came',
 'can',
 'certainly',
 'chairs',
 'change',
 'charges',
 'cheap',
 'cheers',
 'chicken',
 'chris',
 'collar',
 'comfortable',
 'could',
 'counts',
 'crave',
 'crowd',
 'da',
 'dark',
 'day',
 'decent',
 'definitely',
 'did',
 'dino',
 'dismal',
 'dive',
 'do',
 'doesn',
 'door',
 'driving',
 'each',
 'early',
 'emil',
 'entered',
 'entrance',
 'especially',
 'every',
 'exactly',
 'excellent',
 'eyes',
 'fan',
 'fantastic',
 'feel',
 'few'

In [45]:
bigram_vectorizer.transform(rev).toarray()

array([[ 1,  1,  1,  1,  1,  1,  1,  3,  1,  1,  1,  1,  1,  1,  2,  1,
         1,  1,  1,  1,  1,  1,  2, 30,  5,  1,  2, 11,  1,  1,  5,  1,
         1,  3,  2,  1,  2,  2,  2,  1,  1,  2,  1,  1,  1,  1,  1,  1,
         1,  2,  2,  2,  5,  1,  1,  1,  7,  2,  2,  3,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  5,  1,  1,  1,  5,  1,  3,  3,  2,  2,
         1,  1,  1,  2,  1,  1,  1,  1,  1,  3,  1,  1,  1,  1,  2,  2,
         1,  1,  1,  1,  1,  1,  2,  1,  1,  8,  1,  1,  1,  1,  5,  1,
         1,  1,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,  6,  1,  1,  1,
         7,  1,  1,  1,  5,  4,  1,  1,  2,  1,  1,  1,  1,  1,  1,  1,
         1,  2,  2,  1,  1,  1, 15,  1, 10, 13,  1,  1,  1,  3,  1,  4,
         1,  2,  1,  3,  2,  1,  1,  1,  2,  2,  1,  1,  1,  2,  1,  1,
         1,  1,  1,  1,  1,  4,  4,  2,  4,  2,  1,  2, 13,  1,  1,  2,
         2, 11,  4,  5,  1,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2, 