In [158]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
import urllib.request
from nltk.corpus import stopwords
import re
import numpy as np
from collections import Counter
from sklearn import preprocessing
import string

from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB


[nltk_data] Downloading package stopwords to
[nltk_data]     /home/tanmoypaul/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/tanmoypaul/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [159]:
# load the data
non_clickbait_url = "http://www.cs.columbia.edu/~sarahita/CL/non_clickbait_data.txt"
clickbait_url = "http://www.cs.columbia.edu/~sarahita/CL/clickbait_data.txt"

# read url .txt file into string "data"
def get_data(url):
  data = urllib.request.urlopen(url).read().decode('utf-8')
  return data

non_clickbait_data = get_data(non_clickbait_url)
clickbait_data = get_data(clickbait_url)

In [160]:
# combine clickbait and non-clickbait data in a single list
non_clickbait_headlines = non_clickbait_data.rstrip('\n').split('\n')
clickbait_headlines = clickbait_data.rstrip('\n').split('\n')
all_headlines = non_clickbait_headlines + clickbait_headlines

In [161]:
# create a list of corresponding labels
non_cb_labels = [0] * len(non_clickbait_headlines)
cb_labels = [1] * len(clickbait_headlines)
all_labels = non_cb_labels + cb_labels

# 1. Stopwords

In [162]:
# extract features: bag of stop words
def stop_words(texts):
        bow = []        # what we are returning
        eng_stopwords = stopwords.words('english')      # all english stopwords
        for text in texts:      # for headline in headlines
                counts = []     # list of counts
                tokens = nltk.word_tokenize(text.lower())  # tokens is dict of tokenized headline
                for sw in eng_stopwords:                # go through stopwords
                        sw_count = tokens.count(sw)
                        counts.append(sw_count)         # COUNT OF ALL STOPWORD APPEARANCES IN ONE HEADLINE
                bow.append(counts)                      # 2D ARRAY OF SW COUNT FOR EACH HEADLINE
        bow_np = np.array(bow).astype(float)
        return bow_np

In [163]:
# extract features
stop_words_features = stop_words(all_headlines)

In [164]:
stop_words_features.shape

(31998, 179)

In [165]:
# convert features and labels to numpy arrays
X = stop_words_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.8735535323538606
[0.88       0.8790625  0.866875   0.878125   0.8815625  0.8821875
 0.8834375  0.87125    0.8380744  0.87496093]


# 2. Syntactic

In [166]:
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize

In [167]:
pos_counts = {}

for headline in all_headlines:
    pos_headline = pos_tag(word_tokenize(headline))
    for token, tag in pos_headline:
        if tag in pos_counts:
            pos_counts[tag] += 1
        else: 
            pos_counts[tag] = 1

In [168]:
print(f'Length of pos_counts: {len(pos_counts)}')
pos_top_ten = list(sorted(pos_counts.items(), key=lambda item: item[1]))[-10:]
print(f'Ordered: {pos_top_ten}')

Length of pos_counts: 43
Ordered: [('VBZ', 7548), ('VB', 7798), ('PRP', 8294), ('CD', 11959), ('JJ', 12661), ('DT', 12876), ('NNS', 15322), ('NN', 22577), ('IN', 29113), ('NNP', 117851)]


## accuracy

In [169]:
# extract features: bag of pos tags
def pos_accuracy(texts, pos_top_ten):
        bow = []        # what we are returning
        for text in texts:      # for headline in headlines
                counts = []     # list of counts
                tokens = nltk.word_tokenize(text)  # tokens is dict of tokenized headline
                tags = pos_tag(tokens)
                tags_list = [item for t in tags for item in t]
                for pos in pos_top_ten:               
                        pos_count = tags_list.count(pos)
                        counts.append(pos_count)        
                bow.append(counts)                     
        bow_np = np.array(bow).astype(float)
        return bow_np

In [170]:
# tags = pos_tag(nltk.word_tokenize(all_headlines[0]))
# tags_list = [item for t in tags for item in t]
# print(tags_list)

In [171]:
pos_top_ten = [tag for tag, count in pos_top_ten]
print(f'only pos list: {pos_top_ten}')
pos_features = pos_accuracy(all_headlines, pos_top_ten)
pos_features.shape

only pos list: ['VBZ', 'VB', 'PRP', 'CD', 'JJ', 'DT', 'NNS', 'NN', 'IN', 'NNP']


(31998, 10)

In [172]:
# convert features and labels to numpy arrays
X = pos_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.7732356888871521
[0.7765625  0.78       0.761875   0.771875   0.7784375  0.7853125
 0.77       0.76625    0.77305408 0.76899031]


# 3. Lexical

In [173]:
from nltk.util import ngrams

n = 1
unigram_count = {}
eng_stopwords = stopwords.words('english')

for headline in all_headlines:
    unigrams = list(ngrams(headline.split(), n))

    for item in list(unigrams):
        for word in item:
            if word.lower() not in eng_stopwords:
                if word not in unigram_count:
                    unigram_count[word] = 1
                else:
                    unigram_count[word] += 1

In [174]:
print(f'Length of unigram_count: {len(unigram_count)}')
unigram_top_thirty = list(sorted(unigram_count.items(), key=lambda item : item[1]))[-30:]
print(f'Ordered: {unigram_top_thirty}')

Length of unigram_count: 35578
Ordered: [('Ever', 326), ('Every', 333), ('dies', 336), ('Time', 339), ('Get', 343), ('World', 344), ('First', 363), ('18', 364), ('15', 373), ('23', 377), ('One', 391), ('Life', 402), ('Need', 414), ("Here's", 423), ('Best', 477), ('2015', 494), ('Like', 503), ('U.S.', 537), ('Times', 550), ('Actually', 579), ('19', 593), ('US', 599), ('Based', 607), ('21', 663), ('17', 687), ('Make', 789), ('Know', 804), ('People', 920), ('Things', 996), ('New', 1036)]


## accuracy

In [175]:
# extract features: bag of unigrams
def unigram_accuracy(texts, unigram_top_thirty):
        bow = []        # what we are returning
        for text in texts:      # for headline in headlines
                counts = []     # list of counts
                tokens = nltk.word_tokenize(text)  # tokens is dict of tokenized headline
                for unigram in unigram_top_thirty:               
                        unigram_count = tokens.count(unigram)
                        counts.append(unigram_count)        
                bow.append(counts)                     
        bow_np = np.array(bow).astype(float)
        return bow_np

In [176]:
unigram_top_thirty = [tag for tag, count in unigram_top_thirty]
print(f'Only unigram list: {unigram_top_thirty}')
unigram_features = unigram_accuracy(all_headlines, unigram_top_thirty)
unigram_features.shape

Only unigram list: ['Ever', 'Every', 'dies', 'Time', 'Get', 'World', 'First', '18', '15', '23', 'One', 'Life', 'Need', "Here's", 'Best', '2015', 'Like', 'U.S.', 'Times', 'Actually', '19', 'US', 'Based', '21', '17', 'Make', 'Know', 'People', 'Things', 'New']


(31998, 30)

In [177]:
# convert features and labels to numpy arrays
X = unigram_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.7173213211159737
[0.739375   0.7359375  0.7346875  0.7365625  0.7428125  0.7409375
 0.7334375  0.7271875  0.54517037 0.73710535]


# 4. Punctuation

In [178]:
punctuation_counts = {}

for headline in all_headlines:
    tokenized_headline = word_tokenize(headline)
    for token in tokenized_headline:
        if token not in string.punctuation:
            continue
        if token in punctuation_counts:
            punctuation_counts[token] += 1
        else: 
            punctuation_counts[token] = 1


In [179]:
print(f'Length of punctuation_counts: {len(punctuation_counts)}')
punctuation_list = list(sorted(punctuation_counts.items(), key=lambda item: item[1]))
print(f'Ordered: {punctuation_list}')

Length of punctuation_counts: 23
Ordered: [('|', 1), ('/', 1), ('[', 1), (']', 1), ('`', 2), ('=', 3), ('+', 4), ('@', 6), ('*', 30), ('-', 33), ('!', 40), ('#', 57), ('(', 91), (')', 91), ('&', 99), ('%', 144), ('?', 162), (';', 230), ('$', 259), ("'", 602), ('.', 630), (':', 1073), (',', 4081)]


## accuracy

In [180]:
# extract features: bag of punctuation
def punctuation_accuracy(texts, punctuation_list):
        bow = []        # what we are returning
        for text in texts:      # for headline in headlines
                counts = []     # list of counts
                tokens = nltk.word_tokenize(text)  # tokens is dict of tokenized headline
                for punctuation in punctuation_list:               
                        punctuation_count = tokens.count(punctuation)
                        counts.append(punctuation_count)        
                bow.append(counts)                     
        bow_np = np.array(bow).astype(float)
        return bow_np

In [181]:
punctuation_list = [tag for tag, count in punctuation_list]
print(f'Only punctuation list: {punctuation_list}')
punctuation_features = punctuation_accuracy(all_headlines, punctuation_list)
punctuation_features.shape

Only punctuation list: ['|', '/', '[', ']', '`', '=', '+', '@', '*', '-', '!', '#', '(', ')', '&', '%', '?', ';', '$', "'", '.', ':', ',']


(31998, 23)

In [182]:
# convert features and labels to numpy arrays
X = punctuation_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.5012212800875273
[0.494375  0.4859375 0.4946875 0.49      0.495625  0.4871875 0.49375
 0.4896875 0.5948734 0.4860894]


# 5. Complexity

In [183]:
total_words = 0
total_chars = 0
unique_words = []
long_words = 0

for headline in all_headlines:
    tokenized_headline = word_tokenize(headline)
    for token in tokenized_headline:
        token = token.lower()
        if token not in string.punctuation:
            total_words += 1
            total_chars += len(token)
            if len(token) >= 6:
                long_words += 1
            if token not in unique_words:
                unique_words.append(token)


### average number of characters per word    

In [184]:
print(total_chars)
avg_chars_per_word = total_chars / total_words
print(f'Average characters: {avg_chars_per_word}')

1461156
Average characters: 4.867453504292296


### #unique words/#total words

In [185]:
print(len(unique_words))
unique_to_total = len(unique_words) / total_words
print(f'Average words: {unique_to_total}')

24854
Average words: 0.08279450612780614


### number of words


In [186]:
print(total_words)

300189


### Count of “long” words - words with >= 6 letters

In [187]:
print(long_words)

104454


## accuracy

In [189]:
complexity_list = [[avg_chars_per_word, unique_to_total, total_words, long_words]] * 31998
print(f'Only complexity list: {complexity_list[0:3]}')
complexity_features = np.array(complexity_list).astype(float)
complexity_features.shape

Only complexity list: [[4.867453504292296, 0.08279450612780614, 300189, 104454], [4.867453504292296, 0.08279450612780614, 300189, 104454], [4.867453504292296, 0.08279450612780614, 300189, 104454]]


(31998, 4)

In [190]:
# convert features and labels to numpy arrays
X = complexity_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.49996874023132226
[0.5       0.5       0.5       0.5       0.5       0.5       0.5
 0.5       0.4998437 0.4998437]


# 6. Slang words

In [191]:
abbreviations = {
    "4ao" : "for adults only",
    "a.m" : "before midday",
    "a3" : "anytime anywhere anyplace",
    "aamof" : "as a matter of fact",
    "acct" : "account",
    "adih" : "another day in hell",
    "afaic" : "as far as i am concerned",
    "afaict" : "as far as i can tell",
    "afaik" : "as far as i know",
    "afair" : "as far as i remember",
    "afk" : "away from keyboard",
    "approx" : "approximately",
    "asap" : "as soon as possible",
    "asl" : "age, sex, location",
    "atk" : "at the keyboard",
    "ave." : "avenue",
    "aymm" : "are you my mother",
    "ayor" : "at your own risk", 
    "b&b" : "bed and breakfast",
    "b+b" : "bed and breakfast",
    "b.c" : "before christ",
    "b2b" : "business to business",
    "b2c" : "business to customer",
    "b4" : "before",
    "b4n" : "bye for now",
    "b@u" : "back at you",
    "bae" : "before anyone else",
    "bak" : "back at keyboard",
    "bbbg" : "bye bye be good",
    "bbias" : "be back in a second",
    "bbl" : "be back later",
    "bbs" : "be back soon",
    "be4" : "before",
    "bfn" : "bye for now",
    "blvd" : "boulevard",
    "bout" : "about",
    "brb" : "be right back",
    "bros" : "brothers",
    "brt" : "be right there",
    "bsaaw" : "big smile and a wink",
    "btw" : "by the way",
    "bwl" : "bursting with laughter",
    "c/o" : "care of",
    "cet" : "central european time",
    "cf" : "compare",
    "csl" : "can not stop laughing",
    "cu" : "see you",
    "cul8r" : "see you later",
    "cv" : "curriculum vitae",
    "cwot" : "complete waste of time",
    "cya" : "see you",
    "cyt" : "see you tomorrow",
    "dae" : "does anyone else",
    "dbmib" : "do not bother me i am busy",
    "diy" : "do it yourself",
    "dm" : "direct message",
    "dwh" : "during work hours",
    "e123" : "easy as one two three",
    "eet" : "eastern european time",
    "eg" : "example",
    "embm" : "early morning business meeting",
    "encl" : "enclosed",
    "encl." : "enclosed",
    "etc" : "and so on",
    "faq" : "frequently asked questions",
    "fawc" : "for anyone who cares",
    "fb" : "facebook",
    "fc" : "fingers crossed",
    "fig" : "figure",
    "fimh" : "forever in my heart", 
    "ft." : "feet",
    "ft" : "featuring",
    "ftl" : "for the loss",
    "ftw" : "for the win",
    "fwiw" : "for what it is worth",
    "fyi" : "for your information",
    "g9" : "genius",
    "gahoy" : "get a hold of yourself",
    "gal" : "get a life",
    "gcse" : "general certificate of secondary education",
    "gfn" : "gone for now",
    "gg" : "good game",
    "gl" : "good luck",
    "glhf" : "good luck have fun",
    "gmt" : "greenwich mean time",
    "gmta" : "great minds think alike",
    "gn" : "good night",
    "g.o.a.t" : "greatest of all time",
    "goat" : "greatest of all time",
    "goi" : "get over it",
    "gps" : "global positioning system",
    "gr8" : "great",
    "gratz" : "congratulations",
    "gyal" : "girl",
    "h&c" : "hot and cold",
    "hp" : "horsepower",
    "hr" : "hour",
    "hrh" : "his royal highness",
    "ht" : "height",
    "ibrb" : "i will be right back",
    "ic" : "i see",
    "icq" : "i seek you",
    "icymi" : "in case you missed it",
    "idc" : "i do not care",
    "idgadf" : "i do not give a damn fuck",
    "idgaf" : "i do not give a fuck",
    "idk" : "i do not know",
    "ie" : "that is",
    "i.e" : "that is",
    "ifyp" : "i feel your pain",
    "IG" : "instagram",
    "iirc" : "if i remember correctly",
    "ilu" : "i love you",
    "ily" : "i love you",
    "imho" : "in my humble opinion",
    "imo" : "in my opinion",
    "imu" : "i miss you",
    "iow" : "in other words",
    "irl" : "in real life",
    "j4f" : "just for fun",
    "jic" : "just in case",
    "jk" : "just kidding",
    "jsyk" : "just so you know",
    "l8r" : "later",
    "lb" : "pound",
    "lbs" : "pounds",
    "ldr" : "long distance relationship",
    "lmao" : "laugh my ass off",
    "lmfao" : "laugh my fucking ass off",
    "lol" : "laughing out loud",
    "ltd" : "limited",
    "ltns" : "long time no see",
    "m8" : "mate",
    "mf" : "motherfucker",
    "mfs" : "motherfuckers",
    "mfw" : "my face when",
    "mofo" : "motherfucker",
    "mph" : "miles per hour",
    "mr" : "mister",
    "mrw" : "my reaction when",
    "ms" : "miss",
    "mte" : "my thoughts exactly",
    "nagi" : "not a good idea",
    "nbd" : "not big deal",
    "nfs" : "not for sale",
    "ngl" : "not going to lie",
    "nhs" : "national health service",
    "nrn" : "no reply necessary",
    "nsfl" : "not safe for life",
    "nsfw" : "not safe for work",
    "nth" : "nice to have",
    "nvr" : "never",
    "oc" : "original content",
    "og" : "original",
    "ohp" : "overhead projector",
    "oic" : "oh i see",
    "omdb" : "over my dead body",
    "omg" : "oh my god",
    "omw" : "on my way",
    "p.a" : "per annum",
    "poc" : "people of color",
    "pov" : "point of view",
    "pp" : "pages",
    "ppl" : "people",
    "prw" : "parents are watching",
    "ps" : "postscript",
    "pt" : "point",
    "ptb" : "please text back",
    "pto" : "please turn over",
    "qpsa" : "what happens",
    "ratchet" : "rude",
    "rbtl" : "read between the lines",
    "rlrt" : "real life retweet", 
    "rofl" : "rolling on the floor laughing",
    "roflol" : "rolling on the floor laughing out loud",
    "rotflmao" : "rolling on the floor laughing my ass off",
    "rt" : "retweet",
    "ruok" : "are you ok",
    "sfw" : "safe for work",
    "sk8" : "skate",
    "smh" : "shake my head",
    "sq" : "square",
    "srsly" : "seriously", 
    "ssdd" : "same stuff different day",
    "tbh" : "to be honest",
    "tbs" : "tablespooful",
    "tbsp" : "tablespooful",
    "tfw" : "that feeling when",
    "thks" : "thank you",
    "tho" : "though",
    "thx" : "thank you",
    "tia" : "thanks in advance",
    "til" : "today i learned",
    "tl;dr" : "too long i did not read",
    "tldr" : "too long i did not read",
    "tmb" : "tweet me back",
    "tntl" : "trying not to laugh",
    "ttyl" : "talk to you later",
    "u" : "you",
    "u2" : "you too",
    "u4e" : "yours for ever",
    "utc" : "coordinated universal time",
    "w/" : "with",
    "w/o" : "without",
    "w8" : "wait",
    "wassup" : "what is up",
    "wb" : "welcome back",
    "wtf" : "what the fuck",
    "wtg" : "way to go",
    "wtpa" : "where the party at",
    "wuf" : "where are you from",
    "wuzup" : "what is up",
    "wywh" : "wish you were here",
    "yd" : "yard",
    "ygtr" : "you got that right",
    "ynk" : "you never know",
    "zzz" : "sleeping bored and tired"
}

slang_words = []
for acronym in abbreviations:
    slang_words.append(acronym)

slang_counts = {}
unique_words = []

for headline in all_headlines:
    tokenized_headline = word_tokenize(headline)
    for token in tokenized_headline:
        token = token.lower()
        if token in slang_words:
            if token in slang_counts:
                slang_counts[token] += 1
            else: 
                slang_counts[token] = 1

In [192]:
print(f'Length of slang_counts: {len(slang_counts)}')
slang_top_ten = list(sorted(slang_counts.items(), key=lambda item: item[1]))[-10:]
print(f'Ordered: {slang_top_ten}')

Length of slang_counts: 37
Ordered: [('bros', 5), ('irl', 7), ('fyi', 10), ('fc', 11), ('bae', 11), ('asap', 14), ('omg', 15), ('ie', 26), ('wtf', 30), ('diy', 31)]


## accuracy

In [193]:
# extract features: bag of pos tags
def slang_accuracy(texts, slang_top_ten):
        bow = []        # what we are returning
        for text in texts:      # for headline in headlines
                counts = []     # list of counts
                tokens = nltk.word_tokenize(text)  # tokens is dict of tokenized headline
                for slang in slang_top_ten:               
                        slang_count = tokens.count(slang)
                        counts.append(slang_count)        
                bow.append(counts)                     
        bow_np = np.array(bow).astype(float)
        return bow_np

In [194]:
slang_top_ten = [tag for tag, count in slang_top_ten]
print(f'Only slang list: {slang_top_ten}')
slang_features = slang_accuracy(all_headlines, slang_top_ten)
slang_features.shape

Only slang list: ['bros', 'irl', 'fyi', 'fc', 'bae', 'asap', 'omg', 'ie', 'wtf', 'diy']


(31998, 10)

In [195]:
# convert features and labels to numpy arrays
X = slang_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.5007500195373554
[0.500625   0.5009375  0.501875   0.500625   0.500625   0.5009375
 0.500625   0.500625   0.4998437  0.50078149]


# Total Accuracy

In [196]:
combined_features = np.concatenate((stop_words_features, pos_features, unigram_features, punctuation_features, complexity_features, slang_features), axis=1)
combined_features.shape

(31998, 256)

In [197]:
# convert features and labels to numpy arrays
X = combined_features
Y = np.array(all_labels)

# run classifier using 10-fold cross validation
# report mean accuracy 

scores = cross_val_score(MultinomialNB(), X, Y, scoring='accuracy', cv=10)
print(scores.mean())
print(scores)

0.9272457115504846
[0.92875    0.9296875  0.916875   0.9296875  0.929375   0.93125
 0.923125   0.9209375  0.93091591 0.9318537 ]


# Question 2: difference of punctuation use

In [199]:
nonclickbait_punctuation_counts = {}

for headline in non_clickbait_headlines:
    tokenized_headline = word_tokenize(headline)
    for token in tokenized_headline:
        if token not in string.punctuation:
            continue
        if token in nonclickbait_punctuation_counts:
            nonclickbait_punctuation_counts[token] += 1
        else: 
            nonclickbait_punctuation_counts[token] = 1

print(f'Length of nonclickbait_punctuation_counts: {len(nonclickbait_punctuation_counts)}')
nonclickbait_punctuation_list = list(sorted(nonclickbait_punctuation_counts.items(), key=lambda item: item[1]))
print(f'Ordered: {nonclickbait_punctuation_list}')

Length of nonclickbait_punctuation_counts: 17
Ordered: [('+', 1), ('=', 1), ('*', 1), ('`', 2), ('!', 8), ('-', 29), ('(', 32), (')', 32), ('&', 50), ('%', 117), ('?', 119), ('$', 202), (';', 228), ("'", 472), ('.', 547), (':', 711), (',', 3356)]


In [200]:
clickbait_punctuation_counts = {}

for headline in clickbait_headlines:
    tokenized_headline = word_tokenize(headline)
    for token in tokenized_headline:
        if token not in string.punctuation:
            continue
        if token in clickbait_punctuation_counts:
            clickbait_punctuation_counts[token] += 1
        else: 
            clickbait_punctuation_counts[token] = 1

print(f'Length of clickbait_punctuation_counts: {len(clickbait_punctuation_counts)}')
clickbait_punctuation_list = list(sorted(clickbait_punctuation_counts.items(), key=lambda item: item[1]))
print(f'Ordered: {clickbait_punctuation_list}')

Length of clickbait_punctuation_counts: 22
Ordered: [('|', 1), ('/', 1), ('[', 1), (']', 1), ('=', 2), (';', 2), ('+', 3), ('-', 4), ('@', 6), ('%', 27), ('*', 29), ('!', 32), ('?', 43), ('&', 49), ('#', 57), ('$', 57), ('(', 59), (')', 59), ('.', 83), ("'", 130), (':', 362), (',', 725)]
