In [46]:
import pandas as pd
import numpy as np
import os

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from keras.utils import np_utils
from keras.preprocessing import sequence

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

import gensim
from gensim.models import Word2Vec

import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import unicodedata
import spacy

from multiprocessing import Pool

In [47]:
import matplotlib
from matplotlib import rc
import matplotlib.pyplot as plt

#font = {'family': 'Verdana', 'weight': 'normal'}
#rc('font', **font)
matplotlib.style.use('ggplot')

%matplotlib inline

pd.set_option('display.max_rows', 15)

In [48]:
spacy.util.set_data_path('/home/data/spacy/')
nlp = spacy.load('en')

# Загрузка данных

In [49]:
train_set = pd.read_csv('/home/data/share/quora/train.csv', index_col=0)
test_set = pd.read_csv('/home/data/share/quora/test.csv', index_col=0)

In [50]:
train_set.head(10)

Unnamed: 0_level_0,qid1,qid2,question1,question2,is_duplicate
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1,2,What is the step by step guide to invest in sh...,What is the step by step guide to invest in sh...,0
1,3,4,What is the story of Kohinoor (Koh-i-Noor) Dia...,What would happen if the Indian government sto...,0
2,5,6,How can I increase the speed of my internet co...,How can Internet speed be increased by hacking...,0
3,7,8,Why am I mentally very lonely? How can I solve...,Find the remainder when [math]23^{24}[/math] i...,0
4,9,10,"Which one dissolve in water quikly sugar, salt...",Which fish would survive in salt water?,0
5,11,12,Astrology: I am a Capricorn Sun Cap moon and c...,"I'm a triple Capricorn (Sun, Moon and ascendan...",1
6,13,14,Should I buy tiago?,What keeps childern active and far from phone ...,0
7,15,16,How can I be a good geologist?,What should I do to be a great geologist?,1
8,17,18,When do you use シ instead of し?,"When do you use ""&"" instead of ""and""?",0
9,19,20,Motorola (company): Can I hack my Charter Moto...,How do I hack Motorola DCX3400 for free internet?,0


## Пробелы в данных

In [51]:
test_set[test_set.isnull().any(axis=1)]

Unnamed: 0_level_0,question1,question2
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1
379205,How I can learn android app development?,
817520,How real can learn android app development?,
943911,How app development?,
1046690,,How I what can learn android app development?
1270024,How I can learn app development?,
1461432,,How distinct can learn android app development?


In [52]:
train_set.fillna('empty_string', inplace=True)
test_set.fillna('empty_string', inplace=True)

# Tokenization

### Text Preparation

In [205]:
def replace_characters_in_string(s, characters_pairs=[('\n', ' '), ('\t', ' ')]):    
    for old_c, new_c in characters_pairs:
        s = s.replace(old_c, new_c)
        
    return s

def remove_extra_spaces(s):
    return ' '.join( s.split() )

#### [math]...[/math]

In [206]:
pairs_to_replace_in_plain_math = [('[math]', ''), ('[/math]', ''), ('{', ''), ('}', ''), 
                                  ('\\left', ''), ('\\right', ''), ('\\displaystyle', ''), ('\\ln', 'log'), ('\\mathrm', ''),
                                  ('_', ''), ('\\', ''), (' ', '')]

def plain_math_replace(matchobj):
    start, end = matchobj.span()
    matching_string = matchobj.string[start:end]
    matching_string = replace_characters_in_string(matching_string, characters_pairs=pairs_to_replace_in_plain_math)
    return matching_string

plain_math_regex = r'\[math\]((?!\[math\]|\[\/math\]).)*\[\/math\]'

def clean_text_from_latex(text):
    clean_text = re.sub(plain_math_regex, plain_math_replace, text)
    return clean_text

In [13]:
counter = 0
for q1, q2, is_duplicate in train_set[['question1', 'question2', 'is_duplicate']].values:
    if ('[math]' in q1 or '[math]' in q2):
        if  is_duplicate:
            print(q1)
            print(q2, '\n')
            q1_fixed = clean_text_from_latex(q1).lower()
            q2_fixed = clean_text_from_latex(q2).lower()
            print(q1_fixed)
            print(q2_fixed, '\n\n\n')
        
        counter += 1

print('{} latex questions at train_set'.format(counter))

How do I learn [math]\LaTeX[/math]?
How can I learn latex in the easiest way? 

how do i learn latex?
how can i learn latex in the easiest way? 



Let [math]S_{n} = n^{2} + 20n + 12[/math], where [math]n[/math] is a positive integer. What is the sum of all possible values of n for which [math]S_{n}[/math] is a perfect square?
Given [math]S_n = n^2 +20n + 12[/math]. What will be the sum of positive integers such that [math]S_n[/math] will be a perfect square? 

let sn=n^2+20n+12, where n is a positive integer. what is the sum of all possible values of n for which sn is a perfect square?
given sn=n^2+20n+12. what will be the sum of positive integers such that sn will be a perfect square? 



If real numbers are in [math]x[/math]-axis and complex numbers in [math]y[/math]-axis, what is in the [math]z[/math]-axis?
If the real numbers are usually placed on the [math]x[/math]-axis and imaginary numbers placed on the [math]y[/math]-axis, what would be placed on the [math]z[/math]-axis? 

if 

#### non ascii

In [207]:
pairs_to_replace_unicode = [('’', '\''), ('“', '\''),  ('”', '\''), ('…', ' ... '), ('‘', '\''), ('—', '-'), ('–', '-'),
                            ('（', ' ('), ('）', ') '),
                            ('δ', 'delta'), ('π', 'pi'), ('√', 'sqrt'), ('∫', 'int'), ('℅', '%'), ('∧', '^'), ('ı', 'I'),
                            ('ø', 'o'), ('×', 'x'), 
                            ('₹', ' rupees '), ('£', '$'), ('€', '$'), ('¥', '$'),
                            ('̇', ''), ('￼', ''), ('•', ' '), ('​', ' '), 
                            ('\ufeff', ' '), ('\x80', ' '), ('\u202a', ' '), ('\u202c', ' '), ('\u200e', ' '),
                            ('\u200f', ' '), ('\uf09e', ' '), ('\u2061', ' ')]

def unicode_normalization(text):
    text = replace_characters_in_string(text, characters_pairs=pairs_to_replace_unicode)
    text_normalized = unicodedata.normalize('NFKD', text).encode('ascii','ignore').decode('utf8')
    if len(text_normalized) == len(text):
        text = text_normalized
    return text

In [208]:
def is_have_non_ascii(s):
    return any(ord(c) >= 128 for c in s)

def get_all_non_ascii(s):
    return str([c for c in s if ord(c) >= 128])
    
counter = 0
for q1, q2, is_duplicate in train_set[['question1', 'question2', 'is_duplicate']].values:
    if is_have_non_ascii(q1) or is_have_non_ascii(q2):
        
        q1_fixed = unicode_normalization(q1)
        q2_fixed = unicode_normalization(q2)
        if is_have_non_ascii(q1_fixed) or is_have_non_ascii(q2_fixed):
            print(q1)
            print(q2, '\n')
            print(q1_fixed)
            print(q2_fixed)
            print(get_all_non_ascii(q1_fixed), get_all_non_ascii(q2_fixed), '\n\n\n')
        
        counter += 1

print('{} non ascii questions at train_set'.format(counter))

When do you use シ instead of し?
When do you use "&" instead of "and"? 

When do you use シ instead of し?
When do you use "&" instead of "and"?
['シ', 'し'] [] 



What is my GPA if I have scored 65% in my 11th std CBSE?
Why is Persian word “ذليل” meaning ‘a Muslim’ translated incorrectly on Microsoft Translator? Is it intentional or careless? 

What is my GPA if I have scored 65% in my 11th std CBSE?
Why is Persian word 'ذليل' meaning 'a Muslim' translated incorrectly on Microsoft Translator? Is it intentional or careless?
[] ['ذ', 'ل', 'ي', 'ل'] 



What does tw4t mean?
What does 분위기 mean? 

What does tw4t mean?
What does 분위기 mean?
[] ['분', '위', '기'] 



What is the difference between ج and ز?
What is the difference between == and ===? 

What is the difference between ج and ز?
What is the difference between == and ===?
['ج', 'ز'] [] 



How can I prove that (A × B) − (C × D) = (A − C) × B ∪ A × (B − D)?
How can I prove (A × B) − (C × D) = (A − C) × B ∪ A × (B − D)? 

How can I prove that

What are some of the ways to get funding for Master's in U.S?
Why No. of moles of a given substance=Mass of the substance ÷ Molar mass of the substance=No. of formula units of the substance ÷ Avogadro's constant?
[] ['÷', '÷'] 



What does "まな" mean in Japanese?
What does this mean in Japanese? 

What does "まな" mean in Japanese?
What does this mean in Japanese?
['ま', 'な'] [] 



What is the difference between 多少 and 几 in Mandarin Chinese?
What is the difference between 画 and 画画 in Mandarin Chinese? 

What is the difference between 多少 and 几 in Mandarin Chinese?
What is the difference between 画 and 画画 in Mandarin Chinese?
['多', '少', '几'] ['画', '画', '画'] 



What is the difference between ج and ز?
What is the difference between "&" and "and"? 

What is the difference between ج and ز?
What is the difference between "&" and "and"?
['ج', 'ز'] [] 



What does 'polytics' mean?
What does 精神 mean? 

What does 'polytics' mean?
What does 精神 mean?
[] ['精', '神'] 



Is "sher" (शेर) a lion or a tig


What is the number of terms in the expansion of (x - 3x² + 3x³) ²°?
In the multiplicative group C^x complex numbers, find the order of the elements A = -√2/2+√2/2i and B = -√2/2-√2/2i? 

What is the number of terms in the expansion of (x - 3x² + 3x³) ²°?
In the multiplicative group C^x complex numbers, find the order of the elements A = -sqrt2/2+sqrt2/2i and B = -sqrt2/2-sqrt2/2i?
['²', '³', '²', '°'] [] 



What does 亮瞎了 mean?
What does يعني mean? 

What does 亮瞎了 mean?
What does يعني mean?
['亮', '瞎', '了'] ['ي', 'ع', 'ن', 'ي'] 



Can my followers on Tumblr see what I post on my secondary blog?
Why does Tumblr not allow secondary accounts/blogs to Initiate Social Features (Follow, Like, Ask, Submit → to other blogs)? 

Can my followers on Tumblr see what I post on my secondary blog?
Why does Tumblr not allow secondary accounts/blogs to Initiate Social Features (Follow, Like, Ask, Submit → to other blogs)?
[] ['→'] 



What does باقر mean in Arabic?
What does this mean in Arabic? 

Wha

How do I translate '竟然' to English?
How do I translate '才' to English? 

How do I translate '竟然' to English?
How do I translate '才' to English?
['竟', '然'] ['才'] 



Can I install an Android app on an unsupported device?
زب app: Can I install a Nomao naked camera for an Android? 

Can I install an Android app on an unsupported device?
زب app: Can I install a Nomao naked camera for an Android?
[] ['ز', 'ب'] 



What is the viscosity of air at 20°c?
What is the viscosity of water? 

What is the viscosity of air at 20°c?
What is the viscosity of water?
['°'] [] 



What is the meaning of Hindi word 'Parayi'?
What is the meaning of the Hindi word "बेहराल"? 

What is the meaning of Hindi word 'Parayi'?
What is the meaning of the Hindi word "बेहराल"?
[] ['ब', 'े', 'ह', 'र', 'ा', 'ल'] 



If x∝y/z^2, y∝ab^2 and z∝b/a then what is the relation between x, a and b? The answer has been given as b^4/a^3. Is that correct?
Which are the best book for preparation of UES (UNIVERSITY ENTRY SCHEME).? 

I

[] ['م', 'ي', 'ن', 'ع', 'ا', 'ي', 'ز', 'ه', 'ا', 'م', 'ب', 'و', 'ر', 'ج', 'ر', 'و', 'م', 'ي', 'ن', 'ع', 'ا', 'ي', 'ز', 'س', 'و', 'س', 'ي', 'س', '؟'] 



Why does the Chinese characters 吝啬 mean 小气?
What does the Chinese character "⺈" mean? 

Why does the Chinese characters 吝啬 mean 小气?
What does the Chinese character "⺈" mean?
['吝', '啬', '小', '气'] ['⺈'] 



I am 21 years old and suffering from back pain and ankle pain. what can I do to improve my condition and live a healthy life?
I am a heałthy 25-year-old female, but I have been waking with a dull, throbbing pain in my ankles, calves, and sometimes in the humerus area of my arms. It is difficult to descend the stairs, but as I get moving the pain subsides. What could this be? 

I am 21 years old and suffering from back pain and ankle pain. what can I do to improve my condition and live a healthy life?
I am a heałthy 25-year-old female, but I have been waking with a dull, throbbing pain in my ankles, calves, and sometimes in the humerus

What is the value of 1+1°=?
What is the value of 1÷0? 

What is the value of 1+1°=?
What is the value of 1÷0?
['°'] ['÷'] 



I am 16 years old girl. I'm 5′3″ tall and weigh 80 kg. How do I lose weight?
I am a 13 year old girl, and I am overweight. I am 5 feet 5 inches tall, and weigh 120 pounds. How do I lose weight? 

I am 16 years old girl. I'm 5′3″ tall and weigh 80 kg. How do I lose weight?
I am a 13 year old girl, and I am overweight. I am 5 feet 5 inches tall, and weigh 120 pounds. How do I lose weight?
['′', '″'] [] 



What does "Гиркинюгенд" mean?
What does 吴刚伐桂 mean? 

What does "Гиркинюгенд" mean?
What does 吴刚伐桂 mean?
['Г', 'и', 'р', 'к', 'и', 'н', 'ю', 'г', 'е', 'н', 'д'] ['吴', '刚', '伐', '桂'] 



What is meaning of ทวิชา?
What does 逆天 mean? 

What is meaning of ทวิชา?
What does 逆天 mean?
['ท', 'ว', 'ิ', 'ช', 'า'] ['逆', '天'] 



How do I understand β = r/g as a non-economist?
How do I understand ∝ = r * β as a non-economist? 

How do I understand β = r/g as a non-economist?


How could we say "也是醉了" in English?
How do I say "有规律可循" in English? 

How could we say "也是醉了" in English?
How do I say "有规律可循" in English?
['也', '是', '醉', '了'] ['有', '规', '律', '可', '循'] 



Can anyone translate this?
Can anyone translate this for me? 弱虫僕と君と花 

Can anyone translate this?
Can anyone translate this for me? 弱虫僕と君と花
[] ['弱', '虫', '僕', 'と', '君', 'と', '花'] 



What does 枯萎 mean?
What "K" means? 

What does 枯萎 mean?
What "K" means?
['枯', '萎'] [] 



How do I translate assammee language into english?
How do I translate “吃香喝辣” into English? 

How do I translate assammee language into english?
How do I translate '吃香喝辣' into English?
[] ['吃', '香', '喝', '辣'] 



What kind of marketing can you do with $500?
What's the difference between a PhD doctorate in the west vs “Candidate of Sciences" and "Doctor of Sciences" in Russia; "Кандидат Наук", "Доктор Наук"? 

What kind of marketing can you do with $500?
What's the difference between a PhD doctorate in the west vs 'Candidate of Scie

What is the difference between 不太好 and 太不好 in Mandarin Chinese?
What is the difference between Chinese and Mandarin? 

What is the difference between 不太好 and 太不好 in Mandarin Chinese?
What is the difference between Chinese and Mandarin?
['不', '太', '好', '太', '不', '好'] [] 



What does: * mean?
What does 逆天 mean? 

What does: * mean?
What does 逆天 mean?
[] ['逆', '天'] 



What’s Ystävä in English?
What is "如何收场" in English? 

What's Ystava in English?
What is "如何收场" in English?
[] ['如', '何', '收', '场'] 



μTorrent: What motivates people to upload torrents?
How do people benefit from uploading torrents? 

μTorrent: What motivates people to upload torrents?
How do people benefit from uploading torrents?
['μ'] [] 



What is the etymology of the Sanskrit word निश्चित?
What is the etymology of Sanskrit word 'Yoga'? 

What is the etymology of the Sanskrit word निश्चित?
What is the etymology of Sanskrit word 'Yoga'?
['न', 'ि', 'श', '्', 'च', 'ि', 'त'] [] 



What does もう一度うそをついたら、あなたの首をもらう…以上。 (J

What's the English translation of 澎湃激昂的高音唱法?
['ل', 'ج', 'ن', 'ة', 'ا', 'م', 'ت', 'ح', 'ا', 'ن'] ['澎', '湃', '激', '昂', '的', '高', '音', '唱', '法'] 



How do I solve inequalities like: x + 3^x < 4 ?
How do I solve the inequality |f (x)| + 2x + 3 ≥ 0? 

How do I solve inequalities like: x + 3^x < 4 ?
How do I solve the inequality |f (x)| + 2x + 3 ≥ 0?
[] ['≥'] 



How do you throw a heli (360°) on a bike?
Would it be wise to use a folding bike for bike touring or bike camping? 

How do you throw a heli (360°) on a bike?
Would it be wise to use a folding bike for bike touring or bike camping?
['°'] [] 



What does the symbol ℞ in medical prescriptions stand for?
What does the P symbol stand for? 

What does the symbol ℞ in medical prescriptions stand for?
What does the P symbol stand for?
['℞'] [] 



What do the Muslims and Jews think of the Prophet Joseph?
Does the cold weather (<10° F) make our brain slow and memory weak? 

What do the Muslims and Jews think of the Prophet Joseph?
Does th

[] ['«', '»'] 



How do I convert a 5 V DC to 3.3 V DC using resistors (voltage divider)? I have a couple of 220 Ω, 560 Ω, 2.2 kΩ and 10 kΩ resistors.
What is the minimum setup to operate the VIPER12 DIP LED Driver? Which voltage regulator should I use to get 350mA current at 3.2 V? 7805? Or LM317? 

How do I convert a 5 V DC to 3.3 V DC using resistors (voltage divider)? I have a couple of 220 Ω, 560 Ω, 2.2 kΩ and 10 kΩ resistors.
What is the minimum setup to operate the VIPER12 DIP LED Driver? Which voltage regulator should I use to get 350mA current at 3.2 V? 7805? Or LM317?
['Ω', 'Ω', 'Ω', 'Ω'] [] 



When do the numbers x, y not satisfy the system of inequalities 3≤x≤5 and |x−y|<1?
When do the numbers x, y not satisfy inequalities 3≤x≤5 and |x−y|<1? X<3 or x>5 or |x−y|<1 x<3 or x>5 or x≥y+1 or y≥x+1 x≤3 or x≥5 or x>y+1 or y>x+1 

When do the numbers x, y not satisfy the system of inequalities 3≤x≤5 and |x−y|<1?
When do the numbers x, y not satisfy inequalities 3≤x≤5 and |x−y|<1? 

What does the Chinese word 隔壁老王 mean? 

What does the Chinese word 「气质」mean?
What does the Chinese word 隔壁老王 mean?
['「', '气', '质', '」'] ['隔', '壁', '老', '王'] 



Heinlein longevity immortality?
What is the integration of (1-x⁴) ½? 

Heinlein longevity immortality?
What is the integration of (1-x⁴) ½?
[] ['⁴', '½'] 



What is the meaning of Hindi word ‘भेदभाव’ (Bhedbhaav)?
What is the meaning of Hindi word 'sampann'? 

What is the meaning of Hindi word 'भेदभाव' (Bhedbhaav)?
What is the meaning of Hindi word 'sampann'?
['भ', 'े', 'द', 'भ', 'ा', 'व'] [] 



What does 심하잖아 mean?
What does 任せてほしい mean? 

What does 심하잖아 mean?
What does 任せてほしい mean?
['심', '하', '잖', '아'] ['任', 'せ', 'て', 'ほ', 'し', 'い'] 



What is the meaning of ทองอาถรรพ์?
What does 슬마 mean? 

What is the meaning of ทองอาถรรพ์?
What does 슬마 mean?
['ท', 'อ', 'ง', 'อ', 'า', 'ถ', 'ร', 'ร', 'พ', '์'] ['슬', '마'] 



How do you express 有目共睹?
What is the best way to express your thoughts? 

How do you express 有目共睹?
What is the best w

What's the English translation of 整序变量?
['存', '在', '即', '是', '合', '理', '的'] ['整', '序', '变', '量', '\x87', '\x8f'] 



Why is ‘தமிழ்’ spelled as ‘Tamil’ in English, Isn't this supposed to be ‘Tamizh’ considering the special "ழ"?
Which is correct, Tamil or Tamizh? 

Why is 'தமிழ்' spelled as 'Tamil' in English, Isn't this supposed to be 'Tamizh' considering the special "ழ"?
Which is correct, Tamil or Tamizh?
['த', 'ம', 'ி', 'ழ', '்', 'ழ'] [] 



What does the Chinese word "煥森" mean?
What does the Chinese word 「气质」mean? 

What does the Chinese word "煥森" mean?
What does the Chinese word 「气质」mean?
['煥', '森'] ['「', '气', '质', '」'] 



What does this symbol mean Σ?
What does the symbol ~ mean? 

What does this symbol mean Σ?
What does the symbol ~ mean?
['Σ'] [] 



What does this mean in English 窩艾尼?
What does this mean in English: 莫斯科郊外的晚上。一个静悄悄的无风之夜。? 

What does this mean in English 窩艾尼?
What does this mean in English: 莫斯科郊外的晚上。一个静悄悄的无风之夜。?
['窩', '艾', '尼'] ['莫', '斯', '科', '郊', '外', '的', '

How can I show that [math](x^2-y^2)(a^2-b^2)=t^2-z^2[/math]? 

How can I show that [math](x+t) ^2=x^2+2·t·x+t^2[/math]?
How can I show that [math](x^2-y^2)(a^2-b^2)=t^2-z^2[/math]?
['·', '·'] [] 



What is the difference between ج and ز?
What is the difference between ":=" and "::=" ? 

What is the difference between ج and ز?
What is the difference between ":=" and "::=" ?
['ج', 'ز'] [] 



What is the precise meaning of the Sanskrit word मत्परः? What is/are the root word/s used in forming the Sanskrit word मत्परः?
What is the precise meaning of the Sanskrit word परिचर्यात्मकम्? What is/are the root word/s used in forming the Sanskrit word परिचर्यात्मकम्? 

What is the precise meaning of the Sanskrit word मत्परः? What is/are the root word/s used in forming the Sanskrit word मत्परः?
What is the precise meaning of the Sanskrit word परिचर्यात्मकम्? What is/are the root word/s used in forming the Sanskrit word परिचर्यात्मकम्?
['म', 'त', '्', 'प', 'र', 'ः', 'म', 'त', '्', 'प', 'र', 'ः'] ['

What is the meaning of מכונת אייס קפה?
What is the meaning of meaning?
['מ', 'כ', 'ו', 'נ', 'ת', 'א', 'י', 'י', 'ס', 'ק', 'פ', 'ה'] [] 



What is the meaning of a hindi word बेदबी?
What is the meaning of Hindi word 'Aavashyakta' (आवश्यकता)? 

What is the meaning of a hindi word बेदबी?
What is the meaning of Hindi word 'Aavashyakta' (आवश्यकता)?
['ब', 'े', 'द', 'ब', 'ी'] ['आ', 'व', 'श', '्', 'य', 'क', 'त', 'ा'] 



Wasn't the Fedorov Avtomat the first assault rifle instead of the StG 44?
Determine whether the sequence an = (1^3) /(n^4) +(2^3) /(n^4) +⋯+(n^3) /(n^4) converges or diverges. If it converges, what is the limit? 

Wasn't the Fedorov Avtomat the first assault rifle instead of the StG 44?
Determine whether the sequence an = (1^3) /(n^4) +(2^3) /(n^4) +⋯+(n^3) /(n^4) converges or diverges. If it converges, what is the limit?
[] ['⋯'] 



How do Trump supporters feel about Trump now with all the 180′s he is doing?
What do Trump supporters think about the fact that he is going bac

What "K" means?
What's the meaning of this: 不允许持卡人进行的交易? 

What "K" means?
What's the meaning of this: 不允许持卡人进行的交易?
[] ['不', '允', '许', '持', '卡', '人', '进', '行', '的', '交', '易'] 



What is meaning of ไฟรักไฟพยาบาท?
What does ΙΧΣ mean? 

What is meaning of ไฟรักไฟพยาบาท?
What does ΙΧΣ mean?
['ไ', 'ฟ', 'ร', 'ั', 'ก', 'ไ', 'ฟ', 'พ', 'ย', 'า', 'บ', 'า', 'ท'] ['Ι', 'Χ', 'Σ'] 



What is the meaning of Bengali word 'Shudhu'?
What is the meaning of Bengali word "জ্যাড্যাপহ"? 

What is the meaning of Bengali word 'Shudhu'?
What is the meaning of Bengali word "জ্যাড্যাপহ"?
[] ['জ', '্', 'য', 'া', 'ড', '্', 'য', 'া', 'প', 'হ'] 



What is psycosis?
What is "☐"? 

What is psycosis?
What is "☐"?
[] ['☐'] 



What is the English word for फ़जेती?
What is the English word for "Kalkandu"? 

What is the English word for फ़जेती?
What is the English word for "Kalkandu"?
['फ़', 'ज', 'े', 'त', 'ी'] [] 



रिक्त स्थान भरो :-एक _____औरत _____पर बैठकर _____ गीत गा रही थी |उस _____ से दूर दूर तक _____ही दिखाई देती 

Is 5'6 tall for a 12 year old?
Is 5'5″ tall for an 15 year old? 

Is 5'6 tall for a 12 year old?
Is 5'5″ tall for an 15 year old?
[] ['″'] 



What does “毕竟” mean?
What does "月光族" mean? 

What does '毕竟' mean?
What does "月光族" mean?
['毕', '竟'] ['月', '光', '族'] 



Can I say "出してある" instead of "出している" here?
Can I say “welcome” instead of “you are welcome”? 

Can I say "出してある" instead of "出している" here?
Can I say 'welcome' instead of 'you are welcome'?
['出', 'し', 'て', 'あ', 'る', '出', 'し', 'て', 'い', 'る'] [] 



What is the value of 0?
What is the value of 1÷0? 

What is the value of 0?
What is the value of 1÷0?
[] ['÷'] 



What is the etymology of entrée?
What's the etymology of 个? 

What is the etymology of entree?
What's the etymology of 个?
[] ['个'] 



9.Sulfur dioxide and bromine and water react to form hydrogen bromide and sulfuric acid (H2SO4) . SO2+Br2+2H2O→HBr+2H2SO4 S2O+2Br+2H2O→2HBr+H2SO4 SO2+Br2+H2O→HBr+2H2SO4 SO2+Br2+2H2O→2HBr+H2SO4?
Is SO2 a Lewis acid or base? 

9.Sulfur dioxide 


What does the Hindi word “श्चश्चश्च” mean?
What is the meaning of Hindi word 'sampann'? 

What does the Hindi word 'श्चश्चश्च' mean?
What is the meaning of Hindi word 'sampann'?
['श', '्', 'च', 'श', '्', 'च', 'श', '्', 'च'] [] 



If I want to ask my young child “What is this in Chinese” in Mandarin, could I say “用中文的話, 這是什麽?” What would be a better way to say this?
How do you say "My Chinese friend gave me my Chinese name" in Mandarin? 

If I want to ask my young child 'What is this in Chinese' in Mandarin, could I say '用中文的話, 這是什麽?' What would be a better way to say this?
How do you say "My Chinese friend gave me my Chinese name" in Mandarin?
['用', '中', '文', '的', '話', '這', '是', '什', '麽'] [] 



What does 'kunjinkao' mean in Chinese?
What does "封建成功" mean in Chinese? 

What does 'kunjinkao' mean in Chinese?
What does "封建成功" mean in Chinese?
[] ['封', '建', '成', '功'] 



What is 3-3x6+2?
Show that cos^2 (45°)-sin^2 (15°) = √3/4? 

What is 3-3x6+2?
Show that cos^2 (45°)-sin^2 (15°) = sqr

Player A has n+1 coins, while B has n coins. Both players throw all of their coins simultaneously and observe the no. of heads. If all coins are fair, then what is the probability that A obtains more heads than B?
A fair coin is independently flipped n times, k times by A and n − k times by B. Show that the probability that A and B flip the same number of heads is equal to the probability that there are a total of k heads? 

Player A has n+1 coins, while B has n coins. Both players throw all of their coins simultaneously and observe the no. of heads. If all coins are fair, then what is the probability that A obtains more heads than B?
A fair coin is independently flipped n times, k times by A and n − k times by B. Show that the probability that A and B flip the same number of heads is equal to the probability that there are a total of k heads?
[] ['−'] 



What are the best slogans based on the theme "United India" or "राष्ट्रीय एकता"?
What are some of the best slogans written on truck

## tokenization

In [272]:
pairs_to_replace_in_plain_math = [('[@math]', ''), ('[#math]', ''), ('{', ''), ('}', ''), 
                                  ('left', ''), ('right', ''), ('displaystyle', ''), ('ln', 'log'), 
                                  ('mathrm', ''), ('frac', ''), ('lfloor', ''), ('rfloor', ''),
                                  ('_', ''), ('\\', ''), ('&', ''), (' ', '')]

def plain_math_replace(matchobj):
    start, end = matchobj.span()
    matching_string = matchobj.string[start:end]
    matching_string = replace_characters_in_string(matching_string, characters_pairs=pairs_to_replace_in_plain_math)
    return matching_string

plain_math_regex = r'\[@math\]((?!\[@math\]|\[#math\]).)*\[#math\]'

def clean_text_from_latex(text):
    clean_text = re.sub(plain_math_regex, plain_math_replace, text)
    return clean_text

In [273]:
def number_with_comma_replace(matchobj):
    start, end = matchobj.span()
    matching_string = matchobj.string[start:end]
    matching_string = matching_string.replace(',', '')
    return matching_string

number_with_comma_replace_regex = r'[0-9]+,[0-9]+'

def number_with_comma_regex_replace(number):
    number = re.sub(number_with_comma_replace_regex, number_with_comma_replace, number)
    return number

In [274]:
def number_replace(matchobj):
    start, end = matchobj.span()
    matching_string = matchobj.string[start:end]
    matching_string = matching_string.rstrip('+-')
    matching_string = matching_string.replace('k', '000')
    return matching_string

number_regex = r'((?:^|\s)(\+?\-?[0-9]+)(k?)(\+?\-?)(?!\S))'

def number_regex_replace(number):
    number = re.sub(number_regex, number_replace, number)
    return number

In [275]:
def clean_text(text):
    text = text.lower()
    text = unicode_normalization(text)
    text = number_with_comma_regex_replace(text)
    text = remove_extra_spaces(text)
    
    return text

In [276]:
substrings_to_replace = [('"', "'"), ('(', ' '), (')', ' '), (':', ' '), (';', ' ')]

substrings_to_replace += [("what's", "what is"), ("who's","who is"), ("where's","where is"),
                          ("when's","when is"), ("how's","how is"), ("it's","it is"),
                          ("he's","he is"), ("she's","she is"), ("that's","that is"),
                          ("there's","there is"), ("'s", " "), ("'ve", " have "),
                          ("can't", "cannot "), ("n't", " not "), ("i'm", "i am "),
                          ("'re", " are "), ("'d", " would "), ("'ll", " will ")]

substrings_to_replace += [("'", ''), ('&', ' and '), ('~', ''), ('^', ''), ('*', '')]

tokens_characters = [(',', ' '), ('.', '  '), ('!', ' ! '), ('?', ' '), ('\\', ' '), ('/', ' '), ('-', ' ')]

def text_tokenization(text):
    text = replace_characters_in_string(text, characters_pairs=[('[math]', '[@math]'), ('[/math]', '[#math]')])
    text = clean_text(text)
    text = replace_characters_in_string(text, characters_pairs=substrings_to_replace)
    text = replace_characters_in_string(text, characters_pairs=tokens_characters)
    text = clean_text_from_latex(text)
    text = replace_characters_in_string(text, characters_pairs=[('[', ''), (']', ''), 
                                                                ('{', ''), ('}', ''), 
                                                                ('$', ' $ '),
                                                                ('@math', ' @math '), ('#math', ' #math ')])
    text = number_regex_replace(text)
    text = remove_extra_spaces(text)
    tokens = text.split()
    return tokens

In [277]:
for q1, q2, is_duplicate in train_set[['question1', 'question2', 'is_duplicate']].values:
    if is_duplicate:
        tokens_q1 = text_tokenization(q1)
        tokens_q2 = text_tokenization(q2)
        if ('[math]' in q1 or '[math]' in q2):
            print(q1)
            print(q2)
            print(tokens_q1)
            print(tokens_q2)
            print('\n\n\n')

How do I learn [math]\LaTeX[/math]?
How can I learn latex in the easiest way?
['how', 'do', 'i', 'learn', 'latex']
['how', 'can', 'i', 'learn', 'latex', 'in', 'the', 'easiest', 'way']




Let [math]S_{n} = n^{2} + 20n + 12[/math], where [math]n[/math] is a positive integer. What is the sum of all possible values of n for which [math]S_{n}[/math] is a perfect square?
Given [math]S_n = n^2 +20n + 12[/math]. What will be the sum of positive integers such that [math]S_n[/math] will be a perfect square?
['let', 'sn=n2+20n+12', 'where', 'n', 'is', 'a', 'positive', 'integer', 'what', 'is', 'the', 'sum', 'of', 'all', 'possible', 'values', 'of', 'n', 'for', 'which', 'sn', 'is', 'a', 'perfect', 'square']
['given', 'sn=n2+20n+12', 'what', 'will', 'be', 'the', 'sum', 'of', 'positive', 'integers', 'such', 'that', 'sn', 'will', 'be', 'a', 'perfect', 'square']




If real numbers are in [math]x[/math]-axis and complex numbers in [math]y[/math]-axis, what is in the [math]z[/math]-axis?
If the real num

What is the difference between H+ ion and proton?
Is there no possible difference between positively charged hydrogen ([math]H^{+}[/math]) and proton?
['what', 'is', 'the', 'difference', 'between', 'h+', 'ion', 'and', 'proton']
['is', 'there', 'no', 'possible', 'difference', 'between', 'positively', 'charged', 'hydrogen', 'h+', 'and', 'proton']




How many polynomial [math]p(x)[/math] are there such that [math]p(q(x))=q(p(x))[/math] for all polynomial [math]q(x) [/math]?
What are all polynomials [math]p(x)[/math] such that [math]p(q(x))=q(p(x))[/math] for every polynomial [math]q(x)[/math]?
['how', 'many', 'polynomial', 'px', 'are', 'there', 'such', 'that', 'pqx=qpx', 'for', 'all', 'polynomial', 'qx']
['what', 'are', 'all', 'polynomials', 'px', 'such', 'that', 'pqx=qpx', 'for', 'every', 'polynomial', 'qx']




In a right-angle triangle, perpendicular is 3^1/2 times the base. What is ratio of their opposite angles?
In a right-angle triangle, perpendicular is [math]\sqrt{3}[/math] times

How is the value of [math]\pi[/math] calculated?
What is the value pi?
['how', 'is', 'the', 'value', 'of', 'pi', 'calculated']
['what', 'is', 'the', 'value', 'pi']




What are some examples of e=mc2?
What are some good examples of [math]E=mc^2[/math]?
['what', 'are', 'some', 'examples', 'of', 'e=mc2']
['what', 'are', 'some', 'good', 'examples', 'of', 'e=mc2']






In [278]:
for q1, q2 in test_set[['question1', 'question2']].values[:100]:
        tokens_q1 = text_tokenization(q1)
        tokens_q2 = text_tokenization(q2)
        print(q1)
        print(q2)
        print(tokens_q1)
        print(tokens_q2)
        print('\n\n\n')

How does the Surface Pro himself 4 compare with iPad Pro?
Why did Microsoft choose core m3 and not core i3 home Surface Pro 4?
['how', 'does', 'the', 'surface', 'pro', 'himself', '4', 'compare', 'with', 'ipad', 'pro']
['why', 'did', 'microsoft', 'choose', 'core', 'm3', 'and', 'not', 'core', 'i3', 'home', 'surface', 'pro', '4']




Should I have a hair transplant at age 24? How much would it cost?
How much cost does hair transplant require?
['should', 'i', 'have', 'a', 'hair', 'transplant', 'at', 'age', '24', 'how', 'much', 'would', 'it', 'cost']
['how', 'much', 'cost', 'does', 'hair', 'transplant', 'require']




What but is the best way to send money from China to the US?
What you send money to China?
['what', 'but', 'is', 'the', 'best', 'way', 'to', 'send', 'money', 'from', 'china', 'to', 'the', 'us']
['what', 'you', 'send', 'money', 'to', 'china']




Which food not emulsifiers?
What foods fibre?
['which', 'food', 'not', 'emulsifiers']
['what', 'foods', 'fibre']




How "aberystwyth

# Сохранение результатов

In [279]:
def tokenize_and_store_series(pd_series, filename):
    pd_series = pd_series.apply(lambda s: ' '.join(text_tokenization(s)))
    pd_series.to_csv(filename, index='Id', header=['question'])

def load_tokenized_series(filename):
    try:
        series = pd.read_csv(filename, index_col='id', header=0)
    except:
        series = pd.read_csv(filename, index_col='test_id', header=0)
    series = series['question'].fillna('')
    return series

In [280]:
# store tokenized questions
tokenize_and_store_series(train_set['question1'], 'train_q1_tokenized.csv')
tokenize_and_store_series(train_set['question2'], 'train_q2_tokenized.csv')
tokenize_and_store_series(test_set['question1'], 'test_q1_tokenized.csv')
tokenize_and_store_series(test_set['question2'], 'test_q2_tokenized.csv')

In [281]:
# load tokenized questions
train_q1_tokenized_objects = load_tokenized_series('train_q1_tokenized.csv')
train_q2_tokenized_objects = load_tokenized_series('train_q2_tokenized.csv')
test_q1_tokenized_objects = load_tokenized_series('test_q1_tokenized.csv')
test_q2_tokenized_objects = load_tokenized_series('test_q2_tokenized.csv')

# Check

In [282]:
from collections import Counter

In [283]:
def space_tokenizer(s):
    return s.split()

all_texts = np.concatenate([train_q1_tokenized_objects.apply(space_tokenizer).values, 
                            train_q2_tokenized_objects.apply(space_tokenizer).values,
                            test_q1_tokenized_objects.apply(space_tokenizer).values, 
                            test_q2_tokenized_objects.apply(space_tokenizer).values])

counts = Counter(np.concatenate(all_texts))

In [292]:
potential_problems = [(token, count) for token, count in counts.items() if not token.isalpha() and not token.isdigit()]
print(len(counts), len(potential_problems))
sorted(potential_problems, key=lambda l: -l[1])

128784 14008


[('$', 19197),
 ('c++', 10518),
 ('12th', 8285),
 ('!', 6765),
 ('4g', 5860),
 ('=', 5665),
 ('+', 4614),
 ('3d', 3984),
 ('10th', 3866),
 ('3g', 3767),
 ('2nd', 3695),
 ('1st', 3520),
 ('c#', 3278),
 ('3rd', 3020),
 ('ps4', 2554),
 ('100%', 2339),
 ('11th', 2210),
 ('4th', 2057),
 ('i5', 1983),
 ('ww2', 1977),
 ('h1b', 1933),
 ('5s', 1799),
 ('6s', 1737),
 ('7th', 1623),
 ('%', 1514),
 ('5th', 1453),
 ('mp3', 1432),
 ('g4', 1268),
 ('21st', 1234),
 ('4gb', 1216),
 ('i7', 1208),
 ('20s', 1167),
 ('f1', 1164),
 ('6th', 1133),
 ('90%', 1120),
 ('ps3', 1103),
 ('i3', 1080),
 ('b2b', 1070),
 ('2gb', 1035),
 ('10%', 994),
 ('b2', 978),
 ('#math', 946),
 ('9th', 944),
 ('x2', 929),
 ('ww1', 918),
 ('google+', 871),
 ('b1', 864),
 ('s7', 852),
 ('2%', 822),
 ('ww3', 822),
 ('8gb', 806),
 ('4s', 804),
 ('5%', 798),
 ('20th', 791),
 ('@math', 767),
 ('2d', 764),
 ('95%', 748),
 ('60%', 720),
 ('50%', 704),
 ('8th', 702),
 ('s6', 698),
 ('14th', 695),
 ('h1', 693),
 ('1080p', 690),
 ('j7', 689),

In [291]:
concret_potential_problems = [item for item in potential_problems if '000' in item[0]]
sorted(concret_potential_problems, key=lambda l: -l[1])

[('1000', 25281),
 ('2000', 5887),
 ('10000', 5319),
 ('25000', 3475),
 ('15000', 3427),
 ('50000', 2863),
 ('5000', 2858),
 ('30000', 2855),
 ('20000', 2772),
 ('100000', 1706),
 ('4000', 1501),
 ('40000', 1281),
 ('3000', 1260),
 ('60000', 1242),
 ('000', 1013),
 ('7000', 763),
 ('8000', 675),
 ('12000', 668),
 ('6000', 660),
 ('500000', 537),
 ('35000', 477),
 ('70000', 423),
 ('80000', 423),
 ('200000', 413),
 ('150000', 305),
 ('1000rs', 303),
 ('11000', 281),
 ('401000', 280),
 ('250000', 275),
 ('65000', 260),
 ('18000', 257),
 ('9000', 256),
 ('45000', 253),
 ('90000', 238),
 ('2000s', 237),
 ('a6000', 216),
 ('300000', 203),
 ('13000', 200),
 ('55000', 199),
 ('22000', 190),
 ('16000', 187),
 ('23000', 176),
 ('27000', 175),
 ('17000', 172),
 ('14000', 161),
 ('75000', 161),
 ('24000', 139),
 ('rs10000', 117),
 ('400000', 117),
 ('a7000', 114),
 ('32000', 101),
 ('800000', 97),
 ('28000', 91),
 ('130000', 90),
 ('120000', 89),
 ('25000inr', 85),
 ('000000', 80),
 ('350000', 77

In [294]:
for q1, q2, is_duplicate in train_set[['question1', 'question2', 'is_duplicate']].values:
    if is_duplicate:
        tokens_q1 = text_tokenization(q1)
        tokens_q2 = text_tokenization(q2)
        if '$' in tokens_q1 or '$' in tokens_q2:
            print(q1)
            print(q2)
            print(tokens_q1)
            print(tokens_q2)
            print('\n\n\n')

What is the easiest way to become a billionaire($)?
What is the best way to become a billionaire?
['what', 'is', 'the', 'easiest', 'way', 'to', 'become', 'a', 'billionaire', '$']
['what', 'is', 'the', 'best', 'way', 'to', 'become', 'a', 'billionaire']




How comfortably can I live in Washington DC on a $80,000 salary?
Can I live comfortably in DC on $80,000 - $114,000 salary?
['how', 'comfortably', 'can', 'i', 'live', 'in', 'washington', 'dc', 'on', 'a', '$', '80000', 'salary']
['can', 'i', 'live', 'comfortably', 'in', 'dc', 'on', '$', '80000', '$', '114000', 'salary']




Why, or why not, should the minimum wage be raised to $15?
Should the minimum wage be raised to $15/hr for all American workers?
['why', 'or', 'why', 'not', 'should', 'the', 'minimum', 'wage', 'be', 'raised', 'to', '$', '15']
['should', 'the', 'minimum', 'wage', 'be', 'raised', 'to', '$', '15', 'hr', 'for', 'all', 'american', 'workers']




I want a real and effective way to make $ 500 per month with the knowledge t

['did', 'microsoft', 'pay', 'too', 'much', 'for', 'yammer']




What are the best in the ear headphones under 2000?
What is the best in-ear headphones under $2K?
['what', 'are', 'the', 'best', 'in', 'the', 'ear', 'headphones', 'under', '2000']
['what', 'is', 'the', 'best', 'in', 'ear', 'headphones', 'under', '$', '2000']




I wrote a letter, asking for donations from companies and philanthropist to help pay off $100,000 private student loan debt. Where should I send it?
I wrote a letter, asking companies and philanthropists to help pay off $100,000 in private student loan debt. Where should I send it?
['i', 'wrote', 'a', 'letter', 'asking', 'for', 'donations', 'from', 'companies', 'and', 'philanthropist', 'to', 'help', 'pay', 'off', '$', '100000', 'private', 'student', 'loan', 'debt', 'where', 'should', 'i', 'send', 'it']
['i', 'wrote', 'a', 'letter', 'asking', 'companies', 'and', 'philanthropists', 'to', 'help', 'pay', 'off', '$', '100000', 'in', 'private', 'student', 'loan', 'debt',

What is the best laptop under 60k(900$ approx)?
What is the best laptop under 60000 Indian Rupees?
['what', 'is', 'the', 'best', 'laptop', 'under', '60000', '900', '$', 'approx']
['what', 'is', 'the', 'best', 'laptop', 'under', '60000', 'indian', 'rupees']




What’s the best used car for under 7000?
What's the best used car I can buy for max $7000?
['what', 'is', 'the', 'best', 'used', 'car', 'for', 'under', '7000']
['what', 'is', 'the', 'best', 'used', 'car', 'i', 'can', 'buy', 'for', 'max', '$', '7000']




What's the best gaming laptop under $2000?
Which one is the best gaming laptop under 2000$?
['what', 'is', 'the', 'best', 'gaming', 'laptop', 'under', '$', '2000']
['which', 'one', 'is', 'the', 'best', 'gaming', 'laptop', 'under', '2000', '$']




What is the value of a 1935 1 dollar silver certificate?
What's the value of a Series 1935 E $1 Silver Certificate?
['what', 'is', 'the', 'value', 'of', 'a', '1935', '1', 'dollar', 'silver', 'certificate']
['what', 'is', 'the', 'value',

What business can I start up with $10,000?
What business can I start with $10,000?
['what', 'business', 'can', 'i', 'start', 'up', 'with', '$', '10000']
['what', 'business', 'can', 'i', 'start', 'with', '$', '10000']




If the integers a,b are coprime, then does it follow that for any positive integers $a^l$ and $b^m$ are coprime as well?
If the integers a,b are coprime, then does it follow that for any positive integers, [math]a^l[/math] and [math]b^m[/math] are coprime as well?
['if', 'the', 'integers', 'a', 'b', 'are', 'coprime', 'then', 'does', 'it', 'follow', 'that', 'for', 'any', 'positive', 'integers', '$', 'al', '$', 'and', '$', 'bm', '$', 'are', 'coprime', 'as', 'well']
['if', 'the', 'integers', 'a', 'b', 'are', 'coprime', 'then', 'does', 'it', 'follow', 'that', 'for', 'any', 'positive', 'integers', 'al', 'and', 'bm', 'are', 'coprime', 'as', 'well']




How can I make $200 a day?
How do I make $200 a day?
['how', 'can', 'i', 'make', '$', '200', 'a', 'day']
['how', 'do', 'i', 

What is the best Used car for under $7000?
What's the best used car I can buy for max $7000?
['what', 'is', 'the', 'best', 'used', 'car', 'for', 'under', '$', '7000']
['what', 'is', 'the', 'best', 'used', 'car', 'i', 'can', 'buy', 'for', 'max', '$', '7000']




How could I use $200 to make substantially more?
How can I use my $200 to make more money?
['how', 'could', 'i', 'use', '$', '200', 'to', 'make', 'substantially', 'more']
['how', 'can', 'i', 'use', 'my', '$', '200', 'to', 'make', 'more', 'money']




Is it worth to buy yahoo in $4.8 bn?
Is Yahoo worth $5 billion?
['is', 'it', 'worth', 'to', 'buy', 'yahoo', 'in', '$', '4', '8', 'bn']
['is', 'yahoo', 'worth', '$', '5', 'billion']




How do you invest $1000?
How do I invest 1000$?
['how', 'do', 'you', 'invest', '$', '1000']
['how', 'do', 'i', 'invest', '1000', '$']




What is the quickest way to make $8,000?
What are the quickest ways to make money?
['what', 'is', 'the', 'quickest', 'way', 'to', 'make', '$', '8000']
['what', 'are

Is Uber overvalued?
At a $40B valuation, is Uber overvalued? Why or why not?
['is', 'uber', 'overvalued']
['at', 'a', '$', '40b', 'valuation', 'is', 'uber', 'overvalued', 'why', 'or', 'why', 'not']




How can I make an extra $1000 a month?
How do I make $1000 extra dollars?
['how', 'can', 'i', 'make', 'an', 'extra', '$', '1000', 'a', 'month']
['how', 'do', 'i', 'make', '$', '1000', 'extra', 'dollars']




What are the best studio monitors for under $450 per pair?
What is the best pair of studio monitors under $450?
['what', 'are', 'the', 'best', 'studio', 'monitors', 'for', 'under', '$', '450', 'per', 'pair']
['what', 'is', 'the', 'best', 'pair', 'of', 'studio', 'monitors', 'under', '$', '450']




How can I make an extra $2000 this month?
How do I make $1000 as extra income/month apart from the regular job?
['how', 'can', 'i', 'make', 'an', 'extra', '$', '2000', 'this', 'month']
['how', 'do', 'i', 'make', '$', '1000', 'as', 'extra', 'income', 'month', 'apart', 'from', 'the', 'regular

Am not starting big? How can I make $1000 per month online?
How could I make money online?
['am', 'not', 'starting', 'big', 'how', 'can', 'i', 'make', '$', '1000', 'per', 'month', 'online']
['how', 'could', 'i', 'make', 'money', 'online']




What are the best laptops within 30000?
What are the best laptops to buy within Rs.30000 or $485 and why?
['what', 'are', 'the', 'best', 'laptops', 'within', '30000']
['what', 'are', 'the', 'best', 'laptops', 'to', 'buy', 'within', 'rs', '30000', 'or', '$', '485', 'and', 'why']




How do I get one million dollars right now?
I want $1 million right now, how do I get it?
['how', 'do', 'i', 'get', 'one', 'million', 'dollars', 'right', 'now']
['i', 'want', '$', '1', 'million', 'right', 'now', 'how', 'do', 'i', 'get', 'it']




Can $B creative breakthroughs like Uber happen on demand?
Can billion dollar creative breakthroughs like Uber be created on demand?
['can', '$', 'b', 'creative', 'breakthroughs', 'like', 'uber', 'happen', 'on', 'demand']
['can'

What's the best way to invest $500,000?
What is the best way to invest $500K in 2016?
['what', 'is', 'the', 'best', 'way', 'to', 'invest', '$', '500000']
['what', 'is', 'the', 'best', 'way', 'to', 'invest', '$', '500000', 'in', '2016']




How can we start a business with $100?
What business can I start with $100?
['how', 'can', 'we', 'start', 'a', 'business', 'with', '$', '100']
['what', 'business', 'can', 'i', 'start', 'with', '$', '100']




How can I make an extra $500 per week?
How do you make 500 dollars in a week?
['how', 'can', 'i', 'make', 'an', 'extra', '$', '500', 'per', 'week']
['how', 'do', 'you', 'make', '500', 'dollars', 'in', 'a', 'week']




Can I make $US 100,000 a month betting on horses?
Can I make 20,000 a month betting on horses?
['can', 'i', 'make', '$', 'us', '100000', 'a', 'month', 'betting', 'on', 'horses']
['can', 'i', 'make', '20000', 'a', 'month', 'betting', 'on', 'horses']




Where can I buy a raspberry pi Zero: the $5 computer?
Where can I buy a Raspberr


How can I make every day $10 online?
How can I earn 10$ per day online?
['how', 'can', 'i', 'make', 'every', 'day', '$', '10', 'online']
['how', 'can', 'i', 'earn', '10', '$', 'per', 'day', 'online']




How much can I spend on rent in Manhattan if my yearly salary is $1 million?
How much can I spend on rent in Manhattan?
['how', 'much', 'can', 'i', 'spend', 'on', 'rent', 'in', 'manhattan', 'if', 'my', 'yearly', 'salary', 'is', '$', '1', 'million']
['how', 'much', 'can', 'i', 'spend', 'on', 'rent', 'in', 'manhattan']




What is the best gaming laptop under 2000$?
What's the best gaming laptop around $2000?
['what', 'is', 'the', 'best', 'gaming', 'laptop', 'under', '2000', '$']
['what', 'is', 'the', 'best', 'gaming', 'laptop', 'around', '$', '2000']




What are the best headphones under $100?
Which are the best headphones for under $100?
['what', 'are', 'the', 'best', 'headphones', 'under', '$', '100']
['which', 'are', 'the', 'best', 'headphones', 'for', 'under', '$', '100']




What

Where can I get an explainer video for $100?
Where can I get a quality explainer video done for $100?
['where', 'can', 'i', 'get', 'an', 'explainer', 'video', 'for', '$', '100']
['where', 'can', 'i', 'get', 'a', 'quality', 'explainer', 'video', 'done', 'for', '$', '100']




How can I make an extra $1000 a month?
What can I do to make an extra $1000?
['how', 'can', 'i', 'make', 'an', 'extra', '$', '1000', 'a', 'month']
['what', 'can', 'i', 'do', 'to', 'make', 'an', 'extra', '$', '1000']




What is the best quadcopter for under $200?
What's the best drone that you can buy for under $200?
['what', 'is', 'the', 'best', 'quadcopter', 'for', 'under', '$', '200']
['what', 'is', 'the', 'best', 'drone', 'that', 'you', 'can', 'buy', 'for', 'under', '$', '200']




Am not starting big? How can I make $1000 per month online?
How can one make money online?
['am', 'not', 'starting', 'big', 'how', 'can', 'i', 'make', '$', '1000', 'per', 'month', 'online']
['how', 'can', 'one', 'make', 'money', 'onl