-
Notifications
You must be signed in to change notification settings - Fork 0
/
stop_words.py
61 lines (24 loc) · 894 Bytes
/
stop_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# coding: utf-8
# In[1]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import docx
# In[2]:
# Sample passage that the stop-word filter below is run on.
# NOTE(review): the text contains many misspellings; it is kept exactly as
# written -- confirm whether they are intentional before correcting them.
data = "This is a story of two young boys who have two fathers one is rich dad and anotheer is a poor dad. the por dad alwaays says to geet good marks ,get good job with best payheck and be safe always where as te rich dad who waas a professttional busssiness says to work smart and try to make money work for you not you working for maney and has taaught financial educationand foundation ehich is never taught in any of the school."
# In[3]:
data  # notebook echo: shows the text in a notebook, no effect in a script
# In[4]:
# English stop-word list, held as a set so every membership test is O(1).
# Needs the NLTK "stopwords" corpus to be installed (see nltk.download).
stop_words = set(stopwords.words("english"))
# In[5]:
stop_words  # notebook echo
# In[6]:
# Split the passage into word and punctuation tokens.
words = word_tokenize(data)
# In[7]:
# Keep every token that is not a stop word. NLTK's English list is all
# lowercase, so each token is lower-cased for the comparison only; without
# that, capitalised stop words such as the leading "This" slip through.
# The token itself is stored with its original casing.
new_data = [word for word in words if word.lower() not in stop_words]
# In[8]:
# Re-assemble the surviving tokens into one space-separated string and keep
# it under a name so the result is usable outside a notebook as well.
filtered_text = " ".join(new_data)
filtered_text  # notebook echo
# In[ ]: